• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *****************************************************************************
5 * Copyright (C) 2001-2016, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ****************************************************************************/
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_COLLATION
12 
13 #include "srchtest.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
15 #include "../cintltst/usrchdat.inc"
16 #endif
17 #include "unicode/stsearch.h"
18 #include "unicode/ustring.h"
19 #include "unicode/schriter.h"
20 #include "cmemory.h"
21 #include <string.h>
22 #include <stdio.h>
23 
24 // private definitions -----------------------------------------------------
25 
// Expands to one `case` label of the switch in runIndexedTest(). It always
// publishes the test's name; when `exec` is set it logs a banner and then
// either runs the test or, if the fixtures are flagged as broken, reports a
// data error instead. Relies on the locals `name`, `exec` and `areBroken`
// being in scope at the expansion site.
#define CASE(id,test)                                                              \
    case id:                                                                       \
        name = #test;                                                              \
        if (exec) {                                                                \
            logln(#test "---");                                                    \
            logln((UnicodeString)"");                                              \
            if (!areBroken) {                                                      \
                test();                                                            \
            } else {                                                               \
                dataerrln(__FILE__ " cannot test - failed to create collator.");  \
            }                                                                      \
        }                                                                          \
        break;
39 
40 // public constructors and destructors --------------------------------------
41 
StringSearchTest()42 StringSearchTest::StringSearchTest()
43 #if !UCONFIG_NO_BREAK_ITERATION
44 :
45     m_en_wordbreaker_(nullptr), m_en_characterbreaker_(nullptr)
46 #endif
47 {
48 #if !UCONFIG_NO_BREAK_ITERATION
49     UErrorCode    status = U_ZERO_ERROR;
50 
51     m_en_us_ = dynamic_cast<RuleBasedCollator*>(Collator::createInstance("en_US", status));
52     m_fr_fr_ = dynamic_cast<RuleBasedCollator*>(Collator::createInstance("fr_FR", status));
53     m_de_    = dynamic_cast<RuleBasedCollator*>(Collator::createInstance("de_DE", status));
54     m_es_    = dynamic_cast<RuleBasedCollator*>(Collator::createInstance("es_ES", status));
55     if(U_FAILURE(status)) {
56       delete m_en_us_;
57       delete m_fr_fr_;
58       delete m_de_;
59       delete m_es_;
60       m_en_us_ = nullptr;
61       m_fr_fr_ = nullptr;
62       m_de_ = nullptr;
63       m_es_ = nullptr;
64       errln("Collator creation failed with %s", u_errorName(status));
65       return;
66     }
67 
68 
69     UnicodeString rules;
70     rules.setTo(m_de_->getRules());
71     char16_t extrarules[128];
72     u_unescape(EXTRACOLLATIONRULE, extrarules, 128);
73     rules.append(extrarules, u_strlen(extrarules));
74     delete m_de_;
75 
76     m_de_ = new RuleBasedCollator(rules, status);
77 
78     rules.setTo(m_es_->getRules());
79     rules.append(extrarules, u_strlen(extrarules));
80 
81     delete m_es_;
82 
83     m_es_ = new RuleBasedCollator(rules, status);
84 
85 #if !UCONFIG_NO_BREAK_ITERATION
86     m_en_wordbreaker_      = BreakIterator::createWordInstance(
87                                                     Locale::getEnglish(), status);
88     m_en_characterbreaker_ = BreakIterator::createCharacterInstance(
89                                                     Locale::getEnglish(), status);
90 #endif
91 #endif
92 }
93 
~StringSearchTest()94 StringSearchTest::~StringSearchTest()
95 {
96 #if !UCONFIG_NO_BREAK_ITERATION
97     delete m_en_us_;
98     delete m_fr_fr_;
99     delete m_de_;
100     delete m_es_;
101 #if !UCONFIG_NO_BREAK_ITERATION
102     delete m_en_wordbreaker_;
103     delete m_en_characterbreaker_;
104 #endif
105 #endif
106 }
107 
108 // public methods ----------------------------------------------------------
109 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)110 void StringSearchTest::runIndexedTest(int32_t index, UBool exec,
111                                       const char* &name, char* )
112 {
113 #if !UCONFIG_NO_BREAK_ITERATION
114     UBool areBroken = false;
115     if (m_en_us_ == nullptr && m_fr_fr_ == nullptr && m_de_ == nullptr &&
116         m_es_ == nullptr && m_en_wordbreaker_ == nullptr &&
117         m_en_characterbreaker_ == nullptr && exec) {
118         areBroken = true;
119     }
120 
121     switch (index) {
122 #if !UCONFIG_NO_FILE_IO
123         CASE(0, TestOpenClose)
124 #endif
125         CASE(1, TestInitialization)
126         CASE(2, TestBasic)
127         CASE(3, TestNormExact)
128         CASE(4, TestStrength)
129 #if UCONFIG_NO_BREAK_ITERATION
130     case 5:
131         name = "TestBreakIterator";
132         break;
133 #else
134         CASE(5, TestBreakIterator)
135 #endif
136         CASE(6, TestVariable)
137         CASE(7, TestOverlap)
138         CASE(8, TestCollator)
139         CASE(9, TestPattern)
140         CASE(10, TestText)
141         CASE(11, TestCompositeBoundaries)
142         CASE(12, TestGetSetOffset)
143         CASE(13, TestGetSetAttribute)
144         CASE(14, TestGetMatch)
145         CASE(15, TestSetMatch)
146         CASE(16, TestReset)
147         CASE(17, TestSupplementary)
148         CASE(18, TestContraction)
149         CASE(19, TestIgnorable)
150         CASE(20, TestCanonical)
151         CASE(21, TestNormCanonical)
152         CASE(22, TestStrengthCanonical)
153 #if UCONFIG_NO_BREAK_ITERATION
154     case 23:
155         name = "TestBreakIteratorCanonical";
156         break;
157 #else
158         CASE(23, TestBreakIteratorCanonical)
159 #endif
160         CASE(24, TestVariableCanonical)
161         CASE(25, TestOverlapCanonical)
162         CASE(26, TestCollatorCanonical)
163         CASE(27, TestPatternCanonical)
164         CASE(28, TestTextCanonical)
165         CASE(29, TestCompositeBoundariesCanonical)
166         CASE(30, TestGetSetOffsetCanonical)
167         CASE(31, TestSupplementaryCanonical)
168         CASE(32, TestContractionCanonical)
169         CASE(33, TestUClassID)
170         CASE(34, TestSubclass)
171         CASE(35, TestCoverage)
172         CASE(36, TestDiacriticMatch)
173         default: name = ""; break;
174     }
175 #else
176     name="";
177 #endif
178 }
179 
180 #if !UCONFIG_NO_BREAK_ITERATION
181 // private methods ------------------------------------------------------
182 
getCollator(const char * collator)183 RuleBasedCollator * StringSearchTest::getCollator(const char *collator)
184 {
185     if (collator == nullptr) {
186         return m_en_us_;
187     }
188     if (strcmp(collator, "fr") == 0) {
189         return m_fr_fr_;
190     }
191     else if (strcmp(collator, "de") == 0) {
192         return m_de_;
193     }
194     else if (strcmp(collator, "es") == 0) {
195         return m_es_;
196     }
197     else {
198         return m_en_us_;
199     }
200 }
201 
getBreakIterator(const char * breaker)202 BreakIterator * StringSearchTest::getBreakIterator(const char *breaker)
203 {
204 #if UCONFIG_NO_BREAK_ITERATION
205     return nullptr;
206 #else
207     if (breaker == nullptr) {
208         return nullptr;
209     }
210     if (strcmp(breaker, "wordbreaker") == 0) {
211         return m_en_wordbreaker_;
212     }
213     else {
214         return m_en_characterbreaker_;
215     }
216 #endif
217 }
218 
toCharString(const UnicodeString & text)219 char * StringSearchTest::toCharString(const UnicodeString &text)
220 {
221     static char   result[1024];
222            int    index  = 0;
223            int    count  = 0;
224            int    length = text.length();
225 
226     for (; count < length; count ++) {
227         char16_t ch = text[count];
228         if (ch >= 0x20 && ch <= 0x7e) {
229             result[index ++] = (char)ch;
230         }
231         else {
232             snprintf(result+index, sizeof(result)-index, "\\u%04x", ch);
233             index += 6; /* \uxxxx */
234         }
235     }
236     result[index] = 0;
237 
238     return result;
239 }
240 
getECollationStrength(const UCollationStrength & strength) const241 Collator::ECollationStrength StringSearchTest::getECollationStrength(
242                                     const UCollationStrength &strength) const
243 {
244   switch (strength)
245   {
246   case UCOL_PRIMARY :
247     return Collator::PRIMARY;
248   case UCOL_SECONDARY :
249     return Collator::SECONDARY;
250   case UCOL_TERTIARY :
251     return Collator::TERTIARY;
252   default :
253     return Collator::IDENTICAL;
254   }
255 }
256 
// Drives `strsrch` through four passes -- next(), previous(), following()
// and preceding() -- and compares every reported match start/length with the
// expected values in search->offset[] / search->size[]. A negative entry in
// search->offset[] marks the end of the expected matches.
//
// strsrch: search object to exercise; its USEARCH_ELEMENT_COMPARISON
//          attribute is set from search->elemCompare and is reset to the
//          standard comparison only when the function returns true.
// search:  expected results for this test case.
// Returns false as soon as a match position/length differs from the
// expectation (after logging text, pattern and the mismatch); returns true
// otherwise. A mismatch in the matched *text* is logged but does not by
// itself cause a false return.
assertEqualWithStringSearch(StringSearch * strsrch,const SearchData * search)257 UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
258                                                     const SearchData *search)
259 {
260     int32_t         count       = 0;
261     UErrorCode      status      = U_ZERO_ERROR;
262     int32_t         matchindex  = search->offset[count];
263     UnicodeString   matchtext;
264     int32_t         matchlength;
265 
    // Apply the element comparison mode requested by this test case.
266     strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, search->elemCompare, status);
267     if (U_FAILURE(status)) {
268         errln("Error setting USEARCH_ELEMENT_COMPARISON attribute %s", u_errorName(status));
269         return false;
270     }
271 
    // Before any search call there must be no current match.
272     if (strsrch->getMatchedStart() != USEARCH_DONE ||
273         strsrch->getMatchedLength() != 0) {
274         errln("Error with the initialization of match start and length");
275     }
276 
277     // start of next matches
278     while (U_SUCCESS(status) && matchindex >= 0) {
279         matchlength = search->size[count];
280         strsrch->next(status);
281         if (matchindex != strsrch->getMatchedStart() ||
282             matchlength != strsrch->getMatchedLength()) {
            // toCharString() reuses one static buffer, so each string is
            // logged before the next one is converted.
283             char *str = toCharString(strsrch->getText());
284             errln("Text: %s", str);
285             str = toCharString(strsrch->getPattern());
286             errln("Pattern: %s", str);
287             errln("Error next match found at %d (len:%d); expected %d (len:%d)",
288                     strsrch->getMatchedStart(), strsrch->getMatchedLength(),
289                     matchindex, matchlength);
290             return false;
291         }
292         count ++;
293 
294         strsrch->getMatchedText(matchtext);
295 
        // compareBetween() returns non-zero when the matched text differs
        // from the corresponding slice of the searched text.
296         if (U_FAILURE(status) ||
297             strsrch->getText().compareBetween(matchindex,
298                                               matchindex + matchlength,
299                                               matchtext, 0,
300                                               matchtext.length())) {
301             errln("Error getting next matched text");
302         }
303 
304         matchindex = search->offset[count];
305     }
    // One more next() after the last expected match must report no match.
306     strsrch->next(status);
307     if (strsrch->getMatchedStart() != USEARCH_DONE ||
308         strsrch->getMatchedLength() != 0) {
309         char *str = toCharString(strsrch->getText());
310             errln("Text: %s", str);
311             str = toCharString(strsrch->getPattern());
312             errln("Pattern: %s", str);
313             errln("Error next match found at %d (len:%d); expected <NO MATCH>",
314                     strsrch->getMatchedStart(), strsrch->getMatchedLength());
315             return false;
316     }
317 
318     // start of previous matches
    // `count` is one past the last expected match here; step back onto it
    // (or stay at 0 when no match was expected at all).
319     count = count == 0 ? 0 : count - 1;
320     matchindex = search->offset[count];
321     while (U_SUCCESS(status) && matchindex >= 0) {
322         matchlength = search->size[count];
323         strsrch->previous(status);
324         if (matchindex != strsrch->getMatchedStart() ||
325             matchlength != strsrch->getMatchedLength()) {
326             char *str = toCharString(strsrch->getText());
327             errln("Text: %s", str);
328             str = toCharString(strsrch->getPattern());
329             errln("Pattern: %s", str);
330             errln("Error previous match found at %d (len:%d); expected %d (len:%d)",
331                     strsrch->getMatchedStart(), strsrch->getMatchedLength(),
332                     matchindex, matchlength);
333             return false;
334         }
335 
336         strsrch->getMatchedText(matchtext);
337 
338         if (U_FAILURE(status) ||
339             strsrch->getText().compareBetween(matchindex,
340                                               matchindex + matchlength,
341                                               matchtext, 0,
342                                               matchtext.length())) {
343             errln("Error getting previous matched text");
344         }
345 
        // Walk the expectations backwards; -1 ends the loop after index 0.
346         matchindex = count > 0 ? search->offset[count - 1] : -1;
347         count --;
348     }
    // One more previous() before the first expected match must report no match.
349     strsrch->previous(status);
350     if (strsrch->getMatchedStart() != USEARCH_DONE ||
351         strsrch->getMatchedLength() != 0) {
352         char *str = toCharString(strsrch->getText());
353         errln("Text: %s", str);
354         str = toCharString(strsrch->getPattern());
355         errln("Pattern: %s", str);
356         errln("Error previous match found at %d (len:%d); expected <NO MATCH>",
357                 strsrch->getMatchedStart(), strsrch->getMatchedLength());
358         return false;
359     }
360 
361     int32_t nextStart;
362     UBool isOverlap = (strsrch->getAttribute(USEARCH_OVERLAP) == USEARCH_ON);
363 
364     // start of following matches
365     count = 0;
366     matchindex = search->offset[count];
367     nextStart = 0;
368 
369     while (true) {
370         strsrch->following(nextStart, status);
371 
        // Expectations exhausted: following() must now report no match.
372         if (matchindex < 0) {
373             if (strsrch->getMatchedStart() != USEARCH_DONE ||
374                     strsrch->getMatchedLength() != 0) {
375                 char *str = toCharString(strsrch->getText());
376                 errln("Text: %s", str);
377                 str = toCharString(strsrch->getPattern());
378                 errln("Pattern: %s", str);
379                 errln("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>",
380                         nextStart, isOverlap,
381                         strsrch->getMatchedStart(), strsrch->getMatchedLength());
382                 return false;
383             }
384             // no more matches
385             break;
386         }
387 
388         matchlength = search->size[count];
389         if (strsrch->getMatchedStart() != matchindex
390                 || strsrch->getMatchedLength() != matchlength
391                 || U_FAILURE(status)) {
392             char *str = toCharString(strsrch->getText());
393             errln("Text: %s\n", str);
394             str = toCharString(strsrch->getPattern());
395             errln("Pattern: %s\n", str);
396             errln("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
397                         nextStart, isOverlap,
398                         strsrch->getMatchedStart(), strsrch->getMatchedLength(),
399                         matchindex, matchlength);
400             return false;
401         }
402 
        // With overlap on (or for an empty match) resume one position after
        // the match start; otherwise resume right after the match.
403         if (isOverlap || strsrch->getMatchedLength() == 0) {
404             nextStart = strsrch->getMatchedStart() + 1;
405         } else {
406             nextStart = strsrch->getMatchedStart() + strsrch->getMatchedLength();
407         }
408 
409         count++;
410         matchindex = search->offset[count];
411     }
412 
413     // start preceding matches
414     count = -1; // last non-negative offset index, could be -1 if no match
415     while (search->offset[count + 1] >= 0) {
416         count++;
417     }
    // Start from the end of the text and work towards the beginning.
418     nextStart = strsrch->getText().length();
419 
420     while (true) {
421         strsrch->preceding(nextStart, status);
422 
        // Expectations exhausted: preceding() must now report no match.
423         if (count < 0) {
424             if (strsrch->getMatchedStart() != USEARCH_DONE || strsrch->getMatchedLength() != 0) {
425                 char *str = toCharString(strsrch->getText());
426                 errln("Text: %s\n", str);
427                 str = toCharString(strsrch->getPattern());
428                 errln("Pattern: %s\n", str);
429                 errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
430                             nextStart, isOverlap,
431                             strsrch->getMatchedStart(),
432                             strsrch->getMatchedLength());
433                 return false;
434             }
435             // no more matches
436             break;
437         }
438 
439         matchindex = search->offset[count];
440         matchlength = search->size[count];
441         if (strsrch->getMatchedStart() != matchindex
442                 || strsrch->getMatchedLength() != matchlength
443                 || U_FAILURE(status)) {
444             char *str = toCharString(strsrch->getText());
445             errln("Text: %s\n", str);
446             str = toCharString(strsrch->getPattern());
447             errln("Pattern: %s\n", str);
448             errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
449                         nextStart, isOverlap,
450                         strsrch->getMatchedStart(), strsrch->getMatchedLength(),
451                         matchindex, matchlength);
452             return false;
453         }
454 
        // The next preceding() call starts at the match just verified.
455         nextStart = matchindex;
456         count--;
457     }
458 
    // Restore the default element comparison (reached only on success).
459     strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, status);
460     return true;
461 }
462 
assertEqual(const SearchData * search)463 UBool StringSearchTest::assertEqual(const SearchData *search)
464 {
465     UErrorCode     status   = U_ZERO_ERROR;
466 
467     Collator      *collator = getCollator(search->collator);
468     BreakIterator *breaker  = getBreakIterator(search->breaker);
469     StringSearch  *strsrch, *strsrch2;
470     char16_t       temp[128];
471 
472 #if UCONFIG_NO_BREAK_ITERATION
473     if(search->breaker) {
474       return true; /* skip test */
475     }
476 #endif
477     u_unescape(search->text, temp, 128);
478     UnicodeString text;
479     text.setTo(temp);
480     u_unescape(search->pattern, temp, 128);
481     UnicodeString  pattern;
482     pattern.setTo(temp);
483 
484 #if !UCONFIG_NO_BREAK_ITERATION
485     if (breaker != nullptr) {
486         breaker->setText(text);
487     }
488 #endif
489     collator->setStrength(getECollationStrength(search->strength));
490     strsrch = new StringSearch(pattern, text, dynamic_cast<RuleBasedCollator*>(collator),
491                                breaker, status);
492     if (U_FAILURE(status)) {
493         errln("Error opening string search %s", u_errorName(status));
494         return false;
495     }
496 
497     if (!assertEqualWithStringSearch(strsrch, search)) {
498         collator->setStrength(getECollationStrength(UCOL_TERTIARY));
499         delete strsrch;
500         return false;
501     }
502 
503 
504     strsrch2 = strsrch->clone();
505     if( strsrch2 == strsrch || *strsrch2 != *strsrch ||
506         !assertEqualWithStringSearch(strsrch2, search)
507     ) {
508         infoln("failure with StringSearch.clone()");
509         collator->setStrength(getECollationStrength(UCOL_TERTIARY));
510         delete strsrch;
511         delete strsrch2;
512         return false;
513     }
514     delete strsrch2;
515 
516     collator->setStrength(getECollationStrength(UCOL_TERTIARY));
517     delete strsrch;
518     return true;
519 }
520 
assertCanonicalEqual(const SearchData * search)521 UBool StringSearchTest::assertCanonicalEqual(const SearchData *search)
522 {
523     UErrorCode     status   = U_ZERO_ERROR;
524     Collator      *collator = getCollator(search->collator);
525     BreakIterator *breaker  = getBreakIterator(search->breaker);
526     StringSearch  *strsrch;
527     char16_t       temp[128];
528     UBool          result = true;
529 
530 #if UCONFIG_NO_BREAK_ITERATION
531     if(search->breaker) {
532       return true; /* skip test */
533     }
534 #endif
535 
536     u_unescape(search->text, temp, 128);
537     UnicodeString text;
538     text.setTo(temp);
539     u_unescape(search->pattern, temp, 128);
540     UnicodeString  pattern;
541     pattern.setTo(temp);
542 
543 #if !UCONFIG_NO_BREAK_ITERATION
544     if (breaker != nullptr) {
545         breaker->setText(text);
546     }
547 #endif
548     collator->setStrength(getECollationStrength(search->strength));
549     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
550     strsrch = new StringSearch(pattern, text, dynamic_cast<RuleBasedCollator*>(collator),
551                                breaker, status);
552     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
553     if (U_FAILURE(status)) {
554         errln("Error opening string search %s", u_errorName(status));
555         result = false;
556         goto bail;
557     }
558 
559     if (!assertEqualWithStringSearch(strsrch, search)) {
560         result = false;
561         goto bail;
562     }
563 
564 bail:
565     collator->setStrength(getECollationStrength(UCOL_TERTIARY));
566     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
567     delete strsrch;
568 
569     return result;
570 }
571 
assertEqualWithAttribute(const SearchData * search,USearchAttributeValue canonical,USearchAttributeValue overlap)572 UBool StringSearchTest::assertEqualWithAttribute(const SearchData *search,
573                                             USearchAttributeValue canonical,
574                                             USearchAttributeValue overlap)
575 {
576     UErrorCode     status   = U_ZERO_ERROR;
577     Collator      *collator = getCollator(search->collator);
578     BreakIterator *breaker  = getBreakIterator(search->breaker);
579     StringSearch  *strsrch;
580     char16_t       temp[128];
581 
582 
583 #if UCONFIG_NO_BREAK_ITERATION
584     if(search->breaker) {
585       return true; /* skip test */
586     }
587 #endif
588 
589     u_unescape(search->text, temp, 128);
590     UnicodeString text;
591     text.setTo(temp);
592     u_unescape(search->pattern, temp, 128);
593     UnicodeString  pattern;
594     pattern.setTo(temp);
595 
596 #if !UCONFIG_NO_BREAK_ITERATION
597     if (breaker != nullptr) {
598         breaker->setText(text);
599     }
600 #endif
601     collator->setStrength(getECollationStrength(search->strength));
602     strsrch = new StringSearch(pattern, text, dynamic_cast<RuleBasedCollator*>(collator),
603                                breaker, status);
604     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, canonical, status);
605     strsrch->setAttribute(USEARCH_OVERLAP, overlap, status);
606 
607     if (U_FAILURE(status)) {
608         errln("Error opening string search %s", u_errorName(status));
609         return false;
610     }
611 
612     if (!assertEqualWithStringSearch(strsrch, search)) {
613         collator->setStrength(getECollationStrength(UCOL_TERTIARY));
614         delete strsrch;
615         return false;
616     }
617     collator->setStrength(getECollationStrength(UCOL_TERTIARY));
618     delete strsrch;
619     return true;
620 }
621 
TestOpenClose()622 void StringSearchTest::TestOpenClose()
623 {
624     UErrorCode               status    = U_ZERO_ERROR;
625     StringSearch            *result;
626     BreakIterator           *breakiter = m_en_wordbreaker_;
627     UnicodeString            pattern;
628     UnicodeString            text;
629     UnicodeString            temp("a");
630     StringCharacterIterator  chariter(text);
631 
632     /* testing null arguments */
633     result = new StringSearch(pattern, text, nullptr, nullptr, status);
634     if (U_SUCCESS(status)) {
635         errln("Error: nullptr arguments should produce an error");
636     }
637     delete result;
638 
639     chariter.setText(text);
640     status = U_ZERO_ERROR;
641     result = new StringSearch(pattern, chariter, nullptr, nullptr, status);
642     if (U_SUCCESS(status)) {
643         errln("Error: nullptr arguments should produce an error");
644     }
645     delete result;
646 
647     // No-op: text.append(0, 0x1); -- what was intended here?
648     status = U_ZERO_ERROR;
649     result = new StringSearch(pattern, text, nullptr, nullptr, status);
650     if (U_SUCCESS(status)) {
651         errln("Error: Empty pattern should produce an error");
652     }
653     delete result;
654 
655     chariter.setText(text);
656     status = U_ZERO_ERROR;
657     result = new StringSearch(pattern, chariter, nullptr, nullptr, status);
658     if (U_SUCCESS(status)) {
659         errln("Error: Empty pattern should produce an error");
660     }
661     delete result;
662 
663     text.remove();
664     pattern.append(temp);
665     status = U_ZERO_ERROR;
666     result = new StringSearch(pattern, text, nullptr, nullptr, status);
667     if (U_SUCCESS(status)) {
668         errln("Error: Empty text should produce an error");
669     }
670     delete result;
671 
672     chariter.setText(text);
673     status = U_ZERO_ERROR;
674     result = new StringSearch(pattern, chariter, nullptr, nullptr, status);
675     if (U_SUCCESS(status)) {
676         errln("Error: Empty text should produce an error");
677     }
678     delete result;
679 
680     text.append(temp);
681     status = U_ZERO_ERROR;
682     result = new StringSearch(pattern, text, nullptr, nullptr, status);
683     if (U_SUCCESS(status)) {
684         errln("Error: nullptr arguments should produce an error");
685     }
686     delete result;
687 
688     chariter.setText(text);
689     status = U_ZERO_ERROR;
690     result = new StringSearch(pattern, chariter, nullptr, nullptr, status);
691     if (U_SUCCESS(status)) {
692         errln("Error: nullptr arguments should produce an error");
693     }
694     delete result;
695 
696     status = U_ZERO_ERROR;
697     result = new StringSearch(pattern, text, m_en_us_, nullptr, status);
698     if (U_FAILURE(status)) {
699         errln("Error: nullptr break iterator is valid for opening search");
700     }
701     delete result;
702 
703     status = U_ZERO_ERROR;
704     result = new StringSearch(pattern, chariter, m_en_us_, nullptr, status);
705     if (U_FAILURE(status)) {
706         errln("Error: nullptr break iterator is valid for opening search");
707     }
708     delete result;
709 
710     status = U_ZERO_ERROR;
711     result = new StringSearch(pattern, text, Locale::getEnglish(), nullptr, status);
712     if (U_FAILURE(status) || result == nullptr) {
713         errln("Error: nullptr break iterator is valid for opening search");
714     }
715     delete result;
716 
717     status = U_ZERO_ERROR;
718     result = new StringSearch(pattern, chariter, Locale::getEnglish(), nullptr, status);
719     if (U_FAILURE(status)) {
720         errln("Error: nullptr break iterator is valid for opening search");
721     }
722     delete result;
723 
724     status = U_ZERO_ERROR;
725     result = new StringSearch(pattern, text, m_en_us_, breakiter, status);
726     if (U_FAILURE(status)) {
727         errln("Error: Break iterator is valid for opening search");
728     }
729     delete result;
730 
731     status = U_ZERO_ERROR;
732     result = new StringSearch(pattern, chariter, m_en_us_, nullptr, status);
733     if (U_FAILURE(status)) {
734         errln("Error: Break iterator is valid for opening search");
735     }
736     delete result;
737 }
738 
TestInitialization()739 void StringSearchTest::TestInitialization()
740 {
741     UErrorCode     status = U_ZERO_ERROR;
742     UnicodeString  pattern;
743     UnicodeString  text;
744     UnicodeString  temp("a");
745     StringSearch  *result;
746     int count;
747 
748     /* simple test on the pattern ce construction */
749     pattern.append(temp);
750     pattern.append(temp);
751     text.append(temp);
752     text.append(temp);
753     text.append(temp);
754     result = new StringSearch(pattern, text, m_en_us_, nullptr, status);
755     if (U_FAILURE(status)) {
756         errln("Error opening search %s", u_errorName(status));
757     }
758     StringSearch *copy = new StringSearch(*result);
759     if (*(copy->getCollator()) != *(result->getCollator()) ||
760         copy->getBreakIterator() != result->getBreakIterator() ||
761         copy->getMatchedLength() != result->getMatchedLength() ||
762         copy->getMatchedStart() != result->getMatchedStart() ||
763         copy->getOffset() != result->getOffset() ||
764         copy->getPattern() != result->getPattern() ||
765         copy->getText() != result->getText() ||
766         *(copy) != *(result))
767     {
768         errln("Error copying StringSearch");
769     }
770     delete copy;
771 
772     copy = result->safeClone();
773     if (*(copy->getCollator()) != *(result->getCollator()) ||
774         copy->getBreakIterator() != result->getBreakIterator() ||
775         copy->getMatchedLength() != result->getMatchedLength() ||
776         copy->getMatchedStart() != result->getMatchedStart() ||
777         copy->getOffset() != result->getOffset() ||
778         copy->getPattern() != result->getPattern() ||
779         copy->getText() != result->getText() ||
780         *(copy) != *(result)) {
781         errln("Error copying StringSearch");
782     }
783     delete result;
784 
785     /* testing if an extremely large pattern will fail the initialization */
786     for (count = 0; count < 512; count ++) {
787         pattern.append(temp);
788     }
789     result = new StringSearch(pattern, text, m_en_us_, nullptr, status);
790     if (*result != *result) {
791         errln("Error: string search object expected to match itself");
792     }
793     if (*result == *copy) {
794         errln("Error: string search objects are not expected to match");
795     }
796     *copy  = *result;
797     if (*(copy->getCollator()) != *(result->getCollator()) ||
798         copy->getBreakIterator() != result->getBreakIterator() ||
799         copy->getMatchedLength() != result->getMatchedLength() ||
800         copy->getMatchedStart() != result->getMatchedStart() ||
801         copy->getOffset() != result->getOffset() ||
802         copy->getPattern() != result->getPattern() ||
803         copy->getText() != result->getText() ||
804         *(copy) != *(result)) {
805         errln("Error copying StringSearch");
806     }
807     if (U_FAILURE(status)) {
808         errln("Error opening search %s", u_errorName(status));
809     }
810     delete result;
811     delete copy;
812 }
813 
TestBasic()814 void StringSearchTest::TestBasic()
815 {
816     int count = 0;
817     while (BASIC[count].text != nullptr) {
818         //printf("count %d", count);
819         if (!assertEqual(&BASIC[count])) {
820             infoln("Error at test number %d", count);
821         }
822         count ++;
823     }
824 }
825 
TestNormExact()826 void StringSearchTest::TestNormExact()
827 {
828     int count = 0;
829     UErrorCode status = U_ZERO_ERROR;
830     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
831     if (U_FAILURE(status)) {
832         errln("Error setting collation normalization %s",
833               u_errorName(status));
834     }
835     while (BASIC[count].text != nullptr) {
836         if (!assertEqual(&BASIC[count])) {
837             infoln("Error at test number %d", count);
838         }
839         count ++;
840     }
841     count = 0;
842     while (NORMEXACT[count].text != nullptr) {
843         if (!assertEqual(&NORMEXACT[count])) {
844             infoln("Error at test number %d", count);
845         }
846         count ++;
847     }
848     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
849     count = 0;
850     while (NONNORMEXACT[count].text != nullptr) {
851         if (!assertEqual(&NONNORMEXACT[count])) {
852             infoln("Error at test number %d", count);
853         }
854         count ++;
855     }
856 }
857 
TestStrength()858 void StringSearchTest::TestStrength()
859 {
860     int count = 0;
861     while (STRENGTH[count].text != nullptr) {
862         if (!assertEqual(&STRENGTH[count])) {
863             infoln("Error at test number %d", count);
864         }
865         count ++;
866     }
867 }
868 
869 #if !UCONFIG_NO_BREAK_ITERATION
870 
TestBreakIterator()871 void StringSearchTest::TestBreakIterator()
872 {
873     char16_t temp[128];
874     u_unescape(BREAKITERATOREXACT[0].text, temp, 128);
875     UnicodeString text;
876     text.setTo(temp, u_strlen(temp));
877     u_unescape(BREAKITERATOREXACT[0].pattern, temp, 128);
878     UnicodeString pattern;
879     pattern.setTo(temp, u_strlen(temp));
880 
881     UErrorCode status = U_ZERO_ERROR;
882     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
883                                              status);
884     if (U_FAILURE(status)) {
885         errln("Error opening string search %s", u_errorName(status));
886     }
887 
888     strsrch->setBreakIterator(nullptr, status);
889     if (U_FAILURE(status) || strsrch->getBreakIterator() != nullptr) {
890         errln("Error usearch_getBreakIterator returned wrong object");
891     }
892 
893     strsrch->setBreakIterator(m_en_characterbreaker_, status);
894     if (U_FAILURE(status) ||
895         strsrch->getBreakIterator() != m_en_characterbreaker_) {
896         errln("Error usearch_getBreakIterator returned wrong object");
897     }
898 
899     strsrch->setBreakIterator(m_en_wordbreaker_, status);
900     if (U_FAILURE(status) ||
901         strsrch->getBreakIterator() != m_en_wordbreaker_) {
902         errln("Error usearch_getBreakIterator returned wrong object");
903     }
904 
905     delete strsrch;
906 
907     int count = 0;
908     while (count < 4) {
909         // special purposes for tests numbers 0-3
910         const SearchData        *search   = &(BREAKITERATOREXACT[count]);
911               RuleBasedCollator *collator = getCollator(search->collator);
912               BreakIterator     *breaker  = getBreakIterator(search->breaker);
913               StringSearch      *strsrch;
914 
915         u_unescape(search->text, temp, 128);
916         text.setTo(temp, u_strlen(temp));
917         u_unescape(search->pattern, temp, 128);
918         pattern.setTo(temp, u_strlen(temp));
919         if (breaker != nullptr) {
920             breaker->setText(text);
921         }
922         collator->setStrength(getECollationStrength(search->strength));
923 
924         strsrch = new StringSearch(pattern, text, collator, breaker, status);
925         if (U_FAILURE(status) ||
926             strsrch->getBreakIterator() != breaker) {
927             errln("Error setting break iterator");
928             delete strsrch;
929         }
930         if (!assertEqualWithStringSearch(strsrch, search)) {
931             collator->setStrength(getECollationStrength(UCOL_TERTIARY));
932             delete strsrch;
933         }
934         search   = &(BREAKITERATOREXACT[count + 1]);
935         breaker  = getBreakIterator(search->breaker);
936         if (breaker != nullptr) {
937             breaker->setText(text);
938         }
939         strsrch->setBreakIterator(breaker, status);
940         if (U_FAILURE(status) ||
941             strsrch->getBreakIterator() != breaker) {
942             errln("Error setting break iterator");
943             delete strsrch;
944         }
945         strsrch->reset();
946         if (!assertEqualWithStringSearch(strsrch, search)) {
947              infoln("Error at test number %d", count);
948         }
949         delete strsrch;
950         count += 2;
951     }
952     count = 0;
953     while (BREAKITERATOREXACT[count].text != nullptr) {
954          if (!assertEqual(&BREAKITERATOREXACT[count])) {
955              infoln("Error at test number %d", count);
956          }
957          count ++;
958     }
959 }
960 
961 #endif
962 
TestVariable()963 void StringSearchTest::TestVariable()
964 {
965     int count = 0;
966     UErrorCode status = U_ZERO_ERROR;
967     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
968     if (U_FAILURE(status)) {
969         errln("Error setting collation alternate attribute %s",
970               u_errorName(status));
971     }
972     while (VARIABLE[count].text != nullptr) {
973         logln("variable %d", count);
974         if (!assertEqual(&VARIABLE[count])) {
975             infoln("Error at test number %d", count);
976         }
977         count ++;
978     }
979     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
980                            status);
981 }
982 
TestOverlap()983 void StringSearchTest::TestOverlap()
984 {
985     int count = 0;
986     while (OVERLAP[count].text != nullptr) {
987         if (!assertEqualWithAttribute(&OVERLAP[count], USEARCH_OFF,
988                                       USEARCH_ON)) {
989             errln("Error at overlap test number %d", count);
990         }
991         count ++;
992     }
993     count = 0;
994     while (NONOVERLAP[count].text != nullptr) {
995         if (!assertEqual(&NONOVERLAP[count])) {
996             errln("Error at non overlap test number %d", count);
997         }
998         count ++;
999     }
1000 
1001     count = 0;
1002     while (count < 1) {
1003         const SearchData *search = &(OVERLAP[count]);
1004               char16_t    temp[128];
1005         u_unescape(search->text, temp, 128);
1006         UnicodeString text;
1007         text.setTo(temp, u_strlen(temp));
1008         u_unescape(search->pattern, temp, 128);
1009         UnicodeString pattern;
1010         pattern.setTo(temp, u_strlen(temp));
1011 
1012         RuleBasedCollator *collator = getCollator(search->collator);
1013         UErrorCode         status   = U_ZERO_ERROR;
1014         StringSearch      *strsrch  = new StringSearch(pattern, text,
1015                                                        collator, nullptr,
1016                                                        status);
1017 
1018         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1019         if (U_FAILURE(status) ||
1020             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1021             errln("Error setting overlap option");
1022         }
1023         if (!assertEqualWithStringSearch(strsrch, search)) {
1024             delete strsrch;
1025             return;
1026         }
1027 
1028         search = &(NONOVERLAP[count]);
1029         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1030         if (U_FAILURE(status) ||
1031             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1032             errln("Error setting overlap option");
1033         }
1034         strsrch->reset();
1035         if (!assertEqualWithStringSearch(strsrch, search)) {
1036             delete strsrch;
1037             errln("Error at test number %d", count);
1038          }
1039 
1040         count ++;
1041         delete strsrch;
1042     }
1043 }
1044 
TestCollator()1045 void StringSearchTest::TestCollator()
1046 {
1047     // test collator that thinks "o" and "p" are the same thing
1048     char16_t      temp[128];
1049     u_unescape(COLLATOR[0].text, temp, 128);
1050     UnicodeString text;
1051     text.setTo(temp, u_strlen(temp));
1052     u_unescape(COLLATOR[0].pattern, temp, 128);
1053     UnicodeString pattern;
1054     pattern.setTo(temp, u_strlen(temp));
1055 
1056     UErrorCode    status = U_ZERO_ERROR;
1057     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
1058                                              status);
1059     if (U_FAILURE(status)) {
1060         errln("Error opening string search %s", u_errorName(status));
1061         delete strsrch;
1062         return;
1063     }
1064     if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) {
1065         delete strsrch;
1066         return;
1067     }
1068 
1069     u_unescape(TESTCOLLATORRULE, temp, 128);
1070     UnicodeString rules;
1071     rules.setTo(temp, u_strlen(temp));
1072     RuleBasedCollator *tailored = new RuleBasedCollator(rules, status);
1073     tailored->setStrength(getECollationStrength(COLLATOR[1].strength));
1074 
1075     if (U_FAILURE(status)) {
1076         errln("Error opening rule based collator %s", u_errorName(status));
1077         delete strsrch;
1078         delete tailored;
1079         return;
1080     }
1081 
1082     strsrch->setCollator(tailored, status);
1083     if (U_FAILURE(status) || (*strsrch->getCollator()) != (*tailored)) {
1084         errln("Error setting rule based collator");
1085         delete strsrch;
1086         delete tailored;
1087     }
1088     strsrch->reset();
1089     if (!assertEqualWithStringSearch(strsrch, &COLLATOR[1])) {
1090         delete strsrch;
1091         delete tailored;
1092         return;
1093     }
1094 
1095     strsrch->setCollator(m_en_us_, status);
1096     strsrch->reset();
1097     if (U_FAILURE(status) || (*strsrch->getCollator()) != (*m_en_us_)) {
1098         errln("Error setting rule based collator");
1099         delete strsrch;
1100         delete tailored;
1101     }
1102     if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) {
1103        errln("Error searching collator test");
1104     }
1105     delete strsrch;
1106     delete tailored;
1107 }
1108 
TestPattern()1109 void StringSearchTest::TestPattern()
1110 {
1111 
1112     char16_t temp[512];
1113     int templength;
1114     u_unescape(PATTERN[0].text, temp, 512);
1115     UnicodeString text;
1116     text.setTo(temp, u_strlen(temp));
1117     u_unescape(PATTERN[0].pattern, temp, 512);
1118     UnicodeString pattern;
1119     pattern.setTo(temp, u_strlen(temp));
1120 
1121     m_en_us_->setStrength(getECollationStrength(PATTERN[0].strength));
1122     UErrorCode    status = U_ZERO_ERROR;
1123     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
1124                                              status);
1125 
1126     if (U_FAILURE(status)) {
1127         errln("Error opening string search %s", u_errorName(status));
1128         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1129         delete strsrch;
1130         return;
1131     }
1132     if (strsrch->getPattern() != pattern) {
1133         errln("Error setting pattern");
1134     }
1135     if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) {
1136         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1137         delete strsrch;
1138         return;
1139     }
1140 
1141     u_unescape(PATTERN[1].pattern, temp, 512);
1142     pattern.setTo(temp, u_strlen(temp));
1143     strsrch->setPattern(pattern, status);
1144     if (pattern != strsrch->getPattern()) {
1145         errln("Error setting pattern");
1146         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1147         delete strsrch;
1148         return;
1149     }
1150     strsrch->reset();
1151     if (U_FAILURE(status)) {
1152         errln("Error setting pattern %s", u_errorName(status));
1153     }
1154     if (!assertEqualWithStringSearch(strsrch, &PATTERN[1])) {
1155         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1156         delete strsrch;
1157         return;
1158     }
1159 
1160     u_unescape(PATTERN[0].pattern, temp, 512);
1161     pattern.setTo(temp, u_strlen(temp));
1162     strsrch->setPattern(pattern, status);
1163     if (pattern != strsrch->getPattern()) {
1164         errln("Error setting pattern");
1165         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1166         delete strsrch;
1167         return;
1168     }
1169     strsrch->reset();
1170     if (U_FAILURE(status)) {
1171         errln("Error setting pattern %s", u_errorName(status));
1172     }
1173     if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) {
1174         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1175         delete strsrch;
1176         return;
1177     }
1178     /* enormous pattern size to see if this crashes */
1179     for (templength = 0; templength != 512; templength ++) {
1180         temp[templength] = 0x61;
1181     }
1182     temp[511] = 0;
1183     pattern.setTo(temp, 511);
1184     strsrch->setPattern(pattern, status);
1185     if (U_FAILURE(status)) {
1186         errln("Error setting pattern with size 512, %s", u_errorName(status));
1187     }
1188     m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1189     delete strsrch;
1190 }
1191 
TestText()1192 void StringSearchTest::TestText()
1193 {
1194     char16_t temp[128];
1195     u_unescape(TEXT[0].text, temp, 128);
1196     UnicodeString text;
1197     text.setTo(temp, u_strlen(temp));
1198     u_unescape(TEXT[0].pattern, temp, 128);
1199     UnicodeString pattern;
1200     pattern.setTo(temp, u_strlen(temp));
1201 
1202     UErrorCode status = U_ZERO_ERROR;
1203     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
1204                                              status);
1205     if (U_FAILURE(status)) {
1206         errln("Error opening string search %s", u_errorName(status));
1207         return;
1208     }
1209     if (text != strsrch->getText()) {
1210         errln("Error setting text");
1211     }
1212     if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) {
1213         delete strsrch;
1214         return;
1215     }
1216 
1217     u_unescape(TEXT[1].text, temp, 128);
1218     text.setTo(temp, u_strlen(temp));
1219     strsrch->setText(text, status);
1220     if (text != strsrch->getText()) {
1221         errln("Error setting text");
1222         delete strsrch;
1223         return;
1224     }
1225     if (U_FAILURE(status)) {
1226         errln("Error setting text %s", u_errorName(status));
1227     }
1228     if (!assertEqualWithStringSearch(strsrch, &TEXT[1])) {
1229         delete strsrch;
1230         return;
1231     }
1232 
1233     u_unescape(TEXT[0].text, temp, 128);
1234     text.setTo(temp, u_strlen(temp));
1235     StringCharacterIterator chariter(text);
1236     strsrch->setText(chariter, status);
1237     if (text != strsrch->getText()) {
1238         errln("Error setting text");
1239         delete strsrch;
1240         return;
1241     }
1242     if (U_FAILURE(status)) {
1243         errln("Error setting pattern %s", u_errorName(status));
1244     }
1245     if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) {
1246         errln("Error searching within set text");
1247     }
1248     delete strsrch;
1249 }
1250 
TestCompositeBoundaries()1251 void StringSearchTest::TestCompositeBoundaries()
1252 {
1253     int count = 0;
1254     while (COMPOSITEBOUNDARIES[count].text != nullptr) {
1255         logln("composite %d", count);
1256         if (!assertEqual(&COMPOSITEBOUNDARIES[count])) {
1257             errln("Error at test number %d", count);
1258         }
1259         count ++;
1260     }
1261 }
1262 
TestGetSetOffset()1263 void StringSearchTest::TestGetSetOffset()
1264 {
1265     UErrorCode     status  = U_ZERO_ERROR;
1266     UnicodeString  pattern("1234567890123456");
1267     UnicodeString  text("12345678901234567890123456789012");
1268     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_,
1269                                               nullptr, status);
1270     /* testing out of bounds error */
1271     strsrch->setOffset(-1, status);
1272     if (U_SUCCESS(status)) {
1273         errln("Error expecting set offset error");
1274     }
1275     strsrch->setOffset(128, status);
1276     if (U_SUCCESS(status)) {
1277         errln("Error expecting set offset error");
1278     }
1279     int index   = 0;
1280     while (BASIC[index].text != nullptr) {
1281         UErrorCode  status      = U_ZERO_ERROR;
1282         SearchData  search      = BASIC[index ++];
1283         char16_t    temp[128];
1284 
1285         u_unescape(search.text, temp, 128);
1286         text.setTo(temp, u_strlen(temp));
1287         u_unescape(search.pattern, temp, 128);
1288         pattern.setTo(temp, u_strlen(temp));
1289         strsrch->setText(text, status);
1290         strsrch->setPattern(pattern, status);
1291         strsrch->getCollator()->setStrength(getECollationStrength(
1292                                                           search.strength));
1293         strsrch->reset();
1294 
1295         int count = 0;
1296         int32_t matchindex  = search.offset[count];
1297         while (U_SUCCESS(status) && matchindex >= 0) {
1298             int32_t matchlength = search.size[count];
1299             strsrch->next(status);
1300             if (matchindex != strsrch->getMatchedStart() ||
1301                 matchlength != strsrch->getMatchedLength()) {
1302                 char *str = toCharString(strsrch->getText());
1303                 errln("Text: %s", str);
1304                 str = toCharString(strsrch->getPattern());
1305                 errln("Pattern: %s", str);
1306                 errln("Error match found at %d %d",
1307                         strsrch->getMatchedStart(),
1308                         strsrch->getMatchedLength());
1309                 return;
1310             }
1311             matchindex = search.offset[count + 1] == -1 ? -1 :
1312                          search.offset[count + 2];
1313             if (search.offset[count + 1] != -1) {
1314                 strsrch->setOffset(search.offset[count + 1] + 1, status);
1315                 if (strsrch->getOffset() != search.offset[count + 1] + 1) {
1316                     errln("Error setting offset\n");
1317                     return;
1318                 }
1319             }
1320 
1321             count += 2;
1322         }
1323         strsrch->next(status);
1324         if (strsrch->getMatchedStart() != USEARCH_DONE) {
1325             char *str = toCharString(strsrch->getText());
1326             errln("Text: %s", str);
1327             str = toCharString(strsrch->getPattern());
1328             errln("Pattern: %s", str);
1329             errln("Error match found at %d %d",
1330                         strsrch->getMatchedStart(),
1331                         strsrch->getMatchedLength());
1332             return;
1333         }
1334     }
1335     strsrch->getCollator()->setStrength(getECollationStrength(
1336                                                              UCOL_TERTIARY));
1337     delete strsrch;
1338 }
1339 
TestGetSetAttribute()1340 void StringSearchTest::TestGetSetAttribute()
1341 {
1342     UErrorCode     status    = U_ZERO_ERROR;
1343     UnicodeString  pattern("pattern");
1344     UnicodeString  text("text");
1345     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
1346                                               status);
1347     if (U_FAILURE(status)) {
1348         errln("Error opening search %s", u_errorName(status));
1349         return;
1350     }
1351 
1352     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_DEFAULT, status);
1353     if (U_FAILURE(status) ||
1354         strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1355         errln("Error setting overlap to the default");
1356     }
1357     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1358     if (U_FAILURE(status) ||
1359         strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1360         errln("Error setting overlap true");
1361     }
1362     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1363     if (U_FAILURE(status) ||
1364         strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1365         errln("Error setting overlap false");
1366     }
1367     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ATTRIBUTE_VALUE_COUNT,
1368                           status);
1369     if (U_SUCCESS(status)) {
1370         errln("Error setting overlap to illegal value");
1371     }
1372     status = U_ZERO_ERROR;
1373     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT, status);
1374     if (U_FAILURE(status) ||
1375         strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) {
1376         errln("Error setting canonical match to the default");
1377     }
1378     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1379     if (U_FAILURE(status) ||
1380         strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_ON) {
1381         errln("Error setting canonical match true");
1382     }
1383     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_OFF, status);
1384     if (U_FAILURE(status) ||
1385         strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) {
1386         errln("Error setting canonical match false");
1387     }
1388     strsrch->setAttribute(USEARCH_CANONICAL_MATCH,
1389                           USEARCH_ATTRIBUTE_VALUE_COUNT, status);
1390     if (U_SUCCESS(status)) {
1391         errln("Error setting canonical match to illegal value");
1392     }
1393     status = U_ZERO_ERROR;
1394     strsrch->setAttribute(USEARCH_ATTRIBUTE_COUNT, USEARCH_DEFAULT, status);
1395     if (U_SUCCESS(status)) {
1396         errln("Error setting illegal attribute success");
1397     }
1398 
1399     delete strsrch;
1400 }
1401 
TestGetMatch()1402 void StringSearchTest::TestGetMatch()
1403 {
1404     char16_t   temp[128];
1405     SearchData search = MATCH[0];
1406     u_unescape(search.text, temp, 128);
1407     UnicodeString text;
1408     text.setTo(temp, u_strlen(temp));
1409     u_unescape(search.pattern, temp, 128);
1410     UnicodeString pattern;
1411     pattern.setTo(temp, u_strlen(temp));
1412 
1413     UErrorCode    status  = U_ZERO_ERROR;
1414     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
1415                                              status);
1416     if (U_FAILURE(status)) {
1417         errln("Error opening string search %s", u_errorName(status));
1418         delete strsrch;
1419         return;
1420     }
1421 
1422     int           count      = 0;
1423     int32_t   matchindex = search.offset[count];
1424     UnicodeString matchtext;
1425     while (U_SUCCESS(status) && matchindex >= 0) {
1426         int32_t matchlength = search.size[count];
1427         strsrch->next(status);
1428         if (matchindex != strsrch->getMatchedStart() ||
1429             matchlength != strsrch->getMatchedLength()) {
1430             char *str = toCharString(strsrch->getText());
1431             errln("Text: %s", str);
1432             str = toCharString(strsrch->getPattern());
1433             errln("Pattern: %s", str);
1434             errln("Error match found at %d %d", strsrch->getMatchedStart(),
1435                   strsrch->getMatchedLength());
1436             return;
1437         }
1438         count ++;
1439 
1440         status = U_ZERO_ERROR;
1441         strsrch->getMatchedText(matchtext);
1442         if (matchtext.length() != matchlength || U_FAILURE(status)){
1443             errln("Error getting match text");
1444         }
1445         matchindex = search.offset[count];
1446     }
1447     status = U_ZERO_ERROR;
1448     strsrch->next(status);
1449     if (strsrch->getMatchedStart()  != USEARCH_DONE ||
1450         strsrch->getMatchedLength() != 0) {
1451         errln("Error end of match not found");
1452     }
1453     status = U_ZERO_ERROR;
1454     strsrch->getMatchedText(matchtext);
1455     if (matchtext.length() != 0) {
1456         errln("Error getting null matches");
1457     }
1458     delete strsrch;
1459 }
1460 
TestSetMatch()1461 void StringSearchTest::TestSetMatch()
1462 {
1463     int count = 0;
1464     while (MATCH[count].text != nullptr) {
1465         SearchData     search = MATCH[count];
1466         char16_t       temp[128];
1467         UErrorCode status = U_ZERO_ERROR;
1468         u_unescape(search.text, temp, 128);
1469         UnicodeString text;
1470         text.setTo(temp, u_strlen(temp));
1471         u_unescape(search.pattern, temp, 128);
1472         UnicodeString pattern;
1473         pattern.setTo(temp, u_strlen(temp));
1474 
1475         StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_,
1476                                                  nullptr, status);
1477         if (U_FAILURE(status)) {
1478             errln("Error opening string search %s", u_errorName(status));
1479             delete strsrch;
1480             return;
1481         }
1482 
1483         int size = 0;
1484         while (search.offset[size] != -1) {
1485             size ++;
1486         }
1487 
1488         if (strsrch->first(status) != search.offset[0] || U_FAILURE(status)) {
1489             errln("Error getting first match");
1490         }
1491         if (strsrch->last(status) != search.offset[size -1] ||
1492             U_FAILURE(status)) {
1493             errln("Error getting last match");
1494         }
1495 
1496         int index = 0;
1497         while (index < size) {
1498             if (index + 2 < size) {
1499                 if (strsrch->following(search.offset[index + 2] - 1, status)
1500                          != search.offset[index + 2] || U_FAILURE(status)) {
1501                     errln("Error getting following match at index %d",
1502                           search.offset[index + 2] - 1);
1503                 }
1504             }
1505             if (index + 1 < size) {
1506                 if (strsrch->preceding(search.offset[index + 1] +
1507                                                 search.size[index + 1] + 1,
1508                                        status) != search.offset[index + 1] ||
1509                     U_FAILURE(status)) {
1510                     errln("Error getting preceding match at index %d",
1511                           search.offset[index + 1] + 1);
1512                 }
1513             }
1514             index += 2;
1515         }
1516         status = U_ZERO_ERROR;
1517         if (strsrch->following(text.length(), status) != USEARCH_DONE) {
1518             errln("Error expecting out of bounds match");
1519         }
1520         if (strsrch->preceding(0, status) != USEARCH_DONE) {
1521             errln("Error expecting out of bounds match");
1522         }
1523         count ++;
1524         delete strsrch;
1525     }
1526 }
1527 
TestReset()1528 void StringSearchTest::TestReset()
1529 {
1530     UErrorCode     status  = U_ZERO_ERROR;
1531     UnicodeString  text("fish fish");
1532     UnicodeString  pattern("s");
1533     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
1534                                               status);
1535     if (U_FAILURE(status)) {
1536         errln("Error opening string search %s", u_errorName(status));
1537         delete strsrch;
1538         return;
1539     }
1540     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1541     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1542     strsrch->setOffset(9, status);
1543     if (U_FAILURE(status)) {
1544         errln("Error setting attributes and offsets");
1545     }
1546     else {
1547         strsrch->reset();
1548         if (strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF ||
1549             strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF ||
1550             strsrch->getOffset() != 0 || strsrch->getMatchedLength() != 0 ||
1551             strsrch->getMatchedStart() != USEARCH_DONE) {
1552             errln("Error resetting string search");
1553         }
1554         strsrch->previous(status);
1555         if (strsrch->getMatchedStart() != 7 ||
1556             strsrch->getMatchedLength() != 1) {
1557             errln("Error resetting string search\n");
1558         }
1559     }
1560     delete strsrch;
1561 }
1562 
TestSupplementary()1563 void StringSearchTest::TestSupplementary()
1564 {
1565     int count = 0;
1566     while (SUPPLEMENTARY[count].text != nullptr) {
1567         if (!assertEqual(&SUPPLEMENTARY[count])) {
1568             errln("Error at test number %d", count);
1569         }
1570         count ++;
1571     }
1572 }
1573 
TestContraction()1574 void StringSearchTest::TestContraction()
1575 {
1576     char16_t   temp[128];
1577     UErrorCode status = U_ZERO_ERROR;
1578 
1579     u_unescape(CONTRACTIONRULE, temp, 128);
1580     UnicodeString rules;
1581     rules.setTo(temp, u_strlen(temp));
1582     RuleBasedCollator *collator = new RuleBasedCollator(rules,
1583         getECollationStrength(UCOL_TERTIARY), UCOL_ON, status);
1584     if (U_FAILURE(status)) {
1585         errln("Error opening collator %s", u_errorName(status));
1586     }
1587     UnicodeString text("text");
1588     UnicodeString pattern("pattern");
1589     StringSearch *strsrch = new StringSearch(pattern, text, collator, nullptr,
1590                                              status);
1591     if (U_FAILURE(status)) {
1592         errln("Error opening string search %s", u_errorName(status));
1593     }
1594 
1595     int count = 0;
1596     while (CONTRACTION[count].text != nullptr) {
1597         u_unescape(CONTRACTION[count].text, temp, 128);
1598         text.setTo(temp, u_strlen(temp));
1599         u_unescape(CONTRACTION[count].pattern, temp, 128);
1600         pattern.setTo(temp, u_strlen(temp));
1601         strsrch->setText(text, status);
1602         strsrch->setPattern(pattern, status);
1603         if (!assertEqualWithStringSearch(strsrch, &CONTRACTION[count])) {
1604             errln("Error at test number %d", count);
1605         }
1606         count ++;
1607     }
1608     delete strsrch;
1609     delete collator;
1610 }
1611 
TestIgnorable()1612 void StringSearchTest::TestIgnorable()
1613 {
1614     char16_t temp[128];
1615     u_unescape(IGNORABLERULE, temp, 128);
1616     UnicodeString rules;
1617     rules.setTo(temp, u_strlen(temp));
1618     UErrorCode status = U_ZERO_ERROR;
1619     int        count  = 0;
1620     RuleBasedCollator *collator = new RuleBasedCollator(rules,
1621                             getECollationStrength(IGNORABLE[count].strength),
1622                             UCOL_ON, status);
1623     if (U_FAILURE(status)) {
1624         errln("Error opening collator %s", u_errorName(status));
1625         return;
1626     }
1627     UnicodeString pattern("pattern");
1628     UnicodeString text("text");
1629     StringSearch *strsrch = new StringSearch(pattern, text, collator, nullptr,
1630                                              status);
1631     if (U_FAILURE(status)) {
1632         errln("Error opening string search %s", u_errorName(status));
1633         delete collator;
1634         return;
1635     }
1636 
1637     while (IGNORABLE[count].text != nullptr) {
1638         u_unescape(IGNORABLE[count].text, temp, 128);
1639         text.setTo(temp, u_strlen(temp));
1640         u_unescape(IGNORABLE[count].pattern, temp, 128);
1641         pattern.setTo(temp, u_strlen(temp));
1642         strsrch->setText(text, status);
1643         strsrch->setPattern(pattern, status);
1644         if (!assertEqualWithStringSearch(strsrch, &IGNORABLE[count])) {
1645             errln("Error at test number %d", count);
1646         }
1647         count ++;
1648     }
1649     delete strsrch;
1650     delete collator;
1651 }
1652 
TestDiacriticMatch()1653 void StringSearchTest::TestDiacriticMatch()
1654 {
1655 	char16_t temp[128];
1656     UErrorCode status = U_ZERO_ERROR;
1657     int        count  = 0;
1658     RuleBasedCollator* coll = nullptr;
1659     StringSearch *strsrch = nullptr;
1660 
1661     UnicodeString pattern("pattern");
1662     UnicodeString text("text");
1663 
1664     const SearchData *search;
1665 
1666     search = &(DIACRITICMATCH[count]);
1667     while (search->text != nullptr) {
1668    		coll = getCollator(search->collator);
1669     	coll->setStrength(getECollationStrength(search->strength));
1670     	strsrch = new StringSearch(pattern, text, coll, getBreakIterator(search->breaker), status);
1671     	if (U_FAILURE(status)) {
1672 	        errln("Error opening string search %s", u_errorName(status));
1673 	        return;
1674 	    }
1675         u_unescape(search->text, temp, 128);
1676         text.setTo(temp, u_strlen(temp));
1677         u_unescape(search->pattern, temp, 128);
1678         pattern.setTo(temp, u_strlen(temp));
1679         strsrch->setText(text, status);
1680         strsrch->setPattern(pattern, status);
1681         if (!assertEqualWithStringSearch(strsrch, search)) {
1682             errln("Error at test number %d", count);
1683         }
1684         search = &(DIACRITICMATCH[++count]);
1685         delete strsrch;
1686     }
1687 
1688 }
1689 
TestCanonical()1690 void StringSearchTest::TestCanonical()
1691 {
1692     int count = 0;
1693     while (BASICCANONICAL[count].text != nullptr) {
1694         if (!assertCanonicalEqual(&BASICCANONICAL[count])) {
1695             errln("Error at test number %d", count);
1696         }
1697         count ++;
1698     }
1699 }
1700 
TestNormCanonical()1701 void StringSearchTest::TestNormCanonical()
1702 {
1703     UErrorCode status = U_ZERO_ERROR;
1704     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
1705     int count = 0;
1706     while (NORMCANONICAL[count].text != nullptr) {
1707         if (!assertCanonicalEqual(&NORMCANONICAL[count])) {
1708             errln("Error at test number %d", count);
1709         }
1710         count ++;
1711     }
1712     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
1713 }
1714 
TestStrengthCanonical()1715 void StringSearchTest::TestStrengthCanonical()
1716 {
1717     int count = 0;
1718     while (STRENGTHCANONICAL[count].text != nullptr) {
1719         if (!assertCanonicalEqual(&STRENGTHCANONICAL[count])) {
1720             errln("Error at test number %d", count);
1721         }
1722         count ++;
1723     }
1724 }
1725 
1726 #if !UCONFIG_NO_BREAK_ITERATION
1727 
TestBreakIteratorCanonical()1728 void StringSearchTest::TestBreakIteratorCanonical()
1729 {
1730     UErrorCode status = U_ZERO_ERROR;
1731     int        count  = 0;
1732 
1733     while (count < 4) {
1734         // special purposes for tests numbers 0-3
1735               char16_t        temp[128];
1736         const SearchData     *search   = &(BREAKITERATORCANONICAL[count]);
1737 
1738         u_unescape(search->text, temp, 128);
1739         UnicodeString text;
1740         text.setTo(temp, u_strlen(temp));
1741         u_unescape(search->pattern, temp, 128);
1742         UnicodeString pattern;
1743         pattern.setTo(temp, u_strlen(temp));
1744         RuleBasedCollator *collator = getCollator(search->collator);
1745         collator->setStrength(getECollationStrength(search->strength));
1746 
1747         BreakIterator *breaker = getBreakIterator(search->breaker);
1748         StringSearch  *strsrch = new StringSearch(pattern, text, collator,
1749                                                   breaker, status);
1750         if (U_FAILURE(status)) {
1751             errln("Error creating string search data");
1752             return;
1753         }
1754         strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1755         if (U_FAILURE(status) ||
1756             strsrch->getBreakIterator() != breaker) {
1757             errln("Error setting break iterator");
1758             delete strsrch;
1759             return;
1760         }
1761         if (!assertEqualWithStringSearch(strsrch, search)) {
1762             collator->setStrength(getECollationStrength(UCOL_TERTIARY));
1763             delete strsrch;
1764             return;
1765         }
1766         search  = &(BREAKITERATOREXACT[count + 1]);
1767         breaker = getBreakIterator(search->breaker);
1768         if (breaker == nullptr) {
1769             errln("Error creating BreakIterator");
1770             return;
1771         }
1772         breaker->setText(strsrch->getText());
1773         strsrch->setBreakIterator(breaker, status);
1774         if (U_FAILURE(status) || strsrch->getBreakIterator() != breaker) {
1775             errln("Error setting break iterator");
1776             delete strsrch;
1777             return;
1778         }
1779         strsrch->reset();
1780         strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1781         if (!assertEqualWithStringSearch(strsrch, search)) {
1782              errln("Error at test number %d", count);
1783              return;
1784         }
1785         delete strsrch;
1786         count += 2;
1787     }
1788     count = 0;
1789     while (BREAKITERATORCANONICAL[count].text != nullptr) {
1790          if (!assertEqual(&BREAKITERATORCANONICAL[count])) {
1791              errln("Error at test number %d", count);
1792              return;
1793          }
1794          count ++;
1795     }
1796 }
1797 
1798 #endif
1799 
TestVariableCanonical()1800 void StringSearchTest::TestVariableCanonical()
1801 {
1802     int count = 0;
1803     UErrorCode status = U_ZERO_ERROR;
1804     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1805     if (U_FAILURE(status)) {
1806         errln("Error setting collation alternate attribute %s",
1807               u_errorName(status));
1808     }
1809     while (VARIABLE[count].text != nullptr) {
1810         logln("variable %d", count);
1811         if (!assertCanonicalEqual(&VARIABLE[count])) {
1812             errln("Error at test number %d", count);
1813         }
1814         count ++;
1815     }
1816     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
1817                            status);
1818 }
1819 
TestOverlapCanonical()1820 void StringSearchTest::TestOverlapCanonical()
1821 {
1822     int count = 0;
1823     while (OVERLAPCANONICAL[count].text != nullptr) {
1824         if (!assertEqualWithAttribute(&OVERLAPCANONICAL[count], USEARCH_ON,
1825                                       USEARCH_ON)) {
1826             errln("Error at overlap test number %d", count);
1827         }
1828         count ++;
1829     }
1830     count = 0;
1831     while (NONOVERLAP[count].text != nullptr) {
1832         if (!assertCanonicalEqual(&NONOVERLAPCANONICAL[count])) {
1833             errln("Error at non overlap test number %d", count);
1834         }
1835         count ++;
1836     }
1837 
1838     count = 0;
1839     while (count < 1) {
1840               char16_t    temp[128];
1841         const SearchData *search = &(OVERLAPCANONICAL[count]);
1842               UErrorCode  status = U_ZERO_ERROR;
1843 
1844         u_unescape(search->text, temp, 128);
1845         UnicodeString text;
1846         text.setTo(temp, u_strlen(temp));
1847         u_unescape(search->pattern, temp, 128);
1848         UnicodeString pattern;
1849         pattern.setTo(temp, u_strlen(temp));
1850         RuleBasedCollator *collator = getCollator(search->collator);
1851         StringSearch *strsrch = new StringSearch(pattern, text, collator,
1852                                                  nullptr, status);
1853         strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1854         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1855         if (U_FAILURE(status) ||
1856             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1857             errln("Error setting overlap option");
1858         }
1859         if (!assertEqualWithStringSearch(strsrch, search)) {
1860             delete strsrch;
1861             return;
1862         }
1863         search = &(NONOVERLAPCANONICAL[count]);
1864         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1865         if (U_FAILURE(status) ||
1866             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1867             errln("Error setting overlap option");
1868         }
1869         strsrch->reset();
1870         if (!assertEqualWithStringSearch(strsrch, search)) {
1871             delete strsrch;
1872             errln("Error at test number %d", count);
1873          }
1874 
1875         count ++;
1876         delete strsrch;
1877     }
1878 }
1879 
TestCollatorCanonical()1880 void StringSearchTest::TestCollatorCanonical()
1881 {
1882     /* test collator that thinks "o" and "p" are the same thing */
1883     char16_t temp[128];
1884     u_unescape(COLLATORCANONICAL[0].text, temp, 128);
1885     UnicodeString text;
1886     text.setTo(temp, u_strlen(temp));
1887     u_unescape(COLLATORCANONICAL[0].pattern, temp, 128);
1888     UnicodeString pattern;
1889     pattern.setTo(temp, u_strlen(temp));
1890 
1891     UErrorCode    status  = U_ZERO_ERROR;
1892     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_,
1893                                              nullptr, status);
1894     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1895     if (U_FAILURE(status)) {
1896         errln("Error opening string search %s", u_errorName(status));
1897     }
1898     if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) {
1899         delete strsrch;
1900         return;
1901     }
1902 
1903     u_unescape(TESTCOLLATORRULE, temp, 128);
1904     UnicodeString rules;
1905     rules.setTo(temp, u_strlen(temp));
1906     RuleBasedCollator *tailored = new RuleBasedCollator(rules,
1907         getECollationStrength(COLLATORCANONICAL[1].strength),
1908         UCOL_ON, status);
1909 
1910     if (U_FAILURE(status)) {
1911         errln("Error opening rule based collator %s", u_errorName(status));
1912     }
1913 
1914     strsrch->setCollator(tailored, status);
1915     if (U_FAILURE(status) || *(strsrch->getCollator()) != *tailored) {
1916         errln("Error setting rule based collator");
1917     }
1918     strsrch->reset();
1919     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1920     if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[1])) {
1921         delete strsrch;
1922         delete tailored;
1923 
1924         return;
1925     }
1926 
1927     strsrch->setCollator(m_en_us_, status);
1928     strsrch->reset();
1929     if (U_FAILURE(status) || *(strsrch->getCollator()) != *m_en_us_) {
1930         errln("Error setting rule based collator");
1931     }
1932     if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) {
1933     }
1934     delete strsrch;
1935     delete tailored;
1936 }
1937 
TestPatternCanonical()1938 void StringSearchTest::TestPatternCanonical()
1939 {
1940 
1941     char16_t temp[128];
1942 
1943     u_unescape(PATTERNCANONICAL[0].text, temp, 128);
1944     UnicodeString text;
1945     text.setTo(temp, u_strlen(temp));
1946     u_unescape(PATTERNCANONICAL[0].pattern, temp, 128);
1947     UnicodeString pattern;
1948     pattern.setTo(temp, u_strlen(temp));
1949 
1950     m_en_us_->setStrength(
1951                       getECollationStrength(PATTERNCANONICAL[0].strength));
1952 
1953     UErrorCode    status  = U_ZERO_ERROR;
1954     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
1955                                              status);
1956     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1957     if (U_FAILURE(status)) {
1958         errln("Error opening string search %s", u_errorName(status));
1959         goto ENDTESTPATTERN;
1960     }
1961     if (pattern != strsrch->getPattern()) {
1962         errln("Error setting pattern");
1963     }
1964     if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) {
1965         goto ENDTESTPATTERN;
1966     }
1967 
1968     u_unescape(PATTERNCANONICAL[1].pattern, temp, 128);
1969     pattern.setTo(temp, u_strlen(temp));
1970     strsrch->setPattern(pattern, status);
1971     if (pattern != strsrch->getPattern()) {
1972         errln("Error setting pattern");
1973         goto ENDTESTPATTERN;
1974     }
1975     strsrch->reset();
1976     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1977     if (U_FAILURE(status)) {
1978         errln("Error setting pattern %s", u_errorName(status));
1979     }
1980     if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[1])) {
1981         goto ENDTESTPATTERN;
1982     }
1983 
1984     u_unescape(PATTERNCANONICAL[0].pattern, temp, 128);
1985     pattern.setTo(temp, u_strlen(temp));
1986     strsrch->setPattern(pattern, status);
1987     if (pattern != strsrch->getPattern()) {
1988         errln("Error setting pattern");
1989         goto ENDTESTPATTERN;
1990     }
1991     strsrch->reset();
1992     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1993     if (U_FAILURE(status)) {
1994         errln("Error setting pattern %s", u_errorName(status));
1995     }
1996     if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) {
1997         goto ENDTESTPATTERN;
1998     }
1999 ENDTESTPATTERN:
2000     m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
2001     delete strsrch;
2002 }
2003 
TestTextCanonical()2004 void StringSearchTest::TestTextCanonical()
2005 {
2006     char16_t temp[128];
2007     u_unescape(TEXTCANONICAL[0].text, temp, 128);
2008     UnicodeString text;
2009     text.setTo(temp, u_strlen(temp));
2010     u_unescape(TEXTCANONICAL[0].pattern, temp, 128);
2011     UnicodeString pattern;
2012     pattern.setTo(temp, u_strlen(temp));
2013 
2014     UErrorCode    status  = U_ZERO_ERROR;
2015     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
2016                                              status);
2017     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2018 
2019     if (U_FAILURE(status)) {
2020         errln("Error opening string search %s", u_errorName(status));
2021         goto ENDTESTPATTERN;
2022     }
2023     if (text != strsrch->getText()) {
2024         errln("Error setting text");
2025     }
2026     if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) {
2027         goto ENDTESTPATTERN;
2028     }
2029 
2030     u_unescape(TEXTCANONICAL[1].text, temp, 128);
2031     text.setTo(temp, u_strlen(temp));
2032     strsrch->setText(text, status);
2033     if (text != strsrch->getText()) {
2034         errln("Error setting text");
2035         goto ENDTESTPATTERN;
2036     }
2037     if (U_FAILURE(status)) {
2038         errln("Error setting text %s", u_errorName(status));
2039     }
2040     if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[1])) {
2041         goto ENDTESTPATTERN;
2042     }
2043 
2044     u_unescape(TEXTCANONICAL[0].text, temp, 128);
2045     text.setTo(temp, u_strlen(temp));
2046     strsrch->setText(text, status);
2047     if (text != strsrch->getText()) {
2048         errln("Error setting text");
2049         goto ENDTESTPATTERN;
2050     }
2051     if (U_FAILURE(status)) {
2052         errln("Error setting pattern %s", u_errorName(status));
2053     }
2054     if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) {
2055         goto ENDTESTPATTERN;
2056     }
2057 ENDTESTPATTERN:
2058     delete strsrch;
2059 }
2060 
TestCompositeBoundariesCanonical()2061 void StringSearchTest::TestCompositeBoundariesCanonical()
2062 {
2063     int count = 0;
2064     while (COMPOSITEBOUNDARIESCANONICAL[count].text != nullptr) {
2065         logln("composite %d", count);
2066         if (!assertCanonicalEqual(&COMPOSITEBOUNDARIESCANONICAL[count])) {
2067             errln("Error at test number %d", count);
2068         }
2069         count ++;
2070     }
2071 }
2072 
TestGetSetOffsetCanonical()2073 void StringSearchTest::TestGetSetOffsetCanonical()
2074 {
2075 
2076     UErrorCode     status  = U_ZERO_ERROR;
2077     UnicodeString  text("text");
2078     UnicodeString  pattern("pattern");
2079     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
2080                                               status);
2081     Collator *collator = strsrch->getCollator();
2082 
2083     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
2084 
2085     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2086     /* testing out of bounds error */
2087     strsrch->setOffset(-1, status);
2088     if (U_SUCCESS(status)) {
2089         errln("Error expecting set offset error");
2090     }
2091     strsrch->setOffset(128, status);
2092     if (U_SUCCESS(status)) {
2093         errln("Error expecting set offset error");
2094     }
2095     int   index   = 0;
2096     char16_t temp[128];
2097     while (BASICCANONICAL[index].text != nullptr) {
2098         SearchData  search      = BASICCANONICAL[index ++];
2099         if (BASICCANONICAL[index].text == nullptr) {
2100             /* skip the last one */
2101             break;
2102         }
2103 
2104         u_unescape(search.text, temp, 128);
2105         text.setTo(temp, u_strlen(temp));
2106         u_unescape(search.pattern, temp, 128);
2107         pattern.setTo(temp, u_strlen(temp));
2108 
2109         UErrorCode  status      = U_ZERO_ERROR;
2110         strsrch->setText(text, status);
2111 
2112         strsrch->setPattern(pattern, status);
2113 
2114         int         count       = 0;
2115         int32_t matchindex  = search.offset[count];
2116         while (U_SUCCESS(status) && matchindex >= 0) {
2117             int32_t matchlength = search.size[count];
2118             strsrch->next(status);
2119             if (matchindex != strsrch->getMatchedStart() ||
2120                 matchlength != strsrch->getMatchedLength()) {
2121                 char *str = toCharString(strsrch->getText());
2122                 errln("Text: %s", str);
2123                 str = toCharString(strsrch->getPattern());
2124                 errln("Pattern: %s", str);
2125                 errln("Error match found at %d %d",
2126                       strsrch->getMatchedStart(),
2127                       strsrch->getMatchedLength());
2128                 goto bail;
2129             }
2130             matchindex = search.offset[count + 1] == -1 ? -1 :
2131                          search.offset[count + 2];
2132             if (search.offset[count + 1] != -1) {
2133                 strsrch->setOffset(search.offset[count + 1] + 1, status);
2134                 if (strsrch->getOffset() != search.offset[count + 1] + 1) {
2135                     errln("Error setting offset");
2136                     goto bail;
2137                 }
2138             }
2139 
2140             count += 2;
2141         }
2142         strsrch->next(status);
2143         if (strsrch->getMatchedStart() != USEARCH_DONE) {
2144             char *str = toCharString(strsrch->getText());
2145             errln("Text: %s", str);
2146             str = toCharString(strsrch->getPattern());
2147             errln("Pattern: %s", str);
2148             errln("Error match found at %d %d", strsrch->getMatchedStart(),
2149                    strsrch->getMatchedLength());
2150             goto bail;
2151         }
2152     }
2153 
2154 bail:
2155     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
2156     delete strsrch;
2157 }
2158 
TestSupplementaryCanonical()2159 void StringSearchTest::TestSupplementaryCanonical()
2160 {
2161     int count = 0;
2162     while (SUPPLEMENTARYCANONICAL[count].text != nullptr) {
2163         if (!assertCanonicalEqual(&SUPPLEMENTARYCANONICAL[count])) {
2164             errln("Error at test number %d", count);
2165         }
2166         count ++;
2167     }
2168 }
2169 
TestContractionCanonical()2170 void StringSearchTest::TestContractionCanonical()
2171 {
2172     char16_t       temp[128];
2173 
2174     u_unescape(CONTRACTIONRULE, temp, 128);
2175     UnicodeString rules;
2176     rules.setTo(temp, u_strlen(temp));
2177 
2178     UErrorCode         status   = U_ZERO_ERROR;
2179     RuleBasedCollator *collator = new RuleBasedCollator(rules,
2180         getECollationStrength(UCOL_TERTIARY), UCOL_ON, status);
2181     if (U_FAILURE(status)) {
2182         errln("Error opening collator %s", u_errorName(status));
2183     }
2184     UnicodeString text("text");
2185     UnicodeString pattern("pattern");
2186     StringSearch *strsrch = new StringSearch(pattern, text, collator, nullptr,
2187                                              status);
2188     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2189     if (U_FAILURE(status)) {
2190         errln("Error opening string search %s", u_errorName(status));
2191     }
2192 
2193     int count = 0;
2194     while (CONTRACTIONCANONICAL[count].text != nullptr) {
2195         u_unescape(CONTRACTIONCANONICAL[count].text, temp, 128);
2196         text.setTo(temp, u_strlen(temp));
2197         u_unescape(CONTRACTIONCANONICAL[count].pattern, temp, 128);
2198         pattern.setTo(temp, u_strlen(temp));
2199         strsrch->setText(text, status);
2200         strsrch->setPattern(pattern, status);
2201         if (!assertEqualWithStringSearch(strsrch,
2202                                              &CONTRACTIONCANONICAL[count])) {
2203             errln("Error at test number %d", count);
2204         }
2205         count ++;
2206     }
2207     delete strsrch;
2208     delete collator;
2209 }
2210 
TestUClassID()2211 void StringSearchTest::TestUClassID()
2212 {
2213     char id = *((char *)StringSearch::getStaticClassID());
2214     if (id != 0) {
2215         errln("Static class id for StringSearch should be 0");
2216     }
2217     UErrorCode     status    = U_ZERO_ERROR;
2218     UnicodeString  text("text");
2219     UnicodeString  pattern("pattern");
2220     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, nullptr,
2221                                               status);
2222     id = *((char *)strsrch->getDynamicClassID());
2223     if (id != 0) {
2224         errln("Dynamic class id for StringSearch should be 0");
2225     }
2226     delete strsrch;
2227 }
2228 
2229 class TestSearch : public SearchIterator
2230 {
2231 public:
2232     TestSearch(const TestSearch &obj);
2233     TestSearch(const UnicodeString &text,
2234                BreakIterator *breakiter,
2235                const UnicodeString &pattern);
2236     ~TestSearch();
2237 
2238     void        setOffset(int32_t position, UErrorCode &status) override;
2239     int32_t     getOffset() const override;
2240     SearchIterator* safeClone() const override;
2241 
2242 
2243     /**
2244      * ICU "poor man's RTTI", returns a UClassID for the actual class.
2245      *
2246      * @draft ICU 2.2
2247      */
getDynamicClassID() const2248     virtual inline UClassID getDynamicClassID() const override { return getStaticClassID(); }
2249 
2250     /**
2251      * ICU "poor man's RTTI", returns a UClassID for this class.
2252      *
2253      * @draft ICU 2.2
2254      */
getStaticClassID()2255     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
2256 
2257     bool operator!=(const TestSearch &that) const;
2258 
2259     UnicodeString m_pattern_;
2260 
2261 protected:
2262     int32_t      handleNext(int32_t position, UErrorCode &status) override;
2263     int32_t      handlePrev(int32_t position, UErrorCode &status) override;
2264     TestSearch & operator=(const TestSearch &that);
2265 
2266 private:
2267 
2268     /**
2269      * The address of this static class variable serves as this class's ID
2270      * for ICU "poor man's RTTI".
2271      */
2272     static const char fgClassID;
2273     uint32_t m_offset_;
2274 };
2275 
2276 const char TestSearch::fgClassID=0;
2277 
/** Copy constructor: copies the base part, the pattern and the offset. */
TestSearch::TestSearch(const TestSearch &obj)
    : SearchIterator(obj),
      m_pattern_(obj.m_pattern_),
      m_offset_(obj.m_offset_)
{
}
2283 
TestSearch(const UnicodeString & text,BreakIterator * breakiter,const UnicodeString & pattern)2284 TestSearch::TestSearch(const UnicodeString &text,
2285                        BreakIterator *breakiter,
2286                        const UnicodeString &pattern) : SearchIterator()
2287 {
2288     m_breakiterator_ = breakiter;
2289     m_pattern_ = pattern;
2290     m_text_ = text;
2291     m_offset_ = 0;
2292     m_pattern_ = pattern;
2293 }
2294 
~TestSearch()2295 TestSearch::~TestSearch()
2296 {
2297 }
2298 
2299 
setOffset(int32_t position,UErrorCode & status)2300 void TestSearch::setOffset(int32_t position, UErrorCode &status)
2301 {
2302     if (position >= 0 && position <= m_text_.length()) {
2303         m_offset_ = position;
2304     }
2305     else {
2306         status = U_INDEX_OUTOFBOUNDS_ERROR;
2307     }
2308 }
2309 
getOffset() const2310 int32_t TestSearch::getOffset() const
2311 {
2312     return m_offset_;
2313 }
2314 
safeClone() const2315 SearchIterator * TestSearch::safeClone() const
2316 {
2317     return new TestSearch(m_text_, m_breakiterator_, m_pattern_);
2318 }
2319 
operator !=(const TestSearch & that) const2320 bool TestSearch::operator!=(const TestSearch &that) const
2321 {
2322     if (SearchIterator::operator !=(that)) {
2323         return false;
2324     }
2325     return m_offset_ != that.m_offset_ || m_pattern_ != that.m_pattern_;
2326 }
2327 
handleNext(int32_t start,UErrorCode & status)2328 int32_t TestSearch::handleNext(int32_t start, UErrorCode &status)
2329 {
2330   if(U_SUCCESS(status)) {
2331     int match = m_text_.indexOf(m_pattern_, start);
2332     if (match < 0) {
2333         m_offset_ = m_text_.length();
2334         setMatchStart(m_offset_);
2335         setMatchLength(0);
2336         return USEARCH_DONE;
2337     }
2338     setMatchStart(match);
2339     m_offset_ = match;
2340     setMatchLength(m_pattern_.length());
2341     return match;
2342   } else {
2343     return USEARCH_DONE;
2344   }
2345 }
2346 
handlePrev(int32_t start,UErrorCode & status)2347 int32_t TestSearch::handlePrev(int32_t start, UErrorCode &status)
2348 {
2349   if(U_SUCCESS(status)) {
2350     int match = m_text_.lastIndexOf(m_pattern_, 0, start);
2351     if (match < 0) {
2352         m_offset_ = 0;
2353         setMatchStart(m_offset_);
2354         setMatchLength(0);
2355         return USEARCH_DONE;
2356     }
2357     setMatchStart(match);
2358     m_offset_ = match;
2359     setMatchLength(m_pattern_.length());
2360     return match;
2361   } else {
2362     return USEARCH_DONE;
2363   }
2364 }
2365 
/** Assignment: copies the base part, then the pattern and the offset. */
TestSearch & TestSearch::operator=(const TestSearch &that)
{
    SearchIterator::operator=(that);
    m_pattern_ = that.m_pattern_;
    m_offset_  = that.m_offset_;
    return *this;
}
2373 
TestSubclass()2374 void StringSearchTest::TestSubclass()
2375 {
2376     UnicodeString text("abc abcd abc");
2377     UnicodeString pattern("abc");
2378     TestSearch search(text, nullptr, pattern);
2379     TestSearch search2(search);
2380     int expected[] = {0, 4, 9};
2381     UErrorCode status = U_ZERO_ERROR;
2382     int i;
2383     StringCharacterIterator chariter(text);
2384 
2385     search.setText(text, status);
2386     if (search.getText() != search2.getText()) {
2387         errln("Error setting text");
2388     }
2389 
2390     search.setText(chariter, status);
2391     if (search.getText() != search2.getText()) {
2392         errln("Error setting text");
2393     }
2394 
2395     search.reset();
2396     // comparing constructors
2397 
2398     for (i = 0; i < UPRV_LENGTHOF(expected); i ++) {
2399         if (search.next(status) != expected[i]) {
2400             errln("Error getting next match");
2401         }
2402         if (search.getMatchedLength() != search.m_pattern_.length()) {
2403             errln("Error getting next match length");
2404         }
2405     }
2406     if (search.next(status) != USEARCH_DONE) {
2407         errln("Error should have reached the end of the iteration");
2408     }
2409     for (i = UPRV_LENGTHOF(expected) - 1; i >= 0; i --) {
2410         if (search.previous(status) != expected[i]) {
2411             errln("Error getting previous match");
2412         }
2413         if (search.getMatchedLength() != search.m_pattern_.length()) {
2414             errln("Error getting previous match length");
2415         }
2416     }
2417     if (search.previous(status) != USEARCH_DONE) {
2418         errln("Error should have reached the start of the iteration");
2419     }
2420 }
2421 
2422 class StubSearchIterator:public SearchIterator{
2423 public:
StubSearchIterator()2424     StubSearchIterator(){}
setOffset(int32_t,UErrorCode &)2425     virtual void setOffset(int32_t , UErrorCode &) override {}
getOffset() const2426     virtual int32_t getOffset() const override {return 0;}
safeClone() const2427     virtual SearchIterator* safeClone() const override {return nullptr;}
handleNext(int32_t,UErrorCode &)2428     virtual int32_t handleNext(int32_t , UErrorCode &) override {return 0;}
handlePrev(int32_t,UErrorCode &)2429     virtual int32_t handlePrev(int32_t , UErrorCode &) override {return 0;}
getDynamicClassID() const2430     virtual UClassID getDynamicClassID() const override {
2431         static char classID = 0;
2432         return (UClassID)&classID;
2433     }
2434 };
2435 
TestCoverage()2436 void StringSearchTest::TestCoverage(){
2437     StubSearchIterator stub1, stub2;
2438     UErrorCode status = U_ZERO_ERROR;
2439 
2440     if (stub1 != stub2){
2441         errln("new StubSearchIterator should be equal");
2442     }
2443 
2444     stub2.setText(UnicodeString("ABC"), status);
2445     if (U_FAILURE(status)) {
2446         errln("Error: SearchIterator::SetText");
2447     }
2448 
2449     stub1 = stub2;
2450     if (stub1 != stub2){
2451         errln("SearchIterator::operator =  assigned object should be equal");
2452     }
2453 }
2454 
2455 #endif /* !UCONFIG_NO_BREAK_ITERATION */
2456 
2457 #endif /* #if !UCONFIG_NO_COLLATION */
2458