• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1999-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************
7  *   Date        Name        Description
8  *   12/14/99    Madhu        Creation.
9  *   01/12/2000  Madhu        updated for changed API
10  ********************************************************************/
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_BREAK_ITERATION
15 
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33  * API Test the RuleBasedBreakIterator class
34  */
35 
36 
// Calls dataerrln(), tagged with the invoking file and line, when `status`
// holds a failing UErrorCode.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Calls errln(), quoting the text of `expr`, when `expr` compares equal to FALSE.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
42 
TestCloneEquals()43 void RBBIAPITest::TestCloneEquals()
44 {
45 
46     UErrorCode status=U_ZERO_ERROR;
47     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51     if(U_FAILURE(status)){
52         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53         return;
54     }
55 
56 
57     UnicodeString testString="Testing word break iterators's clone() and equals()";
58     bi1->setText(testString);
59     bi2->setText(testString);
60     biequal->setText(testString);
61 
62     bi3->setText("hello");
63 
64     logln((UnicodeString)"Testing equals()");
65 
66     logln((UnicodeString)"Testing == and !=");
67     UBool b = (*bi1 != *biequal);
68     b |= *bi1 == *bi2;
69     b |= *bi1 == *bi3;
70     if (b) {
71         errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
72     }
73 
74     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
75         errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
76 
77 
78     // Quick test of RulesBasedBreakIterator assignment -
79     // Check that
80     //    two different iterators are !=
81     //    they are == after assignment
82     //    source and dest iterator produce the same next() after assignment.
83     //    deleting one doesn't disable the other.
84     logln("Testing assignment");
85     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86     if(U_FAILURE(status)){
87         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88         return;
89     }
90 
91     RuleBasedBreakIterator biDefault, biDefault2;
92     if(U_FAILURE(status)){
93         errln((UnicodeString)"FAIL : in construction of default iterator");
94         return;
95     }
96     if (biDefault == *bix) {
97         errln((UnicodeString)"ERROR: iterators should not compare ==");
98         return;
99     }
100     if (biDefault != biDefault2) {
101         errln((UnicodeString)"ERROR: iterators should compare ==");
102         return;
103     }
104 
105 
106     UnicodeString   HelloString("Hello Kitty");
107     bix->setText(HelloString);
108     if (*bix == *bi2) {
109         errln(UnicodeString("ERROR: strings should not be equal before assignment."));
110     }
111     *bix = *bi2;
112     if (*bix != *bi2) {
113         errln(UnicodeString("ERROR: strings should be equal before assignment."));
114     }
115 
116     int bixnext = bix->next();
117     int bi2next = bi2->next();
118     if (! (bixnext == bi2next && bixnext == 7)) {
119         errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
120     }
121     delete bix;
122     if (bi2->next() != 8) {
123         errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
124     }
125 
126 
127 
128     logln((UnicodeString)"Testing clone()");
129     RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
130     RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
131 
132     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
133       *bi1clone == *bi3 || *bi1clone == *bi2)
134         errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
135 
136     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137        *bi2clone == *bi3 || *bi2clone != *bi2)
138         errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
139 
140     if(bi1->getText() != bi1clone->getText()   ||
141        bi2clone->getText() != bi2->getText()   ||
142        *bi2clone == *bi1clone )
143         errln((UnicodeString)"ERROR: RBBI's clone() method failed");
144 
145     delete bi1clone;
146     delete bi2clone;
147     delete bi1;
148     delete bi3;
149     delete bi2;
150     delete biequal;
151 }
152 
TestBoilerPlate()153 void RBBIAPITest::TestBoilerPlate()
154 {
155     UErrorCode status = U_ZERO_ERROR;
156     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158     if (U_FAILURE(status)) {
159         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160         return;
161     }
162     if(*a!=*b){
163         errln("Failed: boilerplate method operator!= does not return correct results");
164     }
165     // Japanese word break iterators are identical to root with
166     // a dictionary-based break iterator
167     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169     if(c && d){
170         if(*c!=*d){
171             errln("Failed: boilerplate method operator== does not return correct results");
172         }
173     }else{
174         errln("creation of break iterator failed");
175     }
176     delete a;
177     delete b;
178     delete c;
179     delete d;
180 }
181 
TestgetRules()182 void RBBIAPITest::TestgetRules()
183 {
184     UErrorCode status=U_ZERO_ERROR;
185 
186     RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
187     RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
188     if(U_FAILURE(status)){
189         errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
190         delete bi1;
191         delete bi2;
192         return;
193     }
194 
195 
196 
197     logln((UnicodeString)"Testing toString()");
198 
199     bi1->setText((UnicodeString)"Hello there");
200 
201     RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
202 
203     UnicodeString temp=bi1->getRules();
204     UnicodeString temp2=bi2->getRules();
205     UnicodeString temp3=bi3->getRules();
206     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
207         errln((UnicodeString)"ERROR: error in getRules() method");
208 
209     delete bi1;
210     delete bi2;
211     delete bi3;
212 }
TestHashCode()213 void RBBIAPITest::TestHashCode()
214 {
215     UErrorCode status=U_ZERO_ERROR;
216     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
217     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
219     if(U_FAILURE(status)){
220         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
221         delete bi1;
222         delete bi2;
223         delete bi3;
224         return;
225     }
226 
227 
228     logln((UnicodeString)"Testing hashCode()");
229 
230     bi1->setText((UnicodeString)"Hash code");
231     bi2->setText((UnicodeString)"Hash code");
232     bi3->setText((UnicodeString)"Hash code");
233 
234     RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
235     RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
236 
237     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
238         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
239         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
240 
241     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
242         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
243         errln((UnicodeString)"ERROR: different objects have same hashcodes");
244 
245     delete bi1clone;
246     delete bi2clone;
247     delete bi1;
248     delete bi2;
249     delete bi3;
250 
251 }
TestGetSetAdoptText()252 void RBBIAPITest::TestGetSetAdoptText()
253 {
254     logln((UnicodeString)"Testing getText setText ");
255     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
256     UnicodeString str1="first string.";
257     UnicodeString str2="Second string.";
258     LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
259     LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
260     if(status.isFailure()){
261         errcheckln(status, "Fail : in construction - %s", status.errorName());
262             return;
263     }
264 
265 
266     CharacterIterator* text1= new StringCharacterIterator(str1);
267     CharacterIterator* text1Clone = text1->clone();
268     CharacterIterator* text2= new StringCharacterIterator(str2);
269     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
270 
271     wordIter1->setText(str1);
272     CharacterIterator *tci = &wordIter1->getText();
273     UnicodeString      tstr;
274     tci->getText(tstr);
275     TEST_ASSERT(tstr == str1);
276     if(wordIter1->current() != 0)
277         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
278 
279     wordIter1->next(2);
280 
281     wordIter1->setText(str2);
282     if(wordIter1->current() != 0)
283         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
284 
285 
286     charIter1->adoptText(text1Clone);
287     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
288     tci = &wordIter1->getText();
289     tci->getText(tstr);
290     TEST_ASSERT(tstr == str2);
291     tci = &charIter1->getText();
292     tci->getText(tstr);
293     TEST_ASSERT(tstr == str1);
294 
295 
296     LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
297     rb->adoptText(text1);
298     if(rb->getText() != *text1)
299         errln((UnicodeString)"ERROR:1 error in adoptText ");
300     rb->adoptText(text2);
301     if(rb->getText() != *text2)
302         errln((UnicodeString)"ERROR:2 error in adoptText ");
303 
304     // Adopt where iterator range is less than the entire orignal source string.
305     //   (With the change of the break engine to working with UText internally,
306     //    CharacterIterators starting at positions other than zero are not supported)
307     rb->adoptText(text3);
308     TEST_ASSERT(rb->preceding(2) == 0);
309     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
310     //if(rb->preceding(2) != 3) {
311     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
312     //}
313     //if(rb->following(11) != BreakIterator::DONE) {
314     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
315     //}
316 
317     // UText API
318     //
319     //   Quick test to see if UText is working at all.
320     //
321     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
322     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
323     //                012345678901
324 
325     status.reset();
326     LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
327     wordIter1->setText(ut.getAlias(), status);
328     TEST_ASSERT_SUCCESS(status);
329 
330     int32_t pos;
331     pos = wordIter1->first();
332     TEST_ASSERT(pos==0);
333     pos = wordIter1->next();
334     TEST_ASSERT(pos==5);
335     pos = wordIter1->next();
336     TEST_ASSERT(pos==6);
337     pos = wordIter1->next();
338     TEST_ASSERT(pos==11);
339     pos = wordIter1->next();
340     TEST_ASSERT(pos==UBRK_DONE);
341 
342     status.reset();
343     LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
344     TEST_ASSERT_SUCCESS(status);
345     wordIter1->setText(ut2.getAlias(), status);
346     TEST_ASSERT_SUCCESS(status);
347 
348     pos = wordIter1->first();
349     TEST_ASSERT(pos==0);
350     pos = wordIter1->next();
351     TEST_ASSERT(pos==3);
352     pos = wordIter1->next();
353     TEST_ASSERT(pos==4);
354 
355     pos = wordIter1->last();
356     TEST_ASSERT(pos==6);
357     pos = wordIter1->previous();
358     TEST_ASSERT(pos==4);
359     pos = wordIter1->previous();
360     TEST_ASSERT(pos==3);
361     pos = wordIter1->previous();
362     TEST_ASSERT(pos==0);
363     pos = wordIter1->previous();
364     TEST_ASSERT(pos==UBRK_DONE);
365 
366     status.reset();
367     UnicodeString sEmpty;
368     LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
369     wordIter1->getUText(gut2.getAlias(), status);
370     TEST_ASSERT_SUCCESS(status);
371     status.reset();
372 }
373 
374 
TestIteration()375 void RBBIAPITest::TestIteration()
376 {
377     // This test just verifies that the API is present.
378     // Testing for correct operation of the break rules happens elsewhere.
379 
380     UErrorCode status=U_ZERO_ERROR;
381     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
382     if (U_FAILURE(status) || bi == NULL)  {
383         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
384     }
385     delete bi;
386 
387     status=U_ZERO_ERROR;
388     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
389     if (U_FAILURE(status) || bi == NULL)  {
390         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
391     }
392     delete bi;
393 
394     status=U_ZERO_ERROR;
395     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
396     if (U_FAILURE(status) || bi == NULL)  {
397         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
398     }
399     delete bi;
400 
401     status=U_ZERO_ERROR;
402     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
403     if (U_FAILURE(status) || bi == NULL)  {
404         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
405     }
406     delete bi;
407 
408     status=U_ZERO_ERROR;
409     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
410     if (U_FAILURE(status) || bi == NULL)  {
411         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
412     }
413     delete bi;
414 
415     status=U_ZERO_ERROR;
416     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
417     if (U_FAILURE(status) || bi == NULL)  {
418         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
419         return;   // Skip the rest of these tests.
420     }
421 
422 
423     UnicodeString testString="0123456789";
424     bi->setText(testString);
425 
426     int32_t i;
427     i = bi->first();
428     if (i != 0) {
429         errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
430     }
431 
432     i = bi->last();
433     if (i != 10) {
434         errln("Incorrect value from bi->last().  Expected 10, got %d", i);
435     }
436 
437     //
438     // Previous
439     //
440     bi->last();
441     i = bi->previous();
442     if (i != 9) {
443         errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
444     }
445 
446 
447     bi->first();
448     i = bi->previous();
449     if (i != BreakIterator::DONE) {
450         errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
451     }
452 
453     //
454     // next()
455     //
456     bi->first();
457     i = bi->next();
458     if (i != 1) {
459         errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
460     }
461 
462     bi->last();
463     i = bi->next();
464     if (i != BreakIterator::DONE) {
465         errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
466     }
467 
468 
469     //
470     //  current()
471     //
472     bi->first();
473     i = bi->current();
474     if (i != 0) {
475         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
476     }
477 
478     bi->next();
479     i = bi->current();
480     if (i != 1) {
481         errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
482     }
483 
484     bi->last();
485     bi->next();
486     i = bi->current();
487     if (i != 10) {
488         errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
489     }
490 
491     bi->first();
492     bi->previous();
493     i = bi->current();
494     if (i != 0) {
495         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
496     }
497 
498 
499     //
500     // Following()
501     //
502     i = bi->following(4);
503     if (i != 5) {
504         errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
505     }
506 
507     i = bi->following(9);
508     if (i != 10) {
509         errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
510     }
511 
512     i = bi->following(10);
513     if (i != BreakIterator::DONE) {
514         errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
515     }
516 
517 
518     //
519     // Preceding
520     //
521     i = bi->preceding(4);
522     if (i != 3) {
523         errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
524     }
525 
526     i = bi->preceding(10);
527     if (i != 9) {
528         errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
529     }
530 
531     i = bi->preceding(1);
532     if (i != 0) {
533         errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
534     }
535 
536     i = bi->preceding(0);
537     if (i != BreakIterator::DONE) {
538         errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
539     }
540 
541 
542     //
543     // isBoundary()
544     //
545     bi->first();
546     if (bi->isBoundary(3) != TRUE) {
547         errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
548     }
549     i = bi->current();
550     if (i != 3) {
551         errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
552     }
553 
554 
555     if (bi->isBoundary(11) != FALSE) {
556         errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
557     }
558     i = bi->current();
559     if (i != 10) {
560         errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
561     }
562 
563     //
564     // next(n)
565     //
566     bi->first();
567     i = bi->next(4);
568     if (i != 4) {
569         errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
570     }
571 
572     i = bi->next(6);
573     if (i != 10) {
574         errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
575     }
576 
577     bi->first();
578     i = bi->next(11);
579     if (i != BreakIterator::DONE) {
580         errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
581     }
582 
583     delete bi;
584 
585 }
586 
587 
588 
589 
590 
591 
TestBuilder()592 void RBBIAPITest::TestBuilder() {
593      UnicodeString rulesString1 = "$Letters = [:L:];\n"
594                                   "$Numbers = [:N:];\n"
595                                   "$Letters+;\n"
596                                   "$Numbers+;\n"
597                                   "[^$Letters $Numbers];\n"
598                                   "!.*;\n";
599      UnicodeString testString1  = "abc123..abc";
600                                 // 01234567890
601      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
602      UErrorCode status=U_ZERO_ERROR;
603      UParseError    parseError;
604 
605      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
606      if(U_FAILURE(status)) {
607          dataerrln("Fail : in construction - %s", u_errorName(status));
608      } else {
609          bi->setText(testString1);
610          doBoundaryTest(*bi, testString1, bounds1);
611      }
612      delete bi;
613 }
614 
615 
616 //
617 //  TestQuoteGrouping
618 //       Single quotes within rules imply a grouping, so that a modifier
619 //       following the quoted text (* or +) applies to all of the quoted chars.
620 //
TestQuoteGrouping()621 void RBBIAPITest::TestQuoteGrouping() {
622      UnicodeString rulesString1 = "#Here comes the rule...\n"
623                                   "'$@!'*;\n"   //  (\$\@\!)*
624                                   ".;\n";
625 
626      UnicodeString testString1  = "$@!$@!X$@!!X";
627                                 // 0123456789012
628      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
629      UErrorCode status=U_ZERO_ERROR;
630      UParseError    parseError;
631 
632      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
633      if(U_FAILURE(status)) {
634          dataerrln("Fail : in construction - %s", u_errorName(status));
635      } else {
636          bi->setText(testString1);
637          doBoundaryTest(*bi, testString1, bounds1);
638      }
639      delete bi;
640 }
641 
642 //
643 //  TestRuleStatus
644 //      Test word break rule status constants.
645 //
TestRuleStatus()646 void RBBIAPITest::TestRuleStatus() {
647      UChar str[30];
648      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
649      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
650      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
651               // 012345678901234567  8      9    0
652               //                     Katakana
653                 str, 30);
654      UnicodeString testString1(str);
655      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
656      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
657                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
658                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
659 
660      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
661                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
662                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
663 
664      UErrorCode status=U_ZERO_ERROR;
665 
666      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
667      if(U_FAILURE(status)) {
668          errcheckln(status, "Fail : in construction - %s", u_errorName(status));
669      } else {
670          bi->setText(testString1);
671          // First test that the breaks are in the right spots.
672          doBoundaryTest(*bi, testString1, bounds1);
673 
674          // Then go back and check tag values
675          int32_t i = 0;
676          int32_t pos, tag;
677          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
678              if (pos != bounds1[i]) {
679                  errln("FAIL: unexpected word break at postion %d", pos);
680                  break;
681              }
682              tag = bi->getRuleStatus();
683              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
684                  errln("FAIL: incorrect tag value %d at position %d", tag, pos);
685                  break;
686              }
687 
688              // Check that we get the same tag values from getRuleStatusVec()
689              int32_t vec[10];
690              int t = bi->getRuleStatusVec(vec, 10, status);
691              TEST_ASSERT_SUCCESS(status);
692              TEST_ASSERT(t==1);
693              TEST_ASSERT(vec[0] == tag);
694          }
695      }
696      delete bi;
697 
698      // Now test line break status.  This test mostly is to confirm that the status constants
699      //                              are correctly declared in the header.
700      testString1 =   "test line. \n";
701      // break type    s    s     h
702 
703      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
704      if(U_FAILURE(status)) {
705          errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
706      } else {
707          int32_t i = 0;
708          int32_t pos, tag;
709          UBool   success;
710 
711          bi->setText(testString1);
712          pos = bi->current();
713          tag = bi->getRuleStatus();
714          for (i=0; i<3; i++) {
715              switch (i) {
716              case 0:
717                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
718              case 1:
719                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
720              case 2:
721                  success = pos==12 && tag==UBRK_LINE_HARD; break;
722              default:
723                  success = FALSE; break;
724              }
725              if (success == FALSE) {
726                  errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
727                      i, pos, tag);
728                  break;
729              }
730              pos = bi->next();
731              tag = bi->getRuleStatus();
732          }
733          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
734              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
735              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
736              errln("UBRK_LINE_* constants from header are inconsistent.");
737          }
738      }
739      delete bi;
740 
741 }
742 
743 
744 //
745 //  TestRuleStatusVec
746 //      Test the vector form of  break rule status.
747 //
TestRuleStatusVec()748 void RBBIAPITest::TestRuleStatusVec() {
749     UnicodeString rulesString(   "[A-N]{100}; \n"
750                                  "[a-w]{200}; \n"
751                                  "[\\p{L}]{300}; \n"
752                                  "[\\p{N}]{400}; \n"
753                                  "[0-5]{500}; \n"
754                                   "!.*;\n", -1, US_INV);
755      UnicodeString testString1  = "Aapz5?";
756      int32_t  statusVals[10];
757      int32_t  numStatuses;
758      int32_t  pos;
759 
760      UErrorCode status=U_ZERO_ERROR;
761      UParseError    parseError;
762 
763      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
764      if (U_FAILURE(status)) {
765          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
766      } else {
767          bi->setText(testString1);
768 
769          // A
770          pos = bi->next();
771          TEST_ASSERT(pos==1);
772          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
773          TEST_ASSERT_SUCCESS(status);
774          TEST_ASSERT(numStatuses == 2);
775          TEST_ASSERT(statusVals[0] == 100);
776          TEST_ASSERT(statusVals[1] == 300);
777 
778          // a
779          pos = bi->next();
780          TEST_ASSERT(pos==2);
781          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
782          TEST_ASSERT_SUCCESS(status);
783          TEST_ASSERT(numStatuses == 2);
784          TEST_ASSERT(statusVals[0] == 200);
785          TEST_ASSERT(statusVals[1] == 300);
786 
787          // p
788          pos = bi->next();
789          TEST_ASSERT(pos==3);
790          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
791          TEST_ASSERT_SUCCESS(status);
792          TEST_ASSERT(numStatuses == 2);
793          TEST_ASSERT(statusVals[0] == 200);
794          TEST_ASSERT(statusVals[1] == 300);
795 
796          // z
797          pos = bi->next();
798          TEST_ASSERT(pos==4);
799          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
800          TEST_ASSERT_SUCCESS(status);
801          TEST_ASSERT(numStatuses == 1);
802          TEST_ASSERT(statusVals[0] == 300);
803 
804          // 5
805          pos = bi->next();
806          TEST_ASSERT(pos==5);
807          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
808          TEST_ASSERT_SUCCESS(status);
809          TEST_ASSERT(numStatuses == 2);
810          TEST_ASSERT(statusVals[0] == 400);
811          TEST_ASSERT(statusVals[1] == 500);
812 
813          // ?
814          pos = bi->next();
815          TEST_ASSERT(pos==6);
816          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
817          TEST_ASSERT_SUCCESS(status);
818          TEST_ASSERT(numStatuses == 1);
819          TEST_ASSERT(statusVals[0] == 0);
820 
821          //
822          //  Check buffer overflow error handling.   Char == A
823          //
824          bi->first();
825          pos = bi->next();
826          TEST_ASSERT(pos==1);
827          memset(statusVals, -1, sizeof(statusVals));
828          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
829          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
830          TEST_ASSERT(numStatuses == 2);
831          TEST_ASSERT(statusVals[0] == -1);
832 
833          status = U_ZERO_ERROR;
834          memset(statusVals, -1, sizeof(statusVals));
835          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
836          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837          TEST_ASSERT(numStatuses == 2);
838          TEST_ASSERT(statusVals[0] == 100);
839          TEST_ASSERT(statusVals[1] == -1);
840 
841          status = U_ZERO_ERROR;
842          memset(statusVals, -1, sizeof(statusVals));
843          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
844          TEST_ASSERT_SUCCESS(status);
845          TEST_ASSERT(numStatuses == 2);
846          TEST_ASSERT(statusVals[0] == 100);
847          TEST_ASSERT(statusVals[1] == 300);
848          TEST_ASSERT(statusVals[2] == -1);
849      }
850      delete bi;
851 
852 }
853 
854 //
855 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
856 //                               $variable reference
TestBug2190()857 void RBBIAPITest::TestBug2190() {
858      UnicodeString rulesString1 = "$aaa = abcd;\n"
859                                   "$bbb = $aaa;\n"
860                                   "$bbb;\n";
861      UnicodeString testString1  = "abcdabcd";
862                                 // 01234567890
863      int32_t bounds1[] = {0, 4, 8};
864      UErrorCode status=U_ZERO_ERROR;
865      UParseError    parseError;
866 
867      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
868      if(U_FAILURE(status)) {
869          dataerrln("Fail : in construction - %s", u_errorName(status));
870      } else {
871          bi->setText(testString1);
872          doBoundaryTest(*bi, testString1, bounds1);
873      }
874      delete bi;
875 }
876 
877 
TestRegistration()878 void RBBIAPITest::TestRegistration() {
879 #if !UCONFIG_NO_SERVICE
880     UErrorCode status = U_ZERO_ERROR;
881     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
882     // ok to not delete these if we exit because of error?
883     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
884     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
885     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
886 
887     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
888         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
889 
890         delete ja_word;
891         delete ja_char;
892         delete root_word;
893         delete root_char;
894 
895         return;
896     }
897 
898     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
899     {
900 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
901         if (ja_word && *ja_word == *root_word) {
902             errln("japan not different from root");
903         }
904 #endif
905     }
906 
907     {
908         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
909         UBool fail = TRUE;
910         if(result){
911             fail = *result != *ja_word;
912         }
913         delete result;
914         if (fail) {
915             errln("bad result for xx_XX/word");
916         }
917     }
918 
919     {
920         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
921         UBool fail = TRUE;
922         if(result){
923             fail = *result != *ja_char;
924         }
925         delete result;
926         if (fail) {
927             errln("bad result for ja_JP/char");
928         }
929     }
930 
931     {
932         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
933         UBool fail = TRUE;
934         if(result){
935             fail = *result != *root_char;
936         }
937         delete result;
938         if (fail) {
939             errln("bad result for xx_XX/char");
940         }
941     }
942 
943     {
944         StringEnumeration* avail = BreakIterator::getAvailableLocales();
945         UBool found = FALSE;
946         const UnicodeString* p;
947         while ((p = avail->snext(status))) {
948             if (p->compare("xx") == 0) {
949                 found = TRUE;
950                 break;
951             }
952         }
953         delete avail;
954         if (!found) {
955             errln("did not find test locale");
956         }
957     }
958 
959     {
960         UBool unreg = BreakIterator::unregister(key, status);
961         if (!unreg) {
962             errln("unable to unregister");
963         }
964     }
965 
966     {
967         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
968         BreakIterator* root = BreakIterator::createWordInstance("", status);
969         UBool fail = TRUE;
970         if(root){
971           fail = *root != *result;
972         }
973         delete root;
974         delete result;
975         if (fail) {
976             errln("did not get root break");
977         }
978     }
979 
980     {
981         StringEnumeration* avail = BreakIterator::getAvailableLocales();
982         UBool found = FALSE;
983         const UnicodeString* p;
984         while ((p = avail->snext(status))) {
985             if (p->compare("xx") == 0) {
986                 found = TRUE;
987                 break;
988             }
989         }
990         delete avail;
991         if (found) {
992             errln("found test locale");
993         }
994     }
995 
996     {
997         int32_t count;
998         UBool   foundLocale = FALSE;
999         const Locale *avail = BreakIterator::getAvailableLocales(count);
1000         for (int i=0; i<count; i++) {
1001             if (avail[i] == Locale::getEnglish()) {
1002                 foundLocale = TRUE;
1003                 break;
1004             }
1005         }
1006         if (foundLocale == FALSE) {
1007             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1008         }
1009     }
1010 
1011 
1012     // ja_word was adopted by factory
1013     delete ja_char;
1014     delete root_word;
1015     delete root_char;
1016 #endif
1017 }
1018 
RoundtripRule(const char * dataFile)1019 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1020     UErrorCode status = U_ZERO_ERROR;
1021     UParseError parseError;
1022     parseError.line = 0;
1023     parseError.offset = 0;
1024     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1025     uint32_t length;
1026     const UChar *builtSource;
1027     const uint8_t *rbbiRules;
1028     const uint8_t *builtRules;
1029 
1030     if (U_FAILURE(status)) {
1031         errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1032         return;
1033     }
1034 
1035     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1036     builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1037     RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1038     if (U_FAILURE(status)) {
1039         errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1040                 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1041         errln(UnicodeString(builtSource));
1042         return;
1043     };
1044     rbbiRules = brkItr->getBinaryRules(length);
1045     logln("Comparing \"%s\" len=%d", dataFile, length);
1046     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1047         errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1048         return;
1049     }
1050     delete brkItr;
1051 }
1052 
TestRoundtripRules()1053 void RBBIAPITest::TestRoundtripRules() {
1054     RoundtripRule("word");
1055     RoundtripRule("title");
1056     RoundtripRule("sent");
1057     RoundtripRule("line");
1058     RoundtripRule("char");
1059     if (!quick) {
1060         RoundtripRule("word_POSIX");
1061     }
1062 }
1063 
1064 
1065 // Check getBinaryRules() and construction of a break iterator from those rules.
1066 
TestGetBinaryRules()1067 void RBBIAPITest::TestGetBinaryRules() {
1068     UErrorCode status=U_ZERO_ERROR;
1069     LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1070     TEST_ASSERT_SUCCESS(status);
1071     RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1072     TEST_ASSERT(rbbi != NULL);
1073 
1074     // Check that the new line break iterator is nominally functional.
1075     UnicodeString helloWorld("Hello, World!");
1076     rbbi->setText(helloWorld);
1077     int n = 0;
1078     while (bi->next() != UBRK_DONE) {
1079         ++n;
1080     }
1081     TEST_ASSERT(n == 2);
1082 
1083     // Extract the binary rules as a uint8_t blob.
1084     uint32_t ruleLength;
1085     const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1086     TEST_ASSERT(ruleLength > 0);
1087     TEST_ASSERT(binRules != NULL);
1088 
1089     // Clone the binary rules, and create a break iterator from that.
1090     // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1091     uint8_t *clonedRules = new uint8_t[ruleLength];
1092     memcpy(clonedRules, binRules, ruleLength);
1093     RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1094     TEST_ASSERT_SUCCESS(status);
1095 
1096     // Check that the cloned line break iterator is nominally alive.
1097     clonedBI.setText(helloWorld);
1098     n = 0;
1099     while (clonedBI.next() != UBRK_DONE) {
1100         ++n;
1101     }
1102     TEST_ASSERT(n == 2);
1103 
1104     delete[] clonedRules;
1105 }
1106 
1107 
TestRefreshInputText()1108 void RBBIAPITest::TestRefreshInputText() {
1109     /*
1110      *  RefreshInput changes out the input of a Break Iterator without
1111      *    changing anything else in the iterator's state.  Used with Java JNI,
1112      *    when Java moves the underlying string storage.   This test
1113      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1114      *    The right set of boundaries should still be found.
1115      */
1116     UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1117     UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1118     UErrorCode status = U_ZERO_ERROR;
1119     UText ut1 = UTEXT_INITIALIZER;
1120     UText ut2 = UTEXT_INITIALIZER;
1121     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1122     TEST_ASSERT_SUCCESS(status);
1123 
1124     utext_openUChars(&ut1, testStr, -1, &status);
1125     TEST_ASSERT_SUCCESS(status);
1126 
1127     if (U_SUCCESS(status)) {
1128         bi->setText(&ut1, status);
1129         TEST_ASSERT_SUCCESS(status);
1130 
1131         /* Line boundaries will occur before each letter in the original string */
1132         TEST_ASSERT(1 == bi->next());
1133         TEST_ASSERT(3 == bi->next());
1134 
1135         /* Move the string, kill the original string.  */
1136         u_strcpy(movedStr, testStr);
1137         u_memset(testStr, 0x20, u_strlen(testStr));
1138         utext_openUChars(&ut2, movedStr, -1, &status);
1139         TEST_ASSERT_SUCCESS(status);
1140         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1141         TEST_ASSERT_SUCCESS(status);
1142         TEST_ASSERT(bi == returnedBI);
1143 
1144         /* Find the following matches, now working in the moved string. */
1145         TEST_ASSERT(5 == bi->next());
1146         TEST_ASSERT(7 == bi->next());
1147         TEST_ASSERT(8 == bi->next());
1148         TEST_ASSERT(UBRK_DONE == bi->next());
1149 
1150         utext_close(&ut1);
1151         utext_close(&ut2);
1152     }
1153     delete bi;
1154 
1155 }
1156 
1157 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1158 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1159   static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1160   it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1161 
1162   int32_t *pos = new int32_t[ustr.length()];
1163   int32_t posCount = 0;
1164 
1165   // calculate breaks up front, so we can print out
1166   // sans any debugging
1167   for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1168     pos[posCount++] = n;
1169     if(posCount>=ustr.length()) {
1170       it.errln("brk count exceeds string length!");
1171       return;
1172     }
1173   }
1174   UnicodeString out;
1175   out.append((UChar)CHSTR);
1176   int32_t prev = 0;
1177   for(int32_t i=0;i<posCount;i++) {
1178     int32_t n=pos[i];
1179     out.append(ustr.tempSubString(prev,n-prev));
1180     out.append((UChar)PILCROW);
1181     prev=n;
1182   }
1183   out.append(ustr.tempSubString(prev,ustr.length()-prev));
1184   out.append((UChar)CHEND);
1185   it.logln(out);
1186 
1187   out.remove();
1188   for(int32_t i=0;i<posCount;i++) {
1189     char tmp[100];
1190     sprintf(tmp,"%d ",pos[i]);
1191     out.append(UnicodeString(tmp));
1192   }
1193   it.logln(out);
1194   delete [] pos;
1195 }
1196 #endif
1197 
TestFilteredBreakIteratorBuilder()1198 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1199 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1200   UErrorCode status = U_ZERO_ERROR;
1201   LocalPointer<FilteredBreakIteratorBuilder> builder;
1202   LocalPointer<BreakIterator> baseBI;
1203   LocalPointer<BreakIterator> filteredBI;
1204   LocalPointer<BreakIterator> frenchBI;
1205 
1206   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1207   const UnicodeString ABBR_MR("Mr.");
1208   const UnicodeString ABBR_CAPT("Capt.");
1209 
1210   {
1211     logln("Constructing empty builder\n");
1212     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1213     TEST_ASSERT_SUCCESS(status);
1214 
1215     logln("Constructing base BI\n");
1216     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1217     TEST_ASSERT_SUCCESS(status);
1218 
1219 	logln("Building new BI\n");
1220     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1221     TEST_ASSERT_SUCCESS(status);
1222 
1223 	if (U_SUCCESS(status)) {
1224         logln("Testing:");
1225         filteredBI->setText(text);
1226         TEST_ASSERT(20 == filteredBI->next()); // Mr.
1227         TEST_ASSERT(84 == filteredBI->next()); // recovered.
1228         TEST_ASSERT(90 == filteredBI->next()); // Capt.
1229         TEST_ASSERT(181 == filteredBI->next()); // Mr.
1230         TEST_ASSERT(278 == filteredBI->next()); // charge.
1231         filteredBI->first();
1232         prtbrks(filteredBI.getAlias(), text, *this);
1233     }
1234   }
1235 
1236   {
1237     logln("Constructing empty builder\n");
1238     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1239     TEST_ASSERT_SUCCESS(status);
1240 
1241     if (U_SUCCESS(status)) {
1242         logln("Adding Mr. as an exception\n");
1243         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1244         TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1245         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1246         TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1247         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1248         TEST_ASSERT_SUCCESS(status);
1249 
1250         logln("Constructing base BI\n");
1251         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1252         TEST_ASSERT_SUCCESS(status);
1253 
1254         logln("Building new BI\n");
1255         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1256         TEST_ASSERT_SUCCESS(status);
1257 
1258         logln("Testing:");
1259         filteredBI->setText(text);
1260         TEST_ASSERT(84 == filteredBI->next());
1261         TEST_ASSERT(90 == filteredBI->next());// Capt.
1262         TEST_ASSERT(278 == filteredBI->next());
1263         filteredBI->first();
1264         prtbrks(filteredBI.getAlias(), text, *this);
1265     }
1266   }
1267 
1268 
1269   {
1270     logln("Constructing empty builder\n");
1271     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1272     TEST_ASSERT_SUCCESS(status);
1273 
1274     if (U_SUCCESS(status)) {
1275         logln("Adding Mr. and Capt as an exception\n");
1276         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1277         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1278         TEST_ASSERT_SUCCESS(status);
1279 
1280         logln("Constructing base BI\n");
1281         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1282         TEST_ASSERT_SUCCESS(status);
1283 
1284         logln("Building new BI\n");
1285         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1286         TEST_ASSERT_SUCCESS(status);
1287 
1288         logln("Testing:");
1289         filteredBI->setText(text);
1290         TEST_ASSERT(84 == filteredBI->next());
1291         TEST_ASSERT(278 == filteredBI->next());
1292         filteredBI->first();
1293         prtbrks(filteredBI.getAlias(), text, *this);
1294     }
1295   }
1296 
1297 
1298   {
1299     logln("Constructing English builder\n");
1300     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1301     TEST_ASSERT_SUCCESS(status);
1302 
1303     logln("Constructing base BI\n");
1304     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1305     TEST_ASSERT_SUCCESS(status);
1306 
1307     if (U_SUCCESS(status)) {
1308         logln("unsuppressing 'Capt'");
1309         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1310 
1311         logln("Building new BI\n");
1312         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1313         TEST_ASSERT_SUCCESS(status);
1314 
1315         if(filteredBI.isValid()) {
1316           logln("Testing:");
1317           filteredBI->setText(text);
1318           TEST_ASSERT(84 == filteredBI->next());
1319           TEST_ASSERT(90 == filteredBI->next());
1320           TEST_ASSERT(278 == filteredBI->next());
1321           filteredBI->first();
1322           prtbrks(filteredBI.getAlias(), text, *this);
1323         }
1324     }
1325   }
1326 
1327 
1328   {
1329     logln("Constructing English builder\n");
1330     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1331     TEST_ASSERT_SUCCESS(status);
1332 
1333     logln("Constructing base BI\n");
1334     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1335     TEST_ASSERT_SUCCESS(status);
1336 
1337     if (U_SUCCESS(status)) {
1338         logln("Building new BI\n");
1339         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1340         TEST_ASSERT_SUCCESS(status);
1341 
1342         if(filteredBI.isValid()) {
1343           logln("Testing:");
1344           filteredBI->setText(text);
1345           TEST_ASSERT(84 == filteredBI->next());
1346           TEST_ASSERT(278 == filteredBI->next());
1347           filteredBI->first();
1348           prtbrks(filteredBI.getAlias(), text, *this);
1349         }
1350     }
1351   }
1352 
1353   // reenable once french is in
1354   {
1355     logln("Constructing French builder");
1356     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1357     TEST_ASSERT_SUCCESS(status);
1358 
1359     logln("Constructing base BI\n");
1360     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1361     TEST_ASSERT_SUCCESS(status);
1362 
1363     if (U_SUCCESS(status)) {
1364         logln("Building new BI\n");
1365         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1366         TEST_ASSERT_SUCCESS(status);
1367     }
1368 
1369     if(frenchBI.isValid()) {
1370       logln("Testing:");
1371       UnicodeString frText("C'est MM. Duval.");
1372       frenchBI->setText(frText);
1373       TEST_ASSERT(16 == frenchBI->next());
1374       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1375       frenchBI->first();
1376       prtbrks(frenchBI.getAlias(), frText, *this);
1377       logln("Testing against English:");
1378       filteredBI->setText(frText);
1379       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1380       TEST_ASSERT(16 == filteredBI->next());
1381       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1382       filteredBI->first();
1383       prtbrks(filteredBI.getAlias(), frText, *this);
1384 
1385       // Verify ==
1386       TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
1387       TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1388       TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
1389     } else {
1390       dataerrln("French BI: not valid.");
1391 	}
1392   }
1393 
1394 #else
1395   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1396 #endif
1397 }
1398 
1399 //---------------------------------------------
1400 // runIndexedTest
1401 //---------------------------------------------
1402 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1403 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1404 {
1405     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1406     TESTCASE_AUTO_BEGIN;
1407 #if !UCONFIG_NO_FILE_IO
1408     TESTCASE_AUTO(TestCloneEquals);
1409     TESTCASE_AUTO(TestgetRules);
1410     TESTCASE_AUTO(TestHashCode);
1411     TESTCASE_AUTO(TestGetSetAdoptText);
1412     TESTCASE_AUTO(TestIteration);
1413 #endif
1414     TESTCASE_AUTO(TestBuilder);
1415     TESTCASE_AUTO(TestQuoteGrouping);
1416     TESTCASE_AUTO(TestRuleStatusVec);
1417     TESTCASE_AUTO(TestBug2190);
1418 #if !UCONFIG_NO_FILE_IO
1419     TESTCASE_AUTO(TestRegistration);
1420     TESTCASE_AUTO(TestBoilerPlate);
1421     TESTCASE_AUTO(TestRuleStatus);
1422     TESTCASE_AUTO(TestRoundtripRules);
1423     TESTCASE_AUTO(TestGetBinaryRules);
1424 #endif
1425     TESTCASE_AUTO(TestRefreshInputText);
1426 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1427     TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1428 #endif
1429     TESTCASE_AUTO_END;
1430 }
1431 
1432 
1433 //---------------------------------------------
1434 //Internal subroutines
1435 //---------------------------------------------
1436 
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1437 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1438      logln((UnicodeString)"testIsBoundary():");
1439         int32_t p = 0;
1440         UBool isB;
1441         for (int32_t i = 0; i < text.length(); i++) {
1442             isB = bi.isBoundary(i);
1443             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1444 
1445             if (i == boundaries[p]) {
1446                 if (!isB)
1447                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1448                 p++;
1449             }
1450             else {
1451                 if (isB)
1452                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1453             }
1454         }
1455 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1456 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1457     UnicodeString selected;
1458     UnicodeString expected=CharsToUnicodeString(expectedString);
1459 
1460     if(gotoffset != expectedOffset)
1461          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1462     if(start <= gotoffset){
1463         testString.extractBetween(start, gotoffset, selected);
1464     }
1465     else{
1466         testString.extractBetween(gotoffset, start, selected);
1467     }
1468     if(selected.compare(expected) != 0)
1469          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1470     else
1471         logln(prettify("****selected \"" + selected + "\""));
1472 }
1473 
1474 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1475