• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1999-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************
7  *   Date        Name        Description
8  *   12/14/99    Madhu        Creation.
9  *   01/12/2000  Madhu        updated for changed API
10  ********************************************************************/
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_BREAK_ITERATION
15 
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33  * API Test the RuleBasedBreakIterator class
34  */
35 
36 
// Reports a data error (with file and line) when `status` holds a failure code.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Reports a test failure (with file, line and the expression text) when `expr` is false.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
}
42 
TestCloneEquals()43 void RBBIAPITest::TestCloneEquals()
44 {
45 
46     UErrorCode status=U_ZERO_ERROR;
47     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51     if(U_FAILURE(status)){
52         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53         return;
54     }
55 
56 
57     UnicodeString testString="Testing word break iterators's clone() and equals()";
58     bi1->setText(testString);
59     bi2->setText(testString);
60     biequal->setText(testString);
61 
62     bi3->setText("hello");
63 
64     logln((UnicodeString)"Testing equals()");
65 
66     logln((UnicodeString)"Testing == and !=");
67     UBool b = (*bi1 != *biequal);
68     b |= *bi1 == *bi2;
69     b |= *bi1 == *bi3;
70     if (b) {
71         errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
72     }
73 
74     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
75         errln("%s:%d ERROR:2 RBBI's == and != operator  failed.", __FILE__, __LINE__);
76 
77 
78     // Quick test of RulesBasedBreakIterator assignment -
79     // Check that
80     //    two different iterators are !=
81     //    they are == after assignment
82     //    source and dest iterator produce the same next() after assignment.
83     //    deleting one doesn't disable the other.
84     logln("Testing assignment");
85     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86     if(U_FAILURE(status)){
87         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88         return;
89     }
90 
91     RuleBasedBreakIterator biDefault, biDefault2;
92     if(U_FAILURE(status)){
93         errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
94         return;
95     }
96     if (biDefault == *bix) {
97         errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
98         return;
99     }
100     if (biDefault != biDefault2) {
101         errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
102         return;
103     }
104 
105 
106     UnicodeString   HelloString("Hello Kitty");
107     bix->setText(HelloString);
108     if (*bix == *bi2) {
109         errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
110     }
111     *bix = *bi2;
112     if (*bix != *bi2) {
113         errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
114     }
115 
116     int bixnext = bix->next();
117     int bi2next = bi2->next();
118     if (! (bixnext == bi2next && bixnext == 7)) {
119         errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
120     }
121     delete bix;
122     if (bi2->next() != 8) {
123         errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
124     }
125 
126 
127 
128     logln((UnicodeString)"Testing clone()");
129     RuleBasedBreakIterator* bi1clone = dynamic_cast<RuleBasedBreakIterator *>(bi1->clone());
130     RuleBasedBreakIterator* bi2clone = dynamic_cast<RuleBasedBreakIterator *>(bi2->clone());
131 
132     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
133       *bi1clone == *bi3 || *bi1clone == *bi2)
134         errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
135 
136     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137        *bi2clone == *bi3 || *bi2clone != *bi2)
138         errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
139 
140     if(bi1->getText() != bi1clone->getText()   ||
141        bi2clone->getText() != bi2->getText()   ||
142        *bi2clone == *bi1clone )
143         errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
144 
145     delete bi1clone;
146     delete bi2clone;
147     delete bi1;
148     delete bi3;
149     delete bi2;
150     delete biequal;
151 }
152 
TestBoilerPlate()153 void RBBIAPITest::TestBoilerPlate()
154 {
155     UErrorCode status = U_ZERO_ERROR;
156     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158     if (U_FAILURE(status)) {
159         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160         return;
161     }
162     if(*a!=*b){
163         errln("Failed: boilerplate method operator!= does not return correct results");
164     }
165     // Japanese word break iterators are identical to root with
166     // a dictionary-based break iterator
167     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169     if(c && d){
170         if(*c!=*d){
171             errln("Failed: boilerplate method operator== does not return correct results");
172         }
173     }else{
174         errln("creation of break iterator failed");
175     }
176     delete a;
177     delete b;
178     delete c;
179     delete d;
180 }
181 
TestgetRules()182 void RBBIAPITest::TestgetRules()
183 {
184     UErrorCode status=U_ZERO_ERROR;
185 
186     LocalPointer<RuleBasedBreakIterator> bi1(
187             (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
188     LocalPointer<RuleBasedBreakIterator> bi2(
189             (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
190     if(U_FAILURE(status)){
191         errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
192         return;
193     }
194 
195     logln((UnicodeString)"Testing getRules()");
196 
197     UnicodeString text(u"Hello there");
198     bi1->setText(text);
199 
200     LocalPointer <RuleBasedBreakIterator> bi3((RuleBasedBreakIterator*)bi1->clone());
201 
202     UnicodeString temp=bi1->getRules();
203     UnicodeString temp2=bi2->getRules();
204     UnicodeString temp3=bi3->getRules();
205     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
206         errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
207 
208     RuleBasedBreakIterator bi4;   // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
209     if (!bi4.getRules().isEmpty()) {
210         errln("%s:%d Empty string expected.", __FILE__, __LINE__);
211     }
212 }
213 
TestHashCode()214 void RBBIAPITest::TestHashCode()
215 {
216     UErrorCode status=U_ZERO_ERROR;
217     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
219     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
220     if(U_FAILURE(status)){
221         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
222         delete bi1;
223         delete bi2;
224         delete bi3;
225         return;
226     }
227 
228 
229     logln((UnicodeString)"Testing hashCode()");
230 
231     bi1->setText((UnicodeString)"Hash code");
232     bi2->setText((UnicodeString)"Hash code");
233     bi3->setText((UnicodeString)"Hash code");
234 
235     RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
236     RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
237 
238     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
239         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
240         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
241 
242     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
243         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
244         errln((UnicodeString)"ERROR: different objects have same hashcodes");
245 
246     delete bi1clone;
247     delete bi2clone;
248     delete bi1;
249     delete bi2;
250     delete bi3;
251 
252 }
TestGetSetAdoptText()253 void RBBIAPITest::TestGetSetAdoptText()
254 {
255     logln((UnicodeString)"Testing getText setText ");
256     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
257     UnicodeString str1="first string.";
258     UnicodeString str2="Second string.";
259     LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
260     LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
261     if(status.isFailure()){
262         errcheckln(status, "Fail : in construction - %s", status.errorName());
263             return;
264     }
265 
266 
267     CharacterIterator* text1= new StringCharacterIterator(str1);
268     CharacterIterator* text1Clone = text1->clone();
269     CharacterIterator* text2= new StringCharacterIterator(str2);
270     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
271 
272     wordIter1->setText(str1);
273     CharacterIterator *tci = &wordIter1->getText();
274     UnicodeString      tstr;
275     tci->getText(tstr);
276     TEST_ASSERT(tstr == str1);
277     if(wordIter1->current() != 0)
278         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
279 
280     wordIter1->next(2);
281 
282     wordIter1->setText(str2);
283     if(wordIter1->current() != 0)
284         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285 
286 
287     charIter1->adoptText(text1Clone);
288     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
289     tci = &wordIter1->getText();
290     tci->getText(tstr);
291     TEST_ASSERT(tstr == str2);
292     tci = &charIter1->getText();
293     tci->getText(tstr);
294     TEST_ASSERT(tstr == str1);
295 
296 
297     LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
298     rb->adoptText(text1);
299     if(rb->getText() != *text1)
300         errln((UnicodeString)"ERROR:1 error in adoptText ");
301     rb->adoptText(text2);
302     if(rb->getText() != *text2)
303         errln((UnicodeString)"ERROR:2 error in adoptText ");
304 
305     // Adopt where iterator range is less than the entire orignal source string.
306     //   (With the change of the break engine to working with UText internally,
307     //    CharacterIterators starting at positions other than zero are not supported)
308     rb->adoptText(text3);
309     TEST_ASSERT(rb->preceding(2) == 0);
310     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
311     //if(rb->preceding(2) != 3) {
312     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
313     //}
314     //if(rb->following(11) != BreakIterator::DONE) {
315     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
316     //}
317 
318     // UText API
319     //
320     //   Quick test to see if UText is working at all.
321     //
322     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
323     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
324     //                012345678901
325 
326     status.reset();
327     LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
328     wordIter1->setText(ut.getAlias(), status);
329     TEST_ASSERT_SUCCESS(status);
330 
331     int32_t pos;
332     pos = wordIter1->first();
333     TEST_ASSERT(pos==0);
334     pos = wordIter1->next();
335     TEST_ASSERT(pos==5);
336     pos = wordIter1->next();
337     TEST_ASSERT(pos==6);
338     pos = wordIter1->next();
339     TEST_ASSERT(pos==11);
340     pos = wordIter1->next();
341     TEST_ASSERT(pos==UBRK_DONE);
342 
343     status.reset();
344     LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
345     TEST_ASSERT_SUCCESS(status);
346     wordIter1->setText(ut2.getAlias(), status);
347     TEST_ASSERT_SUCCESS(status);
348 
349     pos = wordIter1->first();
350     TEST_ASSERT(pos==0);
351     pos = wordIter1->next();
352     TEST_ASSERT(pos==3);
353     pos = wordIter1->next();
354     TEST_ASSERT(pos==4);
355 
356     pos = wordIter1->last();
357     TEST_ASSERT(pos==6);
358     pos = wordIter1->previous();
359     TEST_ASSERT(pos==4);
360     pos = wordIter1->previous();
361     TEST_ASSERT(pos==3);
362     pos = wordIter1->previous();
363     TEST_ASSERT(pos==0);
364     pos = wordIter1->previous();
365     TEST_ASSERT(pos==UBRK_DONE);
366 
367     status.reset();
368     UnicodeString sEmpty;
369     LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
370     wordIter1->getUText(gut2.getAlias(), status);
371     TEST_ASSERT_SUCCESS(status);
372     status.reset();
373 }
374 
375 
TestIteration()376 void RBBIAPITest::TestIteration()
377 {
378     // This test just verifies that the API is present.
379     // Testing for correct operation of the break rules happens elsewhere.
380 
381     UErrorCode status=U_ZERO_ERROR;
382     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
383     if (U_FAILURE(status) || bi == NULL)  {
384         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
385     }
386     delete bi;
387 
388     status=U_ZERO_ERROR;
389     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
390     if (U_FAILURE(status) || bi == NULL)  {
391         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
392     }
393     delete bi;
394 
395     status=U_ZERO_ERROR;
396     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
397     if (U_FAILURE(status) || bi == NULL)  {
398         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
399     }
400     delete bi;
401 
402     status=U_ZERO_ERROR;
403     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
404     if (U_FAILURE(status) || bi == NULL)  {
405         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
406     }
407     delete bi;
408 
409     status=U_ZERO_ERROR;
410     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
411     if (U_FAILURE(status) || bi == NULL)  {
412         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
413     }
414     delete bi;
415 
416     status=U_ZERO_ERROR;
417     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
418     if (U_FAILURE(status) || bi == NULL)  {
419         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
420         return;   // Skip the rest of these tests.
421     }
422 
423 
424     UnicodeString testString="0123456789";
425     bi->setText(testString);
426 
427     int32_t i;
428     i = bi->first();
429     if (i != 0) {
430         errln("%s:%d Incorrect value from bi->first().  Expected 0, got %d.", __FILE__, __LINE__, i);
431     }
432 
433     i = bi->last();
434     if (i != 10) {
435         errln("%s:%d Incorrect value from bi->last().  Expected 10, got %d", __FILE__, __LINE__, i);
436     }
437 
438     //
439     // Previous
440     //
441     bi->last();
442     i = bi->previous();
443     if (i != 9) {
444         errln("%s:%d Incorrect value from bi->last().  Expected 9, got %d", __FILE__, __LINE__, i);
445     }
446 
447 
448     bi->first();
449     i = bi->previous();
450     if (i != BreakIterator::DONE) {
451         errln("%s:%d Incorrect value from bi->previous().  Expected DONE, got %d", __FILE__, __LINE__, i);
452     }
453 
454     //
455     // next()
456     //
457     bi->first();
458     i = bi->next();
459     if (i != 1) {
460         errln("%s:%d Incorrect value from bi->next().  Expected 1, got %d", __FILE__, __LINE__, i);
461     }
462 
463     bi->last();
464     i = bi->next();
465     if (i != BreakIterator::DONE) {
466         errln("%s:%d Incorrect value from bi->next().  Expected DONE, got %d", __FILE__, __LINE__, i);
467     }
468 
469 
470     //
471     //  current()
472     //
473     bi->first();
474     i = bi->current();
475     if (i != 0) {
476         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
477     }
478 
479     bi->next();
480     i = bi->current();
481     if (i != 1) {
482         errln("%s:%d Incorrect value from bi->current().  Expected 1, got %d", __FILE__, __LINE__, i);
483     }
484 
485     bi->last();
486     bi->next();
487     i = bi->current();
488     if (i != 10) {
489         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
490     }
491 
492     bi->first();
493     bi->previous();
494     i = bi->current();
495     if (i != 0) {
496         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
497     }
498 
499 
500     //
501     // Following()
502     //
503     i = bi->following(4);
504     if (i != 5) {
505         errln("%s:%d Incorrect value from bi->following().  Expected 5, got %d", __FILE__, __LINE__, i);
506     }
507 
508     i = bi->following(9);
509     if (i != 10) {
510         errln("%s:%d Incorrect value from bi->following().  Expected 10, got %d", __FILE__, __LINE__, i);
511     }
512 
513     i = bi->following(10);
514     if (i != BreakIterator::DONE) {
515         errln("%s:%d Incorrect value from bi->following().  Expected DONE, got %d", __FILE__, __LINE__, i);
516     }
517 
518 
519     //
520     // Preceding
521     //
522     i = bi->preceding(4);
523     if (i != 3) {
524         errln("%s:%d Incorrect value from bi->preceding().  Expected 3, got %d", __FILE__, __LINE__, i);
525     }
526 
527     i = bi->preceding(10);
528     if (i != 9) {
529         errln("%s:%d Incorrect value from bi->preceding().  Expected 9, got %d", __FILE__, __LINE__, i);
530     }
531 
532     i = bi->preceding(1);
533     if (i != 0) {
534         errln("%s:%d Incorrect value from bi->preceding().  Expected 0, got %d", __FILE__, __LINE__, i);
535     }
536 
537     i = bi->preceding(0);
538     if (i != BreakIterator::DONE) {
539         errln("%s:%d Incorrect value from bi->preceding().  Expected DONE, got %d", __FILE__, __LINE__, i);
540     }
541 
542 
543     //
544     // isBoundary()
545     //
546     bi->first();
547     if (bi->isBoundary(3) != TRUE) {
548         errln("%s:%d Incorrect value from bi->isBoudary().  Expected TRUE, got FALSE", __FILE__, __LINE__, i);
549     }
550     i = bi->current();
551     if (i != 3) {
552         errln("%s:%d Incorrect value from bi->current().  Expected 3, got %d", __FILE__, __LINE__, i);
553     }
554 
555 
556     if (bi->isBoundary(11) != FALSE) {
557         errln("%s:%d Incorrect value from bi->isBoudary().  Expected FALSE, got TRUE", __FILE__, __LINE__, i);
558     }
559     i = bi->current();
560     if (i != 10) {
561         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
562     }
563 
564     //
565     // next(n)
566     //
567     bi->first();
568     i = bi->next(4);
569     if (i != 4) {
570         errln("%s:%d Incorrect value from bi->next().  Expected 4, got %d", __FILE__, __LINE__, i);
571     }
572 
573     i = bi->next(6);
574     if (i != 10) {
575         errln("%s:%d Incorrect value from bi->next().  Expected 10, got %d", __FILE__, __LINE__, i);
576     }
577 
578     bi->first();
579     i = bi->next(11);
580     if (i != BreakIterator::DONE) {
581         errln("%s:%d Incorrect value from bi->next().  Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
582     }
583 
584     delete bi;
585 
586 }
587 
588 
589 
590 
591 
592 
TestBuilder()593 void RBBIAPITest::TestBuilder() {
594      UnicodeString rulesString1 = "$Letters = [:L:];\n"
595                                   "$Numbers = [:N:];\n"
596                                   "$Letters+;\n"
597                                   "$Numbers+;\n"
598                                   "[^$Letters $Numbers];\n"
599                                   "!.*;\n";
600      UnicodeString testString1  = "abc123..abc";
601                                 // 01234567890
602      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
603      UErrorCode status=U_ZERO_ERROR;
604      UParseError    parseError;
605 
606      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
607      if(U_FAILURE(status)) {
608          dataerrln("Fail : in construction - %s", u_errorName(status));
609      } else {
610          bi->setText(testString1);
611          doBoundaryTest(*bi, testString1, bounds1);
612      }
613      delete bi;
614 }
615 
616 
617 //
618 //  TestQuoteGrouping
619 //       Single quotes within rules imply a grouping, so that a modifier
620 //       following the quoted text (* or +) applies to all of the quoted chars.
621 //
TestQuoteGrouping()622 void RBBIAPITest::TestQuoteGrouping() {
623      UnicodeString rulesString1 = "#Here comes the rule...\n"
624                                   "'$@!'*;\n"   //  (\$\@\!)*
625                                   ".;\n";
626 
627      UnicodeString testString1  = "$@!$@!X$@!!X";
628                                 // 0123456789012
629      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
630      UErrorCode status=U_ZERO_ERROR;
631      UParseError    parseError;
632 
633      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
634      if(U_FAILURE(status)) {
635          dataerrln("Fail : in construction - %s", u_errorName(status));
636      } else {
637          bi->setText(testString1);
638          doBoundaryTest(*bi, testString1, bounds1);
639      }
640      delete bi;
641 }
642 
643 //
644 //  TestRuleStatus
645 //      Test word break rule status constants.
646 //
TestRuleStatus()647 void RBBIAPITest::TestRuleStatus() {
648      UChar str[30];
649      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
650      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
651      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
652               // 012345678901234567  8      9    0
653               //                     Katakana
654                 str, 30);
655      UnicodeString testString1(str);
656      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
657      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
658                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
659                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
660 
661      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
662                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
663                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
664 
665      UErrorCode status=U_ZERO_ERROR;
666 
667      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
668      if(U_FAILURE(status)) {
669          errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
670      } else {
671          bi->setText(testString1);
672          // First test that the breaks are in the right spots.
673          doBoundaryTest(*bi, testString1, bounds1);
674 
675          // Then go back and check tag values
676          int32_t i = 0;
677          int32_t pos, tag;
678          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
679              if (pos != bounds1[i]) {
680                  errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__, __LINE__, pos);
681                  break;
682              }
683              tag = bi->getRuleStatus();
684              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
685                  errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
686                  break;
687              }
688 
689              // Check that we get the same tag values from getRuleStatusVec()
690              int32_t vec[10];
691              int t = bi->getRuleStatusVec(vec, 10, status);
692              TEST_ASSERT_SUCCESS(status);
693              TEST_ASSERT(t==1);
694              TEST_ASSERT(vec[0] == tag);
695          }
696      }
697      delete bi;
698 
699      // Now test line break status.  This test mostly is to confirm that the status constants
700      //                              are correctly declared in the header.
701      testString1 =   "test line. \n";
702      // break type    s    s     h
703 
704      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
705      if(U_FAILURE(status)) {
706          errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
707      } else {
708          int32_t i = 0;
709          int32_t pos, tag;
710          UBool   success;
711 
712          bi->setText(testString1);
713          pos = bi->current();
714          tag = bi->getRuleStatus();
715          for (i=0; i<3; i++) {
716              switch (i) {
717              case 0:
718                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
719              case 1:
720                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
721              case 2:
722                  success = pos==12 && tag==UBRK_LINE_HARD; break;
723              default:
724                  success = FALSE; break;
725              }
726              if (success == FALSE) {
727                  errln("%s:%d: incorrect line break status or position.  i=%d, pos=%d, tag=%d",
728                      __FILE__, __LINE__, i, pos, tag);
729                  break;
730              }
731              pos = bi->next();
732              tag = bi->getRuleStatus();
733          }
734          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
735              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
736              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
737              errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
738          }
739      }
740      delete bi;
741 
742 }
743 
744 
745 //
746 //  TestRuleStatusVec
747 //      Test the vector form of  break rule status.
748 //
TestRuleStatusVec()749 void RBBIAPITest::TestRuleStatusVec() {
750     UnicodeString rulesString(   "[A-N]{100}; \n"
751                                  "[a-w]{200}; \n"
752                                  "[\\p{L}]{300}; \n"
753                                  "[\\p{N}]{400}; \n"
754                                  "[0-5]{500}; \n"
755                                   "!.*;\n", -1, US_INV);
756      UnicodeString testString1  = "Aapz5?";
757      int32_t  statusVals[10];
758      int32_t  numStatuses;
759      int32_t  pos;
760 
761      UErrorCode status=U_ZERO_ERROR;
762      UParseError    parseError;
763 
764      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
765      if (U_FAILURE(status)) {
766          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
767      } else {
768          bi->setText(testString1);
769 
770          // A
771          pos = bi->next();
772          TEST_ASSERT(pos==1);
773          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
774          TEST_ASSERT_SUCCESS(status);
775          TEST_ASSERT(numStatuses == 2);
776          TEST_ASSERT(statusVals[0] == 100);
777          TEST_ASSERT(statusVals[1] == 300);
778 
779          // a
780          pos = bi->next();
781          TEST_ASSERT(pos==2);
782          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
783          TEST_ASSERT_SUCCESS(status);
784          TEST_ASSERT(numStatuses == 2);
785          TEST_ASSERT(statusVals[0] == 200);
786          TEST_ASSERT(statusVals[1] == 300);
787 
788          // p
789          pos = bi->next();
790          TEST_ASSERT(pos==3);
791          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
792          TEST_ASSERT_SUCCESS(status);
793          TEST_ASSERT(numStatuses == 2);
794          TEST_ASSERT(statusVals[0] == 200);
795          TEST_ASSERT(statusVals[1] == 300);
796 
797          // z
798          pos = bi->next();
799          TEST_ASSERT(pos==4);
800          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
801          TEST_ASSERT_SUCCESS(status);
802          TEST_ASSERT(numStatuses == 1);
803          TEST_ASSERT(statusVals[0] == 300);
804 
805          // 5
806          pos = bi->next();
807          TEST_ASSERT(pos==5);
808          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
809          TEST_ASSERT_SUCCESS(status);
810          TEST_ASSERT(numStatuses == 2);
811          TEST_ASSERT(statusVals[0] == 400);
812          TEST_ASSERT(statusVals[1] == 500);
813 
814          // ?
815          pos = bi->next();
816          TEST_ASSERT(pos==6);
817          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
818          TEST_ASSERT_SUCCESS(status);
819          TEST_ASSERT(numStatuses == 1);
820          TEST_ASSERT(statusVals[0] == 0);
821 
822          //
823          //  Check buffer overflow error handling.   Char == A
824          //
825          bi->first();
826          pos = bi->next();
827          TEST_ASSERT(pos==1);
828          memset(statusVals, -1, sizeof(statusVals));
829          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
830          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
831          TEST_ASSERT(numStatuses == 2);
832          TEST_ASSERT(statusVals[0] == -1);
833 
834          status = U_ZERO_ERROR;
835          memset(statusVals, -1, sizeof(statusVals));
836          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
837          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
838          TEST_ASSERT(numStatuses == 2);
839          TEST_ASSERT(statusVals[0] == 100);
840          TEST_ASSERT(statusVals[1] == -1);
841 
842          status = U_ZERO_ERROR;
843          memset(statusVals, -1, sizeof(statusVals));
844          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
845          TEST_ASSERT_SUCCESS(status);
846          TEST_ASSERT(numStatuses == 2);
847          TEST_ASSERT(statusVals[0] == 100);
848          TEST_ASSERT(statusVals[1] == 300);
849          TEST_ASSERT(statusVals[2] == -1);
850      }
851      delete bi;
852 
853 }
854 
855 //
856 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
857 //                               $variable reference
TestBug2190()858 void RBBIAPITest::TestBug2190() {
859      UnicodeString rulesString1 = "$aaa = abcd;\n"
860                                   "$bbb = $aaa;\n"
861                                   "$bbb;\n";
862      UnicodeString testString1  = "abcdabcd";
863                                 // 01234567890
864      int32_t bounds1[] = {0, 4, 8};
865      UErrorCode status=U_ZERO_ERROR;
866      UParseError    parseError;
867 
868      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
869      if(U_FAILURE(status)) {
870          dataerrln("Fail : in construction - %s", u_errorName(status));
871      } else {
872          bi->setText(testString1);
873          doBoundaryTest(*bi, testString1, bounds1);
874      }
875      delete bi;
876 }
877 
878 
TestRegistration()879 void RBBIAPITest::TestRegistration() {
880 #if !UCONFIG_NO_SERVICE
881     UErrorCode status = U_ZERO_ERROR;
882     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
883     // ok to not delete these if we exit because of error?
884     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
885     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
886     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
887 
888     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
889         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
890 
891         delete ja_word;
892         delete ja_char;
893         delete root_word;
894         delete root_char;
895 
896         return;
897     }
898 
899     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
900     {
901 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
902         if (ja_word && *ja_word == *root_word) {
903             errln("japan not different from root");
904         }
905 #endif
906     }
907 
908     {
909         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
910         UBool fail = TRUE;
911         if(result){
912             fail = *result != *ja_word;
913         }
914         delete result;
915         if (fail) {
916             errln("bad result for xx_XX/word");
917         }
918     }
919 
920     {
921         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
922         UBool fail = TRUE;
923         if(result){
924             fail = *result != *ja_char;
925         }
926         delete result;
927         if (fail) {
928             errln("bad result for ja_JP/char");
929         }
930     }
931 
932     {
933         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
934         UBool fail = TRUE;
935         if(result){
936             fail = *result != *root_char;
937         }
938         delete result;
939         if (fail) {
940             errln("bad result for xx_XX/char");
941         }
942     }
943 
944     {
945         StringEnumeration* avail = BreakIterator::getAvailableLocales();
946         UBool found = FALSE;
947         const UnicodeString* p;
948         while ((p = avail->snext(status))) {
949             if (p->compare("xx") == 0) {
950                 found = TRUE;
951                 break;
952             }
953         }
954         delete avail;
955         if (!found) {
956             errln("did not find test locale");
957         }
958     }
959 
960     {
961         UBool unreg = BreakIterator::unregister(key, status);
962         if (!unreg) {
963             errln("unable to unregister");
964         }
965     }
966 
967     {
968         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
969         BreakIterator* root = BreakIterator::createWordInstance("", status);
970         UBool fail = TRUE;
971         if(root){
972           fail = *root != *result;
973         }
974         delete root;
975         delete result;
976         if (fail) {
977             errln("did not get root break");
978         }
979     }
980 
981     {
982         StringEnumeration* avail = BreakIterator::getAvailableLocales();
983         UBool found = FALSE;
984         const UnicodeString* p;
985         while ((p = avail->snext(status))) {
986             if (p->compare("xx") == 0) {
987                 found = TRUE;
988                 break;
989             }
990         }
991         delete avail;
992         if (found) {
993             errln("found test locale");
994         }
995     }
996 
997     {
998         int32_t count;
999         UBool   foundLocale = FALSE;
1000         const Locale *avail = BreakIterator::getAvailableLocales(count);
1001         for (int i=0; i<count; i++) {
1002             if (avail[i] == Locale::getEnglish()) {
1003                 foundLocale = TRUE;
1004                 break;
1005             }
1006         }
1007         if (foundLocale == FALSE) {
1008             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1009         }
1010     }
1011 
1012 
1013     // ja_word was adopted by factory
1014     delete ja_char;
1015     delete root_word;
1016     delete root_char;
1017 #endif
1018 }
1019 
RoundtripRule(const char * dataFile)1020 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1021     UErrorCode status = U_ZERO_ERROR;
1022     UParseError parseError;
1023     parseError.line = 0;
1024     parseError.offset = 0;
1025     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1026     uint32_t length;
1027     const UChar *builtSource;
1028     const uint8_t *rbbiRules;
1029     const uint8_t *builtRules;
1030 
1031     if (U_FAILURE(status)) {
1032         errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1033         return;
1034     }
1035 
1036     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1037     builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1038     RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1039     if (U_FAILURE(status)) {
1040         errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1041                 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1042         errln(UnicodeString(builtSource));
1043         return;
1044     };
1045     rbbiRules = brkItr->getBinaryRules(length);
1046     logln("Comparing \"%s\" len=%d", dataFile, length);
1047     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1048         errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1049         return;
1050     }
1051     delete brkItr;
1052 }
1053 
TestRoundtripRules()1054 void RBBIAPITest::TestRoundtripRules() {
1055     RoundtripRule("word");
1056     RoundtripRule("title");
1057     RoundtripRule("sent");
1058     RoundtripRule("line");
1059     RoundtripRule("char");
1060     if (!quick) {
1061         RoundtripRule("word_POSIX");
1062     }
1063 }
1064 
1065 
1066 // Check getBinaryRules() and construction of a break iterator from those rules.
1067 
TestGetBinaryRules()1068 void RBBIAPITest::TestGetBinaryRules() {
1069     UErrorCode status=U_ZERO_ERROR;
1070     LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1071     if (U_FAILURE(status)) {
1072         dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1073         return;
1074     }
1075     RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1076     if (rbbi == NULL) {
1077         dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1078         return;
1079     }
1080 
1081     // Check that the new line break iterator is nominally functional.
1082     UnicodeString helloWorld("Hello, World!");
1083     rbbi->setText(helloWorld);
1084     int n = 0;
1085     while (bi->next() != UBRK_DONE) {
1086         ++n;
1087     }
1088     TEST_ASSERT(n == 2);
1089 
1090     // Extract the binary rules as a uint8_t blob.
1091     uint32_t ruleLength;
1092     const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1093     TEST_ASSERT(ruleLength > 0);
1094     TEST_ASSERT(binRules != NULL);
1095 
1096     // Clone the binary rules, and create a break iterator from that.
1097     // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1098     uint8_t *clonedRules = new uint8_t[ruleLength];
1099     memcpy(clonedRules, binRules, ruleLength);
1100     RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1101     TEST_ASSERT_SUCCESS(status);
1102 
1103     // Check that the cloned line break iterator is nominally alive.
1104     clonedBI.setText(helloWorld);
1105     n = 0;
1106     while (clonedBI.next() != UBRK_DONE) {
1107         ++n;
1108     }
1109     TEST_ASSERT(n == 2);
1110 
1111     delete[] clonedRules;
1112 }
1113 
1114 
TestRefreshInputText()1115 void RBBIAPITest::TestRefreshInputText() {
1116     /*
1117      *  RefreshInput changes out the input of a Break Iterator without
1118      *    changing anything else in the iterator's state.  Used with Java JNI,
1119      *    when Java moves the underlying string storage.   This test
1120      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1121      *    The right set of boundaries should still be found.
1122      */
1123     UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1124     UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1125     UErrorCode status = U_ZERO_ERROR;
1126     UText ut1 = UTEXT_INITIALIZER;
1127     UText ut2 = UTEXT_INITIALIZER;
1128     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1129     TEST_ASSERT_SUCCESS(status);
1130 
1131     utext_openUChars(&ut1, testStr, -1, &status);
1132     TEST_ASSERT_SUCCESS(status);
1133 
1134     if (U_SUCCESS(status)) {
1135         bi->setText(&ut1, status);
1136         TEST_ASSERT_SUCCESS(status);
1137 
1138         /* Line boundaries will occur before each letter in the original string */
1139         TEST_ASSERT(1 == bi->next());
1140         TEST_ASSERT(3 == bi->next());
1141 
1142         /* Move the string, kill the original string.  */
1143         u_strcpy(movedStr, testStr);
1144         u_memset(testStr, 0x20, u_strlen(testStr));
1145         utext_openUChars(&ut2, movedStr, -1, &status);
1146         TEST_ASSERT_SUCCESS(status);
1147         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1148         TEST_ASSERT_SUCCESS(status);
1149         TEST_ASSERT(bi == returnedBI);
1150 
1151         /* Find the following matches, now working in the moved string. */
1152         TEST_ASSERT(5 == bi->next());
1153         TEST_ASSERT(7 == bi->next());
1154         TEST_ASSERT(8 == bi->next());
1155         TEST_ASSERT(UBRK_DONE == bi->next());
1156 
1157         utext_close(&ut1);
1158         utext_close(&ut2);
1159     }
1160     delete bi;
1161 
1162 }
1163 
1164 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1165 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1166   static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1167   it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1168 
1169   int32_t *pos = new int32_t[ustr.length()];
1170   int32_t posCount = 0;
1171 
1172   // calculate breaks up front, so we can print out
1173   // sans any debugging
1174   for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1175     pos[posCount++] = n;
1176     if(posCount>=ustr.length()) {
1177       it.errln("brk count exceeds string length!");
1178       return;
1179     }
1180   }
1181   UnicodeString out;
1182   out.append((UChar)CHSTR);
1183   int32_t prev = 0;
1184   for(int32_t i=0;i<posCount;i++) {
1185     int32_t n=pos[i];
1186     out.append(ustr.tempSubString(prev,n-prev));
1187     out.append((UChar)PILCROW);
1188     prev=n;
1189   }
1190   out.append(ustr.tempSubString(prev,ustr.length()-prev));
1191   out.append((UChar)CHEND);
1192   it.logln(out);
1193 
1194   out.remove();
1195   for(int32_t i=0;i<posCount;i++) {
1196     char tmp[100];
1197     sprintf(tmp,"%d ",pos[i]);
1198     out.append(UnicodeString(tmp));
1199   }
1200   it.logln(out);
1201   delete [] pos;
1202 }
1203 #endif
1204 
TestFilteredBreakIteratorBuilder()1205 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1206 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1207   UErrorCode status = U_ZERO_ERROR;
1208   LocalPointer<FilteredBreakIteratorBuilder> builder;
1209   LocalPointer<BreakIterator> baseBI;
1210   LocalPointer<BreakIterator> filteredBI;
1211   LocalPointer<BreakIterator> frenchBI;
1212 
1213   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1214   const UnicodeString ABBR_MR("Mr.");
1215   const UnicodeString ABBR_CAPT("Capt.");
1216 
1217   {
1218     logln("Constructing empty builder\n");
1219     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1220     TEST_ASSERT_SUCCESS(status);
1221 
1222     logln("Constructing base BI\n");
1223     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1224     TEST_ASSERT_SUCCESS(status);
1225 
1226 	logln("Building new BI\n");
1227     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1228     TEST_ASSERT_SUCCESS(status);
1229 
1230 	if (U_SUCCESS(status)) {
1231         logln("Testing:");
1232         filteredBI->setText(text);
1233         TEST_ASSERT(20 == filteredBI->next()); // Mr.
1234         TEST_ASSERT(84 == filteredBI->next()); // recovered.
1235         TEST_ASSERT(90 == filteredBI->next()); // Capt.
1236         TEST_ASSERT(181 == filteredBI->next()); // Mr.
1237         TEST_ASSERT(278 == filteredBI->next()); // charge.
1238         filteredBI->first();
1239         prtbrks(filteredBI.getAlias(), text, *this);
1240     }
1241   }
1242 
1243   {
1244     logln("Constructing empty builder\n");
1245     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1246     TEST_ASSERT_SUCCESS(status);
1247 
1248     if (U_SUCCESS(status)) {
1249         logln("Adding Mr. as an exception\n");
1250         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1251         TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1252         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1253         TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1254         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1255         TEST_ASSERT_SUCCESS(status);
1256 
1257         logln("Constructing base BI\n");
1258         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1259         TEST_ASSERT_SUCCESS(status);
1260 
1261         logln("Building new BI\n");
1262         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1263         TEST_ASSERT_SUCCESS(status);
1264 
1265         logln("Testing:");
1266         filteredBI->setText(text);
1267         TEST_ASSERT(84 == filteredBI->next());
1268         TEST_ASSERT(90 == filteredBI->next());// Capt.
1269         TEST_ASSERT(278 == filteredBI->next());
1270         filteredBI->first();
1271         prtbrks(filteredBI.getAlias(), text, *this);
1272     }
1273   }
1274 
1275 
1276   {
1277     logln("Constructing empty builder\n");
1278     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1279     TEST_ASSERT_SUCCESS(status);
1280 
1281     if (U_SUCCESS(status)) {
1282         logln("Adding Mr. and Capt as an exception\n");
1283         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1284         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1285         TEST_ASSERT_SUCCESS(status);
1286 
1287         logln("Constructing base BI\n");
1288         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1289         TEST_ASSERT_SUCCESS(status);
1290 
1291         logln("Building new BI\n");
1292         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1293         TEST_ASSERT_SUCCESS(status);
1294 
1295         logln("Testing:");
1296         filteredBI->setText(text);
1297         TEST_ASSERT(84 == filteredBI->next());
1298         TEST_ASSERT(278 == filteredBI->next());
1299         filteredBI->first();
1300         prtbrks(filteredBI.getAlias(), text, *this);
1301     }
1302   }
1303 
1304 
1305   {
1306     logln("Constructing English builder\n");
1307     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1308     TEST_ASSERT_SUCCESS(status);
1309 
1310     logln("Constructing base BI\n");
1311     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1312     TEST_ASSERT_SUCCESS(status);
1313 
1314     if (U_SUCCESS(status)) {
1315         logln("unsuppressing 'Capt'");
1316         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1317 
1318         logln("Building new BI\n");
1319         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1320         TEST_ASSERT_SUCCESS(status);
1321 
1322         if(filteredBI.isValid()) {
1323           logln("Testing:");
1324           filteredBI->setText(text);
1325           TEST_ASSERT(84 == filteredBI->next());
1326           TEST_ASSERT(90 == filteredBI->next());
1327           TEST_ASSERT(278 == filteredBI->next());
1328           filteredBI->first();
1329           prtbrks(filteredBI.getAlias(), text, *this);
1330         }
1331     }
1332   }
1333 
1334 
1335   {
1336     logln("Constructing English builder\n");
1337     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1338     TEST_ASSERT_SUCCESS(status);
1339 
1340     logln("Constructing base BI\n");
1341     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1342     TEST_ASSERT_SUCCESS(status);
1343 
1344     if (U_SUCCESS(status)) {
1345         logln("Building new BI\n");
1346         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1347         TEST_ASSERT_SUCCESS(status);
1348 
1349         if(filteredBI.isValid()) {
1350           logln("Testing:");
1351           filteredBI->setText(text);
1352           TEST_ASSERT(84 == filteredBI->next());
1353           TEST_ASSERT(278 == filteredBI->next());
1354           filteredBI->first();
1355           prtbrks(filteredBI.getAlias(), text, *this);
1356         }
1357     }
1358   }
1359 
1360   // reenable once french is in
1361   {
1362     logln("Constructing French builder");
1363     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1364     TEST_ASSERT_SUCCESS(status);
1365 
1366     logln("Constructing base BI\n");
1367     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1368     TEST_ASSERT_SUCCESS(status);
1369 
1370     if (U_SUCCESS(status)) {
1371         logln("Building new BI\n");
1372         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1373         TEST_ASSERT_SUCCESS(status);
1374     }
1375 
1376     if(frenchBI.isValid()) {
1377       logln("Testing:");
1378       UnicodeString frText("C'est MM. Duval.");
1379       frenchBI->setText(frText);
1380       TEST_ASSERT(16 == frenchBI->next());
1381       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1382       frenchBI->first();
1383       prtbrks(frenchBI.getAlias(), frText, *this);
1384       logln("Testing against English:");
1385       filteredBI->setText(frText);
1386       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1387       TEST_ASSERT(16 == filteredBI->next());
1388       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1389       filteredBI->first();
1390       prtbrks(filteredBI.getAlias(), frText, *this);
1391 
1392       // Verify ==
1393       TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
1394       TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1395       TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
1396     } else {
1397       dataerrln("French BI: not valid.");
1398 	}
1399   }
1400 
1401 #else
1402   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1403 #endif
1404 }
1405 
1406 //---------------------------------------------
1407 // runIndexedTest
1408 //---------------------------------------------
1409 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1410 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1411 {
1412     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1413     TESTCASE_AUTO_BEGIN;
1414 #if !UCONFIG_NO_FILE_IO
1415     TESTCASE_AUTO(TestCloneEquals);
1416     TESTCASE_AUTO(TestgetRules);
1417     TESTCASE_AUTO(TestHashCode);
1418     TESTCASE_AUTO(TestGetSetAdoptText);
1419     TESTCASE_AUTO(TestIteration);
1420 #endif
1421     TESTCASE_AUTO(TestBuilder);
1422     TESTCASE_AUTO(TestQuoteGrouping);
1423     TESTCASE_AUTO(TestRuleStatusVec);
1424     TESTCASE_AUTO(TestBug2190);
1425 #if !UCONFIG_NO_FILE_IO
1426     TESTCASE_AUTO(TestRegistration);
1427     TESTCASE_AUTO(TestBoilerPlate);
1428     TESTCASE_AUTO(TestRuleStatus);
1429     TESTCASE_AUTO(TestRoundtripRules);
1430     TESTCASE_AUTO(TestGetBinaryRules);
1431 #endif
1432     TESTCASE_AUTO(TestRefreshInputText);
1433 #if !UCONFIG_NO_BREAK_ITERATION
1434     TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1435 #endif
1436     TESTCASE_AUTO_END;
1437 }
1438 
1439 
1440 //---------------------------------------------
1441 //Internal subroutines
1442 //---------------------------------------------
1443 
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1444 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1445      logln((UnicodeString)"testIsBoundary():");
1446         int32_t p = 0;
1447         UBool isB;
1448         for (int32_t i = 0; i < text.length(); i++) {
1449             isB = bi.isBoundary(i);
1450             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1451 
1452             if (i == boundaries[p]) {
1453                 if (!isB)
1454                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1455                 p++;
1456             }
1457             else {
1458                 if (isB)
1459                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1460             }
1461         }
1462 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1463 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1464     UnicodeString selected;
1465     UnicodeString expected=CharsToUnicodeString(expectedString);
1466 
1467     if(gotoffset != expectedOffset)
1468          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1469     if(start <= gotoffset){
1470         testString.extractBetween(start, gotoffset, selected);
1471     }
1472     else{
1473         testString.extractBetween(gotoffset, start, selected);
1474     }
1475     if(selected.compare(expected) != 0)
1476          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1477     else
1478         logln(prettify("****selected \"" + selected + "\""));
1479 }
1480 
1481 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1482