• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1999-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************
7  *   Date        Name        Description
8  *   12/14/99    Madhu        Creation.
9  *   01/12/2000  Madhu        updated for changed API
10  ********************************************************************/
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_BREAK_ITERATION
15 
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33  * API Test the RuleBasedBreakIterator class
34  */
35 
36 
// TEST_ASSERT_SUCCESS(status)
//   When U_FAILURE(status) is true, calls dataerrln() with the current file,
//   line, and the u_errorName() of the status. Does nothing otherwise.
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END

// TEST_ASSERT(expr)
//   When expr evaluates to false, calls errln() with the current file, line,
//   and the source text of the expression. Does nothing otherwise.
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
48 
TestCloneEquals()49 void RBBIAPITest::TestCloneEquals()
50 {
51 
52     UErrorCode status=U_ZERO_ERROR;
53     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
54     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
55     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
56     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
57     if(U_FAILURE(status)){
58         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
59         return;
60     }
61 
62 
63     UnicodeString testString="Testing word break iterators's clone() and equals()";
64     bi1->setText(testString);
65     bi2->setText(testString);
66     biequal->setText(testString);
67 
68     bi3->setText("hello");
69 
70     logln((UnicodeString)"Testing equals()");
71 
72     logln((UnicodeString)"Testing == and !=");
73     bool b = (*bi1 != *biequal);
74     b |= *bi1 == *bi2;
75     b |= *bi1 == *bi3;
76     if (b) {
77         errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
78     }
79 
80     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
81         errln("%s:%d ERROR:2 RBBI's == and != operator  failed.", __FILE__, __LINE__);
82 
83 
84     // Quick test of RulesBasedBreakIterator assignment -
85     // Check that
86     //    two different iterators are !=
87     //    they are == after assignment
88     //    source and dest iterator produce the same next() after assignment.
89     //    deleting one doesn't disable the other.
90     logln("Testing assignment");
91     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
92     if(U_FAILURE(status)){
93         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
94         return;
95     }
96 
97     RuleBasedBreakIterator biDefault, biDefault2;
98     if(U_FAILURE(status)){
99         errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
100         return;
101     }
102     if (biDefault == *bix) {
103         errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
104         return;
105     }
106     if (biDefault != biDefault2) {
107         errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
108         return;
109     }
110 
111 
112     UnicodeString   HelloString("Hello Kitty");
113     bix->setText(HelloString);
114     if (*bix == *bi2) {
115         errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
116     }
117     *bix = *bi2;
118     if (*bix != *bi2) {
119         errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
120     }
121 
122     int bixnext = bix->next();
123     int bi2next = bi2->next();
124     if (! (bixnext == bi2next && bixnext == 7)) {
125         errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
126     }
127     delete bix;
128     if (bi2->next() != 8) {
129         errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
130     }
131 
132 
133 
134     logln((UnicodeString)"Testing clone()");
135     RuleBasedBreakIterator* bi1clone = bi1->clone();
136     RuleBasedBreakIterator* bi2clone = bi2->clone();
137 
138     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
139       *bi1clone == *bi3 || *bi1clone == *bi2)
140         errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
141 
142     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
143        *bi2clone == *bi3 || *bi2clone != *bi2)
144         errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
145 
146     if(bi1->getText() != bi1clone->getText()   ||
147        bi2clone->getText() != bi2->getText()   ||
148        *bi2clone == *bi1clone )
149         errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
150 
151     delete bi1clone;
152     delete bi2clone;
153     delete bi1;
154     delete bi3;
155     delete bi2;
156     delete biequal;
157 }
158 
TestBoilerPlate()159 void RBBIAPITest::TestBoilerPlate()
160 {
161     UErrorCode status = U_ZERO_ERROR;
162     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
163     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
164     if (U_FAILURE(status)) {
165         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
166         return;
167     }
168     if(*a!=*b){
169         errln("Failed: boilerplate method operator!= does not return correct results");
170     }
171     // Japanese word break iterators are identical to root with
172     // a dictionary-based break iterator
173     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
174     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
175     if(c && d){
176         if(*c!=*d){
177             errln("Failed: boilerplate method operator== does not return correct results");
178         }
179     }else{
180         errln("creation of break iterator failed");
181     }
182     delete a;
183     delete b;
184     delete c;
185     delete d;
186 }
187 
TestgetRules()188 void RBBIAPITest::TestgetRules()
189 {
190     UErrorCode status=U_ZERO_ERROR;
191 
192     LocalPointer<RuleBasedBreakIterator> bi1(
193             (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
194     LocalPointer<RuleBasedBreakIterator> bi2(
195             (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
196     if(U_FAILURE(status)){
197         errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
198         return;
199     }
200 
201     logln((UnicodeString)"Testing getRules()");
202 
203     UnicodeString text(u"Hello there");
204     bi1->setText(text);
205 
206     LocalPointer <RuleBasedBreakIterator> bi3(bi1->clone());
207 
208     UnicodeString temp=bi1->getRules();
209     UnicodeString temp2=bi2->getRules();
210     UnicodeString temp3=bi3->getRules();
211     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
212         errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
213 
214     RuleBasedBreakIterator bi4;   // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
215     if (!bi4.getRules().isEmpty()) {
216         errln("%s:%d Empty string expected.", __FILE__, __LINE__);
217     }
218 }
219 
TestHashCode()220 void RBBIAPITest::TestHashCode()
221 {
222     UErrorCode status=U_ZERO_ERROR;
223     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
224     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
225     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
226     if(U_FAILURE(status)){
227         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
228         delete bi1;
229         delete bi2;
230         delete bi3;
231         return;
232     }
233 
234 
235     logln((UnicodeString)"Testing hashCode()");
236 
237     bi1->setText((UnicodeString)"Hash code");
238     bi2->setText((UnicodeString)"Hash code");
239     bi3->setText((UnicodeString)"Hash code");
240 
241     RuleBasedBreakIterator* bi1clone= bi1->clone();
242     RuleBasedBreakIterator* bi2clone= bi2->clone();
243 
244     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
245         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
246         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
247 
248     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
249         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
250         errln((UnicodeString)"ERROR: different objects have same hashcodes");
251 
252     delete bi1clone;
253     delete bi2clone;
254     delete bi1;
255     delete bi2;
256     delete bi3;
257 
258 }
TestGetSetAdoptText()259 void RBBIAPITest::TestGetSetAdoptText()
260 {
261     logln((UnicodeString)"Testing getText setText ");
262     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
263     UnicodeString str1="first string.";
264     UnicodeString str2="Second string.";
265     LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
266     LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
267     if(status.isFailure()){
268         errcheckln(status, "Fail : in construction - %s", status.errorName());
269             return;
270     }
271 
272 
273     CharacterIterator* text1= new StringCharacterIterator(str1);
274     CharacterIterator* text1Clone = text1->clone();
275     CharacterIterator* text2= new StringCharacterIterator(str2);
276     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
277 
278     wordIter1->setText(str1);
279     CharacterIterator *tci = &wordIter1->getText();
280     UnicodeString      tstr;
281     tci->getText(tstr);
282     TEST_ASSERT(tstr == str1);
283     if(wordIter1->current() != 0)
284         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285 
286     wordIter1->next(2);
287 
288     wordIter1->setText(str2);
289     if(wordIter1->current() != 0)
290         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
291 
292 
293     charIter1->adoptText(text1Clone);
294     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
295     tci = &wordIter1->getText();
296     tci->getText(tstr);
297     TEST_ASSERT(tstr == str2);
298     tci = &charIter1->getText();
299     tci->getText(tstr);
300     TEST_ASSERT(tstr == str1);
301 
302 
303     LocalPointer<RuleBasedBreakIterator> rb(wordIter1->clone());
304     rb->adoptText(text1);
305     if(rb->getText() != *text1)
306         errln((UnicodeString)"ERROR:1 error in adoptText ");
307     rb->adoptText(text2);
308     if(rb->getText() != *text2)
309         errln((UnicodeString)"ERROR:2 error in adoptText ");
310 
311     // Adopt where iterator range is less than the entire original source string.
312     //   (With the change of the break engine to working with UText internally,
313     //    CharacterIterators starting at positions other than zero are not supported)
314     rb->adoptText(text3);
315     TEST_ASSERT(rb->preceding(2) == 0);
316     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
317     //if(rb->preceding(2) != 3) {
318     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
319     //}
320     //if(rb->following(11) != BreakIterator::DONE) {
321     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
322     //}
323 
324     // UText API
325     //
326     //   Quick test to see if UText is working at all.
327     //
328     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
329     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
330     //                012345678901
331 
332     status.reset();
333     LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
334     wordIter1->setText(ut.getAlias(), status);
335     TEST_ASSERT_SUCCESS(status);
336 
337     int32_t pos;
338     pos = wordIter1->first();
339     TEST_ASSERT(pos==0);
340     pos = wordIter1->next();
341     TEST_ASSERT(pos==5);
342     pos = wordIter1->next();
343     TEST_ASSERT(pos==6);
344     pos = wordIter1->next();
345     TEST_ASSERT(pos==11);
346     pos = wordIter1->next();
347     TEST_ASSERT(pos==UBRK_DONE);
348 
349     status.reset();
350     LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
351     TEST_ASSERT_SUCCESS(status);
352     wordIter1->setText(ut2.getAlias(), status);
353     TEST_ASSERT_SUCCESS(status);
354 
355     pos = wordIter1->first();
356     TEST_ASSERT(pos==0);
357     pos = wordIter1->next();
358     TEST_ASSERT(pos==3);
359     pos = wordIter1->next();
360     TEST_ASSERT(pos==4);
361 
362     pos = wordIter1->last();
363     TEST_ASSERT(pos==6);
364     pos = wordIter1->previous();
365     TEST_ASSERT(pos==4);
366     pos = wordIter1->previous();
367     TEST_ASSERT(pos==3);
368     pos = wordIter1->previous();
369     TEST_ASSERT(pos==0);
370     pos = wordIter1->previous();
371     TEST_ASSERT(pos==UBRK_DONE);
372 
373     status.reset();
374     UnicodeString sEmpty;
375     LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
376     wordIter1->getUText(gut2.getAlias(), status);
377     TEST_ASSERT_SUCCESS(status);
378     status.reset();
379 }
380 
381 
TestIteration()382 void RBBIAPITest::TestIteration()
383 {
384     // This test just verifies that the API is present.
385     // Testing for correct operation of the break rules happens elsewhere.
386 
387     UErrorCode status=U_ZERO_ERROR;
388     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
389     if (U_FAILURE(status) || bi == NULL)  {
390         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
391     }
392     delete bi;
393 
394     status=U_ZERO_ERROR;
395     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
396     if (U_FAILURE(status) || bi == NULL)  {
397         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
398     }
399     delete bi;
400 
401     status=U_ZERO_ERROR;
402     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
403     if (U_FAILURE(status) || bi == NULL)  {
404         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
405     }
406     delete bi;
407 
408     status=U_ZERO_ERROR;
409     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
410     if (U_FAILURE(status) || bi == NULL)  {
411         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
412     }
413     delete bi;
414 
415     status=U_ZERO_ERROR;
416     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
417     if (U_FAILURE(status) || bi == NULL)  {
418         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
419     }
420     delete bi;
421 
422     status=U_ZERO_ERROR;
423     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
424     if (U_FAILURE(status) || bi == NULL)  {
425         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
426         return;   // Skip the rest of these tests.
427     }
428 
429 
430     UnicodeString testString="0123456789";
431     bi->setText(testString);
432 
433     int32_t i;
434     i = bi->first();
435     if (i != 0) {
436         errln("%s:%d Incorrect value from bi->first().  Expected 0, got %d.", __FILE__, __LINE__, i);
437     }
438 
439     i = bi->last();
440     if (i != 10) {
441         errln("%s:%d Incorrect value from bi->last().  Expected 10, got %d", __FILE__, __LINE__, i);
442     }
443 
444     //
445     // Previous
446     //
447     bi->last();
448     i = bi->previous();
449     if (i != 9) {
450         errln("%s:%d Incorrect value from bi->last().  Expected 9, got %d", __FILE__, __LINE__, i);
451     }
452 
453 
454     bi->first();
455     i = bi->previous();
456     if (i != BreakIterator::DONE) {
457         errln("%s:%d Incorrect value from bi->previous().  Expected DONE, got %d", __FILE__, __LINE__, i);
458     }
459 
460     //
461     // next()
462     //
463     bi->first();
464     i = bi->next();
465     if (i != 1) {
466         errln("%s:%d Incorrect value from bi->next().  Expected 1, got %d", __FILE__, __LINE__, i);
467     }
468 
469     bi->last();
470     i = bi->next();
471     if (i != BreakIterator::DONE) {
472         errln("%s:%d Incorrect value from bi->next().  Expected DONE, got %d", __FILE__, __LINE__, i);
473     }
474 
475 
476     //
477     //  current()
478     //
479     bi->first();
480     i = bi->current();
481     if (i != 0) {
482         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
483     }
484 
485     bi->next();
486     i = bi->current();
487     if (i != 1) {
488         errln("%s:%d Incorrect value from bi->current().  Expected 1, got %d", __FILE__, __LINE__, i);
489     }
490 
491     bi->last();
492     bi->next();
493     i = bi->current();
494     if (i != 10) {
495         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
496     }
497 
498     bi->first();
499     bi->previous();
500     i = bi->current();
501     if (i != 0) {
502         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
503     }
504 
505 
506     //
507     // Following()
508     //
509     i = bi->following(4);
510     if (i != 5) {
511         errln("%s:%d Incorrect value from bi->following().  Expected 5, got %d", __FILE__, __LINE__, i);
512     }
513 
514     i = bi->following(9);
515     if (i != 10) {
516         errln("%s:%d Incorrect value from bi->following().  Expected 10, got %d", __FILE__, __LINE__, i);
517     }
518 
519     i = bi->following(10);
520     if (i != BreakIterator::DONE) {
521         errln("%s:%d Incorrect value from bi->following().  Expected DONE, got %d", __FILE__, __LINE__, i);
522     }
523 
524 
525     //
526     // Preceding
527     //
528     i = bi->preceding(4);
529     if (i != 3) {
530         errln("%s:%d Incorrect value from bi->preceding().  Expected 3, got %d", __FILE__, __LINE__, i);
531     }
532 
533     i = bi->preceding(10);
534     if (i != 9) {
535         errln("%s:%d Incorrect value from bi->preceding().  Expected 9, got %d", __FILE__, __LINE__, i);
536     }
537 
538     i = bi->preceding(1);
539     if (i != 0) {
540         errln("%s:%d Incorrect value from bi->preceding().  Expected 0, got %d", __FILE__, __LINE__, i);
541     }
542 
543     i = bi->preceding(0);
544     if (i != BreakIterator::DONE) {
545         errln("%s:%d Incorrect value from bi->preceding().  Expected DONE, got %d", __FILE__, __LINE__, i);
546     }
547 
548 
549     //
550     // isBoundary()
551     //
552     bi->first();
553     if (bi->isBoundary(3) != true) {
554         errln("%s:%d Incorrect value from bi->isBoundary().  Expected true, got false", __FILE__, __LINE__, i);
555     }
556     i = bi->current();
557     if (i != 3) {
558         errln("%s:%d Incorrect value from bi->current().  Expected 3, got %d", __FILE__, __LINE__, i);
559     }
560 
561 
562     if (bi->isBoundary(11) != false) {
563         errln("%s:%d Incorrect value from bi->isBoundary().  Expected false, got true", __FILE__, __LINE__, i);
564     }
565     i = bi->current();
566     if (i != 10) {
567         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
568     }
569 
570     //
571     // next(n)
572     //
573     bi->first();
574     i = bi->next(4);
575     if (i != 4) {
576         errln("%s:%d Incorrect value from bi->next().  Expected 4, got %d", __FILE__, __LINE__, i);
577     }
578 
579     i = bi->next(6);
580     if (i != 10) {
581         errln("%s:%d Incorrect value from bi->next().  Expected 10, got %d", __FILE__, __LINE__, i);
582     }
583 
584     bi->first();
585     i = bi->next(11);
586     if (i != BreakIterator::DONE) {
587         errln("%s:%d Incorrect value from bi->next().  Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
588     }
589 
590     delete bi;
591 
592 }
593 
594 
595 
596 
597 
598 
TestBuilder()599 void RBBIAPITest::TestBuilder() {
600      UnicodeString rulesString1 = "$Letters = [:L:];\n"
601                                   "$Numbers = [:N:];\n"
602                                   "$Letters+;\n"
603                                   "$Numbers+;\n"
604                                   "[^$Letters $Numbers];\n"
605                                   "!.*;\n";
606      UnicodeString testString1  = "abc123..abc";
607                                 // 01234567890
608      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
609      UErrorCode status=U_ZERO_ERROR;
610      UParseError    parseError;
611 
612      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
613      if(U_FAILURE(status)) {
614          dataerrln("Fail : in construction - %s", u_errorName(status));
615      } else {
616          bi->setText(testString1);
617          doBoundaryTest(*bi, testString1, bounds1);
618      }
619      delete bi;
620 }
621 
622 
623 //
624 //  TestQuoteGrouping
625 //       Single quotes within rules imply a grouping, so that a modifier
626 //       following the quoted text (* or +) applies to all of the quoted chars.
627 //
TestQuoteGrouping()628 void RBBIAPITest::TestQuoteGrouping() {
629      UnicodeString rulesString1 = "#Here comes the rule...\n"
630                                   "'$@!'*;\n"   //  (\$\@\!)*
631                                   ".;\n";
632 
633      UnicodeString testString1  = "$@!$@!X$@!!X";
634                                 // 0123456789012
635      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
636      UErrorCode status=U_ZERO_ERROR;
637      UParseError    parseError;
638 
639      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
640      if(U_FAILURE(status)) {
641          dataerrln("Fail : in construction - %s", u_errorName(status));
642      } else {
643          bi->setText(testString1);
644          doBoundaryTest(*bi, testString1, bounds1);
645      }
646      delete bi;
647 }
648 
649 //
650 //  TestRuleStatus
651 //      Test word break rule status constants.
652 //
TestRuleStatus()653 void RBBIAPITest::TestRuleStatus() {
654      UChar str[30];
655      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
656      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
657      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
658               // 012345678901234567  8      9    0
659               //                     Katakana
660                 str, 30);
661      UnicodeString testString1(str);
662      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
663      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
664                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
665                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
666 
667      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
668                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
669                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
670 
671      UErrorCode status=U_ZERO_ERROR;
672 
673      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
674      if(U_FAILURE(status)) {
675          errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
676      } else {
677          bi->setText(testString1);
678          // First test that the breaks are in the right spots.
679          doBoundaryTest(*bi, testString1, bounds1);
680 
681          // Then go back and check tag values
682          int32_t i = 0;
683          int32_t pos, tag;
684          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
685              if (pos != bounds1[i]) {
686                  errln("%s:%d FAIL: unexpected word break at position %d", __FILE__, __LINE__, pos);
687                  break;
688              }
689              tag = bi->getRuleStatus();
690              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
691                  errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
692                  break;
693              }
694 
695              // Check that we get the same tag values from getRuleStatusVec()
696              int32_t vec[10];
697              int t = bi->getRuleStatusVec(vec, 10, status);
698              TEST_ASSERT_SUCCESS(status);
699              TEST_ASSERT(t==1);
700              TEST_ASSERT(vec[0] == tag);
701          }
702      }
703      delete bi;
704 
705      // Now test line break status.  This test mostly is to confirm that the status constants
706      //                              are correctly declared in the header.
707      testString1 =   "test line. \n";
708      // break type    s    s     h
709 
710      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
711      if(U_FAILURE(status)) {
712          errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
713      } else {
714          int32_t i = 0;
715          int32_t pos, tag;
716          UBool   success;
717 
718          bi->setText(testString1);
719          pos = bi->current();
720          tag = bi->getRuleStatus();
721          for (i=0; i<3; i++) {
722              switch (i) {
723              case 0:
724                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
725              case 1:
726                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
727              case 2:
728                  success = pos==12 && tag==UBRK_LINE_HARD; break;
729              default:
730                  success = false; break;
731              }
732              if (success == false) {
733                  errln("%s:%d: incorrect line break status or position.  i=%d, pos=%d, tag=%d",
734                      __FILE__, __LINE__, i, pos, tag);
735                  break;
736              }
737              pos = bi->next();
738              tag = bi->getRuleStatus();
739          }
740          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
741              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
742              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
743              errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
744          }
745      }
746      delete bi;
747 
748 }
749 
750 
751 //
752 //  TestRuleStatusVec
753 //      Test the vector form of  break rule status.
754 //
TestRuleStatusVec()755 void RBBIAPITest::TestRuleStatusVec() {
756     UnicodeString rulesString(   "[A-N]{100}; \n"
757                                  "[a-w]{200}; \n"
758                                  "[\\p{L}]{300}; \n"
759                                  "[\\p{N}]{400}; \n"
760                                  "[0-5]{500}; \n"
761                                   "!.*;\n", -1, US_INV);
762      UnicodeString testString1  = "Aapz5?";
763      int32_t  statusVals[10];
764      int32_t  numStatuses;
765      int32_t  pos;
766 
767      UErrorCode status=U_ZERO_ERROR;
768      UParseError    parseError;
769 
770      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
771      if (U_FAILURE(status)) {
772          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
773      } else {
774          bi->setText(testString1);
775 
776          // A
777          pos = bi->next();
778          TEST_ASSERT(pos==1);
779          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780          TEST_ASSERT_SUCCESS(status);
781          TEST_ASSERT(numStatuses == 2);
782          TEST_ASSERT(statusVals[0] == 100);
783          TEST_ASSERT(statusVals[1] == 300);
784 
785          // a
786          pos = bi->next();
787          TEST_ASSERT(pos==2);
788          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789          TEST_ASSERT_SUCCESS(status);
790          TEST_ASSERT(numStatuses == 2);
791          TEST_ASSERT(statusVals[0] == 200);
792          TEST_ASSERT(statusVals[1] == 300);
793 
794          // p
795          pos = bi->next();
796          TEST_ASSERT(pos==3);
797          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798          TEST_ASSERT_SUCCESS(status);
799          TEST_ASSERT(numStatuses == 2);
800          TEST_ASSERT(statusVals[0] == 200);
801          TEST_ASSERT(statusVals[1] == 300);
802 
803          // z
804          pos = bi->next();
805          TEST_ASSERT(pos==4);
806          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
807          TEST_ASSERT_SUCCESS(status);
808          TEST_ASSERT(numStatuses == 1);
809          TEST_ASSERT(statusVals[0] == 300);
810 
811          // 5
812          pos = bi->next();
813          TEST_ASSERT(pos==5);
814          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815          TEST_ASSERT_SUCCESS(status);
816          TEST_ASSERT(numStatuses == 2);
817          TEST_ASSERT(statusVals[0] == 400);
818          TEST_ASSERT(statusVals[1] == 500);
819 
820          // ?
821          pos = bi->next();
822          TEST_ASSERT(pos==6);
823          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
824          TEST_ASSERT_SUCCESS(status);
825          TEST_ASSERT(numStatuses == 1);
826          TEST_ASSERT(statusVals[0] == 0);
827 
828          //
829          //  Check buffer overflow error handling.   Char == A
830          //
831          bi->first();
832          pos = bi->next();
833          TEST_ASSERT(pos==1);
834          memset(statusVals, -1, sizeof(statusVals));
835          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
836          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837          TEST_ASSERT(numStatuses == 2);
838          TEST_ASSERT(statusVals[0] == -1);
839 
840          status = U_ZERO_ERROR;
841          memset(statusVals, -1, sizeof(statusVals));
842          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
843          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
844          TEST_ASSERT(numStatuses == 2);
845          TEST_ASSERT(statusVals[0] == 100);
846          TEST_ASSERT(statusVals[1] == -1);
847 
848          status = U_ZERO_ERROR;
849          memset(statusVals, -1, sizeof(statusVals));
850          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
851          TEST_ASSERT_SUCCESS(status);
852          TEST_ASSERT(numStatuses == 2);
853          TEST_ASSERT(statusVals[0] == 100);
854          TEST_ASSERT(statusVals[1] == 300);
855          TEST_ASSERT(statusVals[2] == -1);
856      }
857      delete bi;
858 
859 }
860 
861 //
862 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
863 //                               $variable reference
TestBug2190()864 void RBBIAPITest::TestBug2190() {
865      UnicodeString rulesString1 = "$aaa = abcd;\n"
866                                   "$bbb = $aaa;\n"
867                                   "$bbb;\n";
868      UnicodeString testString1  = "abcdabcd";
869                                 // 01234567890
870      int32_t bounds1[] = {0, 4, 8};
871      UErrorCode status=U_ZERO_ERROR;
872      UParseError    parseError;
873 
874      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
875      if(U_FAILURE(status)) {
876          dataerrln("Fail : in construction - %s", u_errorName(status));
877      } else {
878          bi->setText(testString1);
879          doBoundaryTest(*bi, testString1, bounds1);
880      }
881      delete bi;
882 }
883 
884 
TestRegistration()885 void RBBIAPITest::TestRegistration() {
886 #if !UCONFIG_NO_SERVICE
887     UErrorCode status = U_ZERO_ERROR;
888     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
889     // ok to not delete these if we exit because of error?
890     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
891     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
892     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
893 
894     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
895         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
896 
897         delete ja_word;
898         delete ja_char;
899         delete root_word;
900         delete root_char;
901 
902         return;
903     }
904 
905     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
906     {
907 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
908         if (ja_word && *ja_word == *root_word) {
909             errln("japan not different from root");
910         }
911 #endif
912     }
913 
914     {
915         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
916         UBool fail = true;
917         if(result){
918             fail = *result != *ja_word;
919         }
920         delete result;
921         if (fail) {
922             errln("bad result for xx_XX/word");
923         }
924     }
925 
926     {
927         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
928         UBool fail = true;
929         if(result){
930             fail = *result != *ja_char;
931         }
932         delete result;
933         if (fail) {
934             errln("bad result for ja_JP/char");
935         }
936     }
937 
938     {
939         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
940         UBool fail = true;
941         if(result){
942             fail = *result != *root_char;
943         }
944         delete result;
945         if (fail) {
946             errln("bad result for xx_XX/char");
947         }
948     }
949 
950     {
951         StringEnumeration* avail = BreakIterator::getAvailableLocales();
952         UBool found = false;
953         const UnicodeString* p;
954         while ((p = avail->snext(status))) {
955             if (p->compare("xx") == 0) {
956                 found = true;
957                 break;
958             }
959         }
960         delete avail;
961         if (!found) {
962             errln("did not find test locale");
963         }
964     }
965 
966     {
967         UBool unreg = BreakIterator::unregister(key, status);
968         if (!unreg) {
969             errln("unable to unregister");
970         }
971     }
972 
973     {
974         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
975         BreakIterator* root = BreakIterator::createWordInstance("", status);
976         UBool fail = true;
977         if(root){
978           fail = *root != *result;
979         }
980         delete root;
981         delete result;
982         if (fail) {
983             errln("did not get root break");
984         }
985     }
986 
987     {
988         StringEnumeration* avail = BreakIterator::getAvailableLocales();
989         UBool found = false;
990         const UnicodeString* p;
991         while ((p = avail->snext(status))) {
992             if (p->compare("xx") == 0) {
993                 found = true;
994                 break;
995             }
996         }
997         delete avail;
998         if (found) {
999             errln("found test locale");
1000         }
1001     }
1002 
1003     {
1004         int32_t count;
1005         UBool   foundLocale = false;
1006         const Locale *avail = BreakIterator::getAvailableLocales(count);
1007         for (int i=0; i<count; i++) {
1008             if (avail[i] == Locale::getEnglish()) {
1009                 foundLocale = true;
1010                 break;
1011             }
1012         }
1013         if (foundLocale == false) {
1014             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1015         }
1016     }
1017 
1018 
1019     // ja_word was adopted by factory
1020     delete ja_char;
1021     delete root_word;
1022     delete root_char;
1023 #endif
1024 }
1025 
RoundtripRule(const char * dataFile)1026 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1027     UErrorCode status = U_ZERO_ERROR;
1028     UParseError parseError;
1029     parseError.line = 0;
1030     parseError.offset = 0;
1031     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1032     uint32_t length;
1033     const char *builtSource;
1034     const uint8_t *rbbiRules;
1035     const uint8_t *builtRules;
1036 
1037     if (U_FAILURE(status)) {
1038         errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1039         return;
1040     }
1041 
1042     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1043     builtSource = (const char *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1044     LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
1045     if (U_FAILURE(status)) {
1046         errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1047                 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1048         errln(UnicodeString(builtSource));
1049         return;
1050     }
1051     rbbiRules = brkItr->getBinaryRules(length);
1052     logln("Comparing \"%s\" len=%d", dataFile, length);
1053     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1054         errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1055         return;
1056     }
1057 }
1058 
TestRoundtripRules()1059 void RBBIAPITest::TestRoundtripRules() {
1060     RoundtripRule("word");
1061     RoundtripRule("title");
1062     RoundtripRule("sent");
1063     RoundtripRule("line");
1064     RoundtripRule("char");
1065     if (!quick) {
1066         RoundtripRule("word_POSIX");
1067     }
1068 }
1069 
1070 
1071 // Check getBinaryRules() and construction of a break iterator from those rules.
1072 
TestGetBinaryRules()1073 void RBBIAPITest::TestGetBinaryRules() {
1074     UErrorCode status=U_ZERO_ERROR;
1075     LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1076     if (U_FAILURE(status)) {
1077         dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1078         return;
1079     }
1080     RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1081     if (rbbi == NULL) {
1082         dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1083         return;
1084     }
1085 
1086     // Check that the new line break iterator is nominally functional.
1087     UnicodeString helloWorld("Hello, World!");
1088     rbbi->setText(helloWorld);
1089     int n = 0;
1090     while (bi->next() != UBRK_DONE) {
1091         ++n;
1092     }
1093     TEST_ASSERT(n == 2);
1094 
1095     // Extract the binary rules as a uint8_t blob.
1096     uint32_t ruleLength;
1097     const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1098     TEST_ASSERT(ruleLength > 0);
1099     TEST_ASSERT(binRules != NULL);
1100 
1101     // Clone the binary rules, and create a break iterator from that.
1102     // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1103     uint8_t *clonedRules = new uint8_t[ruleLength];
1104     memcpy(clonedRules, binRules, ruleLength);
1105     RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1106     TEST_ASSERT_SUCCESS(status);
1107 
1108     // Check that the cloned line break iterator is nominally alive.
1109     clonedBI.setText(helloWorld);
1110     n = 0;
1111     while (clonedBI.next() != UBRK_DONE) {
1112         ++n;
1113     }
1114     TEST_ASSERT(n == 2);
1115 
1116     delete[] clonedRules;
1117 }
1118 
1119 
TestRefreshInputText()1120 void RBBIAPITest::TestRefreshInputText() {
1121     /*
1122      *  RefreshInput changes out the input of a Break Iterator without
1123      *    changing anything else in the iterator's state.  Used with Java JNI,
1124      *    when Java moves the underlying string storage.   This test
1125      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1126      *    The right set of boundaries should still be found.
1127      */
1128     UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1129     UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1130     UErrorCode status = U_ZERO_ERROR;
1131     UText ut1 = UTEXT_INITIALIZER;
1132     UText ut2 = UTEXT_INITIALIZER;
1133     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1134     TEST_ASSERT_SUCCESS(status);
1135 
1136     utext_openUChars(&ut1, testStr, -1, &status);
1137     TEST_ASSERT_SUCCESS(status);
1138 
1139     if (U_SUCCESS(status)) {
1140         bi->setText(&ut1, status);
1141         TEST_ASSERT_SUCCESS(status);
1142 
1143         /* Line boundaries will occur before each letter in the original string */
1144         TEST_ASSERT(1 == bi->next());
1145         TEST_ASSERT(3 == bi->next());
1146 
1147         /* Move the string, kill the original string.  */
1148         u_strcpy(movedStr, testStr);
1149         u_memset(testStr, 0x20, u_strlen(testStr));
1150         utext_openUChars(&ut2, movedStr, -1, &status);
1151         TEST_ASSERT_SUCCESS(status);
1152         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1153         TEST_ASSERT_SUCCESS(status);
1154         TEST_ASSERT(bi == returnedBI);
1155 
1156         /* Find the following matches, now working in the moved string. */
1157         TEST_ASSERT(5 == bi->next());
1158         TEST_ASSERT(7 == bi->next());
1159         TEST_ASSERT(8 == bi->next());
1160         TEST_ASSERT(UBRK_DONE == bi->next());
1161 
1162         utext_close(&ut1);
1163         utext_close(&ut2);
1164     }
1165     delete bi;
1166 
1167 }
1168 
1169 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1170 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1171   static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1172   it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1173 
1174   int32_t *pos = new int32_t[ustr.length()];
1175   int32_t posCount = 0;
1176 
1177   // calculate breaks up front, so we can print out
1178   // sans any debugging
1179   for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1180     pos[posCount++] = n;
1181     if(posCount>=ustr.length()) {
1182       it.errln("brk count exceeds string length!");
1183       return;
1184     }
1185   }
1186   UnicodeString out;
1187   out.append((UChar)CHSTR);
1188   int32_t prev = 0;
1189   for(int32_t i=0;i<posCount;i++) {
1190     int32_t n=pos[i];
1191     out.append(ustr.tempSubString(prev,n-prev));
1192     out.append((UChar)PILCROW);
1193     prev=n;
1194   }
1195   out.append(ustr.tempSubString(prev,ustr.length()-prev));
1196   out.append((UChar)CHEND);
1197   it.logln(out);
1198 
1199   out.remove();
1200   for(int32_t i=0;i<posCount;i++) {
1201     char tmp[100];
1202     sprintf(tmp,"%d ",pos[i]);
1203     out.append(UnicodeString(tmp));
1204   }
1205   it.logln(out);
1206   delete [] pos;
1207 }
1208 #endif
1209 
TestFilteredBreakIteratorBuilder()1210 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1211 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1212   UErrorCode status = U_ZERO_ERROR;
1213   LocalPointer<FilteredBreakIteratorBuilder> builder;
1214   LocalPointer<BreakIterator> baseBI;
1215   LocalPointer<BreakIterator> filteredBI;
1216   LocalPointer<BreakIterator> frenchBI;
1217 
1218   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1219   const UnicodeString ABBR_MR("Mr.");
1220   const UnicodeString ABBR_CAPT("Capt.");
1221 
1222   {
1223     logln("Constructing empty builder\n");
1224     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1225     TEST_ASSERT_SUCCESS(status);
1226 
1227     logln("Constructing base BI\n");
1228     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1229     TEST_ASSERT_SUCCESS(status);
1230 
1231 	logln("Building new BI\n");
1232     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1233     TEST_ASSERT_SUCCESS(status);
1234 
1235 	if (U_SUCCESS(status)) {
1236         logln("Testing:");
1237         filteredBI->setText(text);
1238         TEST_ASSERT(20 == filteredBI->next()); // Mr.
1239         TEST_ASSERT(84 == filteredBI->next()); // recovered.
1240         TEST_ASSERT(90 == filteredBI->next()); // Capt.
1241         TEST_ASSERT(181 == filteredBI->next()); // Mr.
1242         TEST_ASSERT(278 == filteredBI->next()); // charge.
1243         filteredBI->first();
1244         prtbrks(filteredBI.getAlias(), text, *this);
1245     }
1246   }
1247 
1248   {
1249     logln("Constructing empty builder\n");
1250     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1251     TEST_ASSERT_SUCCESS(status);
1252 
1253     if (U_SUCCESS(status)) {
1254         logln("Adding Mr. as an exception\n");
1255         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1256         TEST_ASSERT(false == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1257         TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_MR, status));
1258         TEST_ASSERT(false == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1259         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1260         TEST_ASSERT_SUCCESS(status);
1261 
1262         logln("Constructing base BI\n");
1263         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1264         TEST_ASSERT_SUCCESS(status);
1265 
1266         logln("Building new BI\n");
1267         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1268         TEST_ASSERT_SUCCESS(status);
1269 
1270         logln("Testing:");
1271         filteredBI->setText(text);
1272         TEST_ASSERT(84 == filteredBI->next());
1273         TEST_ASSERT(90 == filteredBI->next());// Capt.
1274         TEST_ASSERT(278 == filteredBI->next());
1275         filteredBI->first();
1276         prtbrks(filteredBI.getAlias(), text, *this);
1277     }
1278   }
1279 
1280 
1281   {
1282     logln("Constructing empty builder\n");
1283     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1284     TEST_ASSERT_SUCCESS(status);
1285 
1286     if (U_SUCCESS(status)) {
1287         logln("Adding Mr. and Capt as an exception\n");
1288         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1289         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_CAPT, status));
1290         TEST_ASSERT_SUCCESS(status);
1291 
1292         logln("Constructing base BI\n");
1293         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1294         TEST_ASSERT_SUCCESS(status);
1295 
1296         logln("Building new BI\n");
1297         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1298         TEST_ASSERT_SUCCESS(status);
1299 
1300         logln("Testing:");
1301         filteredBI->setText(text);
1302         TEST_ASSERT(84 == filteredBI->next());
1303         TEST_ASSERT(278 == filteredBI->next());
1304         filteredBI->first();
1305         prtbrks(filteredBI.getAlias(), text, *this);
1306     }
1307   }
1308 
1309 
1310   {
1311     logln("Constructing English builder\n");
1312     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1313     TEST_ASSERT_SUCCESS(status);
1314 
1315     logln("Constructing base BI\n");
1316     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1317     TEST_ASSERT_SUCCESS(status);
1318 
1319     if (U_SUCCESS(status)) {
1320         logln("unsuppressing 'Capt'");
1321         TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1322 
1323         logln("Building new BI\n");
1324         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1325         TEST_ASSERT_SUCCESS(status);
1326 
1327         if(filteredBI.isValid()) {
1328           logln("Testing:");
1329           filteredBI->setText(text);
1330           TEST_ASSERT(84 == filteredBI->next());
1331           TEST_ASSERT(90 == filteredBI->next());
1332           TEST_ASSERT(278 == filteredBI->next());
1333           filteredBI->first();
1334           prtbrks(filteredBI.getAlias(), text, *this);
1335         }
1336     }
1337   }
1338 
1339 
1340   {
1341     logln("Constructing English builder\n");
1342     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1343     TEST_ASSERT_SUCCESS(status);
1344 
1345     logln("Constructing base BI\n");
1346     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1347     TEST_ASSERT_SUCCESS(status);
1348 
1349     if (U_SUCCESS(status)) {
1350         logln("Building new BI\n");
1351         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1352         TEST_ASSERT_SUCCESS(status);
1353 
1354         if(filteredBI.isValid()) {
1355           logln("Testing:");
1356           filteredBI->setText(text);
1357           TEST_ASSERT(84 == filteredBI->next());
1358           TEST_ASSERT(278 == filteredBI->next());
1359           filteredBI->first();
1360           prtbrks(filteredBI.getAlias(), text, *this);
1361         }
1362     }
1363   }
1364 
1365   // reenable once french is in
1366   {
1367     logln("Constructing French builder");
1368     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1369     TEST_ASSERT_SUCCESS(status);
1370 
1371     logln("Constructing base BI\n");
1372     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1373     TEST_ASSERT_SUCCESS(status);
1374 
1375     if (U_SUCCESS(status)) {
1376         logln("Building new BI\n");
1377         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1378         TEST_ASSERT_SUCCESS(status);
1379     }
1380 
1381     if(frenchBI.isValid()) {
1382       logln("Testing:");
1383       UnicodeString frText("C'est MM. Duval.");
1384       frenchBI->setText(frText);
1385       TEST_ASSERT(16 == frenchBI->next());
1386       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1387       frenchBI->first();
1388       prtbrks(frenchBI.getAlias(), frText, *this);
1389       logln("Testing against English:");
1390       filteredBI->setText(frText);
1391       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1392       TEST_ASSERT(16 == filteredBI->next());
1393       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1394       filteredBI->first();
1395       prtbrks(filteredBI.getAlias(), frText, *this);
1396 
1397       // Verify ==
1398       assertTrue(WHERE, *frenchBI   == *frenchBI);
1399       assertTrue(WHERE, *filteredBI != *frenchBI);
1400       assertTrue(WHERE, *frenchBI   != *filteredBI);
1401     } else {
1402       dataerrln("French BI: not valid.");
1403 	}
1404   }
1405 
1406 #else
1407   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1408 #endif
1409 }
1410 
1411 //---------------------------------------------
1412 // runIndexedTest
1413 //---------------------------------------------
1414 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1415 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1416 {
1417     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1418     TESTCASE_AUTO_BEGIN;
1419 #if !UCONFIG_NO_FILE_IO
1420     TESTCASE_AUTO(TestCloneEquals);
1421     TESTCASE_AUTO(TestgetRules);
1422     TESTCASE_AUTO(TestHashCode);
1423     TESTCASE_AUTO(TestGetSetAdoptText);
1424     TESTCASE_AUTO(TestIteration);
1425 #endif
1426     TESTCASE_AUTO(TestBuilder);
1427     TESTCASE_AUTO(TestQuoteGrouping);
1428     TESTCASE_AUTO(TestRuleStatusVec);
1429     TESTCASE_AUTO(TestBug2190);
1430 #if !UCONFIG_NO_FILE_IO
1431     TESTCASE_AUTO(TestRegistration);
1432     TESTCASE_AUTO(TestBoilerPlate);
1433     TESTCASE_AUTO(TestRuleStatus);
1434     TESTCASE_AUTO(TestRoundtripRules);
1435     TESTCASE_AUTO(TestGetBinaryRules);
1436 #endif
1437     TESTCASE_AUTO(TestRefreshInputText);
1438 #if !UCONFIG_NO_BREAK_ITERATION
1439     TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1440 #endif
1441     TESTCASE_AUTO_END;
1442 }
1443 
1444 
1445 //---------------------------------------------
1446 //Internal subroutines
1447 //---------------------------------------------
1448 
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1449 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1450      logln((UnicodeString)"testIsBoundary():");
1451         int32_t p = 0;
1452         UBool isB;
1453         for (int32_t i = 0; i < text.length(); i++) {
1454             isB = bi.isBoundary(i);
1455             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1456 
1457             if (i == boundaries[p]) {
1458                 if (!isB)
1459                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1460                 p++;
1461             }
1462             else {
1463                 if (isB)
1464                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1465             }
1466         }
1467 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1468 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1469     UnicodeString selected;
1470     UnicodeString expected=CharsToUnicodeString(expectedString);
1471 
1472     if(gotoffset != expectedOffset)
1473          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1474     if(start <= gotoffset){
1475         testString.extractBetween(start, gotoffset, selected);
1476     }
1477     else{
1478         testString.extractBetween(gotoffset, start, selected);
1479     }
1480     if(selected.compare(expected) != 0)
1481          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1482     else
1483         logln(prettify("****selected \"" + selected + "\""));
1484 }
1485 
1486 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1487