• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * Copyright (c) 1999-2007, International Business Machines
3  * Corporation and others. All Rights Reserved.
4  ********************************************************************
5  *   Date        Name        Description
6  *   12/14/99    Madhu        Creation.
7  *   01/12/2000  Madhu        updated for changed API
8  ********************************************************************/
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_BREAK_ITERATION
13 
14 #include "unicode/uchar.h"
15 #include "intltest.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
18 #include "rbbiapts.h"
19 #include "rbbidata.h"
20 #include "cstring.h"
21 #include "ubrkimpl.h"
22 #include "unicode/ustring.h"
23 #include "unicode/utext.h"
24 
/**
 * API tests for the RuleBasedBreakIterator class.
 */
28 
29 
// Reports a test error naming the source file, the line and the ICU error
// name when `status` holds a failure code. Expands to a braced block that
// calls errln(), so errln() must be callable at the point of use.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errln("Failure at file %s, line %d, error = %s", \
              __FILE__, __LINE__, u_errorName(status)); \
    } \
}
32 
// Reports a test error naming the source file and line when `expr` compares
// equal to FALSE. Expands to a braced block that calls errln().
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d", __FILE__, __LINE__); \
    } \
}
35 
TestCloneEquals()36 void RBBIAPITest::TestCloneEquals()
37 {
38 
39     UErrorCode status=U_ZERO_ERROR;
40     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
41     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
42     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
44     if(U_FAILURE(status)){
45         errln((UnicodeString)"FAIL : in construction");
46         return;
47     }
48 
49 
50     UnicodeString testString="Testing word break iterators's clone() and equals()";
51     bi1->setText(testString);
52     bi2->setText(testString);
53     biequal->setText(testString);
54 
55     bi3->setText("hello");
56 
57     logln((UnicodeString)"Testing equals()");
58 
59     logln((UnicodeString)"Testing == and !=");
60     UBool b = (*bi1 != *biequal);
61     b |= *bi1 == *bi2;
62     b |= *bi1 == *bi3;
63     if (b) {
64         errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
65     }
66 
67     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
68         errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
69 
70 
71     // Quick test of RulesBasedBreakIterator assignment -
72     // Check that
73     //    two different iterators are !=
74     //    they are == after assignment
75     //    source and dest iterator produce the same next() after assignment.
76     //    deleting one doesn't disable the other.
77     logln("Testing assignment");
78     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
79     if(U_FAILURE(status)){
80         errln((UnicodeString)"FAIL : in construction");
81         return;
82     }
83 
84     RuleBasedBreakIterator biDefault, biDefault2;
85     if(U_FAILURE(status)){
86         errln((UnicodeString)"FAIL : in construction of default iterator");
87         return;
88     }
89     if (biDefault == *bix) {
90         errln((UnicodeString)"ERROR: iterators should not compare ==");
91         return;
92     }
93     if (biDefault != biDefault2) {
94         errln((UnicodeString)"ERROR: iterators should compare ==");
95         return;
96     }
97 
98 
99     UnicodeString   HelloString("Hello Kitty");
100     bix->setText(HelloString);
101     if (*bix == *bi2) {
102         errln(UnicodeString("ERROR: strings should not be equal before assignment."));
103     }
104     *bix = *bi2;
105     if (*bix != *bi2) {
106         errln(UnicodeString("ERROR: strings should be equal before assignment."));
107     }
108 
109     int bixnext = bix->next();
110     int bi2next = bi2->next();
111     if (! (bixnext == bi2next && bixnext == 7)) {
112         errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
113     }
114     delete bix;
115     if (bi2->next() != 8) {
116         errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
117     }
118 
119 
120 
121     logln((UnicodeString)"Testing clone()");
122     RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
123     RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
124 
125     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
126       *bi1clone == *bi3 || *bi1clone == *bi2)
127         errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
128 
129     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
130        *bi2clone == *bi3 || *bi2clone != *bi2)
131         errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
132 
133     if(bi1->getText() != bi1clone->getText()   ||
134        bi2clone->getText() != bi2->getText()   ||
135        *bi2clone == *bi1clone )
136         errln((UnicodeString)"ERROR: RBBI's clone() method failed");
137 
138     delete bi1clone;
139     delete bi2clone;
140     delete bi1;
141     delete bi3;
142     delete bi2;
143     delete biequal;
144 }
145 
TestBoilerPlate()146 void RBBIAPITest::TestBoilerPlate()
147 {
148     UErrorCode status = U_ZERO_ERROR;
149     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
150     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
151     if (U_FAILURE(status)) {
152         errln("Creation of break iterator failed %s", u_errorName(status));
153         return;
154     }
155     if(*a!=*b){
156         errln("Failed: boilerplate method operator!= does not return correct results");
157     }
158     BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status);
159     if(a && c){
160         if(*c==*a){
161             errln("Failed: boilerplate method opertator== does not return correct results");
162         }
163     }else{
164         errln("creation of break iterator failed");
165     }
166     delete a;
167     delete b;
168     delete c;
169 }
170 
TestgetRules()171 void RBBIAPITest::TestgetRules()
172 {
173     UErrorCode status=U_ZERO_ERROR;
174 
175     RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
176     RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
177     if(U_FAILURE(status)){
178         errln((UnicodeString)"FAIL: in construction");
179         delete bi1;
180         delete bi2;
181         return;
182     }
183 
184 
185 
186     logln((UnicodeString)"Testing toString()");
187 
188     bi1->setText((UnicodeString)"Hello there");
189 
190     RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
191 
192     UnicodeString temp=bi1->getRules();
193     UnicodeString temp2=bi2->getRules();
194     UnicodeString temp3=bi3->getRules();
195     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
196         errln((UnicodeString)"ERROR: error in getRules() method");
197 
198     delete bi1;
199     delete bi2;
200     delete bi3;
201 }
TestHashCode()202 void RBBIAPITest::TestHashCode()
203 {
204     UErrorCode status=U_ZERO_ERROR;
205     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
206     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
207     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
208     if(U_FAILURE(status)){
209         errln((UnicodeString)"FAIL : in construction");
210         delete bi1;
211         delete bi2;
212         delete bi3;
213         return;
214     }
215 
216 
217     logln((UnicodeString)"Testing hashCode()");
218 
219     bi1->setText((UnicodeString)"Hash code");
220     bi2->setText((UnicodeString)"Hash code");
221     bi3->setText((UnicodeString)"Hash code");
222 
223     RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
224     RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
225 
226     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
227         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
228         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
229 
230     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
231         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
232         errln((UnicodeString)"ERROR: different objects have same hashcodes");
233 
234     delete bi1clone;
235     delete bi2clone;
236     delete bi1;
237     delete bi2;
238     delete bi3;
239 
240 }
TestGetSetAdoptText()241 void RBBIAPITest::TestGetSetAdoptText()
242 {
243     logln((UnicodeString)"Testing getText setText ");
244     UErrorCode status=U_ZERO_ERROR;
245     UnicodeString str1="first string.";
246     UnicodeString str2="Second string.";
247     RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
248     RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
249     if(U_FAILURE(status)){
250         errln((UnicodeString)"FAIL : in construction");
251             return;
252     }
253 
254 
255     CharacterIterator* text1= new StringCharacterIterator(str1);
256     CharacterIterator* text1Clone = text1->clone();
257     CharacterIterator* text2= new StringCharacterIterator(str2);
258     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
259 
260     wordIter1->setText(str1);
261     CharacterIterator *tci = &wordIter1->getText();
262     UnicodeString      tstr;
263     tci->getText(tstr);
264     TEST_ASSERT(tstr == str1);
265     if(wordIter1->current() != 0)
266         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
267 
268     wordIter1->next(2);
269 
270     wordIter1->setText(str2);
271     if(wordIter1->current() != 0)
272         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
273 
274 
275     charIter1->adoptText(text1Clone);
276     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
277     tci = &wordIter1->getText();
278     tci->getText(tstr);
279     TEST_ASSERT(tstr == str2);
280     tci = &charIter1->getText();
281     tci->getText(tstr);
282     TEST_ASSERT(tstr == str1);
283 
284 
285     RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
286     rb->adoptText(text1);
287     if(rb->getText() != *text1)
288         errln((UnicodeString)"ERROR:1 error in adoptText ");
289     rb->adoptText(text2);
290     if(rb->getText() != *text2)
291         errln((UnicodeString)"ERROR:2 error in adoptText ");
292 
293     // Adopt where iterator range is less than the entire orignal source string.
294     //   (With the change of the break engine to working with UText internally,
295     //    CharacterIterators starting at positions other than zero are not supported)
296     rb->adoptText(text3);
297     TEST_ASSERT(rb->preceding(2) == 0);
298     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
299     //if(rb->preceding(2) != 3) {
300     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
301     //}
302     //if(rb->following(11) != BreakIterator::DONE) {
303     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
304     //}
305 
306     // UText API
307     //
308     //   Quick test to see if UText is working at all.
309     //
310     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
311     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
312     //                012345678901
313 
314     status = U_ZERO_ERROR;
315     UText *ut = utext_openUTF8(NULL, s1, -1, &status);
316     wordIter1->setText(ut, status);
317     TEST_ASSERT_SUCCESS(status);
318 
319     int32_t pos;
320     pos = wordIter1->first();
321     TEST_ASSERT(pos==0);
322     pos = wordIter1->next();
323     TEST_ASSERT(pos==5);
324     pos = wordIter1->next();
325     TEST_ASSERT(pos==6);
326     pos = wordIter1->next();
327     TEST_ASSERT(pos==11);
328     pos = wordIter1->next();
329     TEST_ASSERT(pos==UBRK_DONE);
330 
331     status = U_ZERO_ERROR;
332     UText *ut2 = utext_openUTF8(NULL, s2, -1, &status);
333     TEST_ASSERT_SUCCESS(status);
334     wordIter1->setText(ut2, status);
335     TEST_ASSERT_SUCCESS(status);
336 
337     pos = wordIter1->first();
338     TEST_ASSERT(pos==0);
339     pos = wordIter1->next();
340     TEST_ASSERT(pos==3);
341     pos = wordIter1->next();
342     TEST_ASSERT(pos==4);
343 
344     pos = wordIter1->last();
345     TEST_ASSERT(pos==6);
346     pos = wordIter1->previous();
347     TEST_ASSERT(pos==4);
348     pos = wordIter1->previous();
349     TEST_ASSERT(pos==3);
350     pos = wordIter1->previous();
351     TEST_ASSERT(pos==0);
352     pos = wordIter1->previous();
353     TEST_ASSERT(pos==UBRK_DONE);
354 
355     status = U_ZERO_ERROR;
356     UnicodeString sEmpty;
357     UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status);
358     wordIter1->getUText(gut2, status);
359     TEST_ASSERT_SUCCESS(status);
360     utext_close(gut2);
361 
362     utext_close(ut);
363     utext_close(ut2);
364 
365     delete wordIter1;
366     delete charIter1;
367     delete rb;
368 
369  }
370 
371 
TestIteration()372 void RBBIAPITest::TestIteration()
373 {
374     // This test just verifies that the API is present.
375     // Testing for correct operation of the break rules happens elsewhere.
376 
377     UErrorCode status=U_ZERO_ERROR;
378     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
379     if (U_FAILURE(status) || bi == NULL)  {
380         errln("Failure creating character break iterator.  Status = %s", u_errorName(status));
381     }
382     delete bi;
383 
384     status=U_ZERO_ERROR;
385     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
386     if (U_FAILURE(status) || bi == NULL)  {
387         errln("Failure creating Word break iterator.  Status = %s", u_errorName(status));
388     }
389     delete bi;
390 
391     status=U_ZERO_ERROR;
392     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
393     if (U_FAILURE(status) || bi == NULL)  {
394         errln("Failure creating Line break iterator.  Status = %s", u_errorName(status));
395     }
396     delete bi;
397 
398     status=U_ZERO_ERROR;
399     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
400     if (U_FAILURE(status) || bi == NULL)  {
401         errln("Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
402     }
403     delete bi;
404 
405     status=U_ZERO_ERROR;
406     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
407     if (U_FAILURE(status) || bi == NULL)  {
408         errln("Failure creating Title break iterator.  Status = %s", u_errorName(status));
409     }
410     delete bi;
411 
412     status=U_ZERO_ERROR;
413     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
414     if (U_FAILURE(status) || bi == NULL)  {
415         errln("Failure creating character break iterator.  Status = %s", u_errorName(status));
416         return;   // Skip the rest of these tests.
417     }
418 
419 
420     UnicodeString testString="0123456789";
421     bi->setText(testString);
422 
423     int32_t i;
424     i = bi->first();
425     if (i != 0) {
426         errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
427     }
428 
429     i = bi->last();
430     if (i != 10) {
431         errln("Incorrect value from bi->last().  Expected 10, got %d", i);
432     }
433 
434     //
435     // Previous
436     //
437     bi->last();
438     i = bi->previous();
439     if (i != 9) {
440         errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
441     }
442 
443 
444     bi->first();
445     i = bi->previous();
446     if (i != BreakIterator::DONE) {
447         errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
448     }
449 
450     //
451     // next()
452     //
453     bi->first();
454     i = bi->next();
455     if (i != 1) {
456         errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
457     }
458 
459     bi->last();
460     i = bi->next();
461     if (i != BreakIterator::DONE) {
462         errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
463     }
464 
465 
466     //
467     //  current()
468     //
469     bi->first();
470     i = bi->current();
471     if (i != 0) {
472         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
473     }
474 
475     bi->next();
476     i = bi->current();
477     if (i != 1) {
478         errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
479     }
480 
481     bi->last();
482     bi->next();
483     i = bi->current();
484     if (i != 10) {
485         errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
486     }
487 
488     bi->first();
489     bi->previous();
490     i = bi->current();
491     if (i != 0) {
492         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
493     }
494 
495 
496     //
497     // Following()
498     //
499     i = bi->following(4);
500     if (i != 5) {
501         errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
502     }
503 
504     i = bi->following(9);
505     if (i != 10) {
506         errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
507     }
508 
509     i = bi->following(10);
510     if (i != BreakIterator::DONE) {
511         errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
512     }
513 
514 
515     //
516     // Preceding
517     //
518     i = bi->preceding(4);
519     if (i != 3) {
520         errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
521     }
522 
523     i = bi->preceding(10);
524     if (i != 9) {
525         errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
526     }
527 
528     i = bi->preceding(1);
529     if (i != 0) {
530         errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
531     }
532 
533     i = bi->preceding(0);
534     if (i != BreakIterator::DONE) {
535         errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
536     }
537 
538 
539     //
540     // isBoundary()
541     //
542     bi->first();
543     if (bi->isBoundary(3) != TRUE) {
544         errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
545     }
546     i = bi->current();
547     if (i != 3) {
548         errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
549     }
550 
551 
552     if (bi->isBoundary(11) != FALSE) {
553         errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
554     }
555     i = bi->current();
556     if (i != 10) {
557         errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
558     }
559 
560     //
561     // next(n)
562     //
563     bi->first();
564     i = bi->next(4);
565     if (i != 4) {
566         errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
567     }
568 
569     i = bi->next(6);
570     if (i != 10) {
571         errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
572     }
573 
574     bi->first();
575     i = bi->next(11);
576     if (i != BreakIterator::DONE) {
577         errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
578     }
579 
580     delete bi;
581 
582 }
583 
584 
585 
586 
587 
588 
TestBuilder()589 void RBBIAPITest::TestBuilder() {
590      UnicodeString rulesString1 = "$Letters = [:L:];\n"
591                                   "$Numbers = [:N:];\n"
592                                   "$Letters+;\n"
593                                   "$Numbers+;\n"
594                                   "[^$Letters $Numbers];\n"
595                                   "!.*;\n";
596      UnicodeString testString1  = "abc123..abc";
597                                 // 01234567890
598      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
599      UErrorCode status=U_ZERO_ERROR;
600      UParseError    parseError;
601 
602      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
603      if(U_FAILURE(status)) {
604          errln("FAIL : in construction");
605      } else {
606          bi->setText(testString1);
607          doBoundaryTest(*bi, testString1, bounds1);
608      }
609      delete bi;
610 }
611 
612 
613 //
614 //  TestQuoteGrouping
615 //       Single quotes within rules imply a grouping, so that a modifier
616 //       following the quoted text (* or +) applies to all of the quoted chars.
617 //
TestQuoteGrouping()618 void RBBIAPITest::TestQuoteGrouping() {
619      UnicodeString rulesString1 = "#Here comes the rule...\n"
620                                   "'$@!'*;\n"   //  (\$\@\!)*
621                                   ".;\n";
622 
623      UnicodeString testString1  = "$@!$@!X$@!!X";
624                                 // 0123456789012
625      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
626      UErrorCode status=U_ZERO_ERROR;
627      UParseError    parseError;
628 
629      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
630      if(U_FAILURE(status)) {
631          errln("FAIL : in construction");
632      } else {
633          bi->setText(testString1);
634          doBoundaryTest(*bi, testString1, bounds1);
635      }
636      delete bi;
637 }
638 
639 //
640 //  TestRuleStatus
641 //      Test word break rule status constants.
642 //
TestRuleStatus()643 void RBBIAPITest::TestRuleStatus() {
644      UChar str[30];
645      u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
646               // 012345678901234567  8      9    0  1      2    3  4      5    6
647               //                    Ideographic    Katakana       Hiragana
648                 str, 30);
649      UnicodeString testString1(str);
650      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
651      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
652                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
653                           UBRK_WORD_IDEO,     UBRK_WORD_IDEO,   UBRK_WORD_NONE,
654                           UBRK_WORD_KANA,     UBRK_WORD_NONE,   UBRK_WORD_KANA,    UBRK_WORD_KANA};
655 
656      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
657                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
658                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT,   UBRK_WORD_NONE_LIMIT,
659                           UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT,   UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
660 
661      UErrorCode status=U_ZERO_ERROR;
662 
663      RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
664      if(U_FAILURE(status)) {
665          errln("FAIL : in construction");
666      } else {
667          bi->setText(testString1);
668          // First test that the breaks are in the right spots.
669          doBoundaryTest(*bi, testString1, bounds1);
670 
671          // Then go back and check tag values
672          int32_t i = 0;
673          int32_t pos, tag;
674          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
675              if (pos != bounds1[i]) {
676                  errln("FAIL: unexpected word break at postion %d", pos);
677                  break;
678              }
679              tag = bi->getRuleStatus();
680              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
681                  errln("FAIL: incorrect tag value %d at position %d", tag, pos);
682                  break;
683              }
684 
685              // Check that we get the same tag values from getRuleStatusVec()
686              int32_t vec[10];
687              int t = bi->getRuleStatusVec(vec, 10, status);
688              TEST_ASSERT_SUCCESS(status);
689              TEST_ASSERT(t==1);
690              TEST_ASSERT(vec[0] == tag);
691          }
692      }
693      delete bi;
694 
695      // Now test line break status.  This test mostly is to confirm that the status constants
696      //                              are correctly declared in the header.
697      testString1 =   "test line. \n";
698      // break type    s    s     h
699 
700      bi = (RuleBasedBreakIterator *)
701          BreakIterator::createLineInstance(Locale::getEnglish(), status);
702      if(U_FAILURE(status)) {
703          errln("failed to create word break iterator.");
704      } else {
705          int32_t i = 0;
706          int32_t pos, tag;
707          UBool   success;
708 
709          bi->setText(testString1);
710          pos = bi->current();
711          tag = bi->getRuleStatus();
712          for (i=0; i<3; i++) {
713              switch (i) {
714              case 0:
715                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
716              case 1:
717                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
718              case 2:
719                  success = pos==12 && tag==UBRK_LINE_HARD; break;
720              default:
721                  success = FALSE; break;
722              }
723              if (success == FALSE) {
724                  errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
725                      i, pos, tag);
726                  break;
727              }
728              pos = bi->next();
729              tag = bi->getRuleStatus();
730          }
731          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
732              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
733              UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) {
734              errln("UBRK_LINE_* constants from header are inconsistent.");
735          }
736      }
737      delete bi;
738 
739 }
740 
741 
742 //
743 //  TestRuleStatusVec
744 //      Test the vector form of  break rule status.
745 //
TestRuleStatusVec()746 void RBBIAPITest::TestRuleStatusVec() {
747     UnicodeString rulesString  = "[A-N]{100}; \n"
748                                  "[a-w]{200}; \n"
749                                  "[\\p{L}]{300}; \n"
750                                  "[\\p{N}]{400}; \n"
751                                  "[0-5]{500}; \n"
752                                   "!.*;\n";
753      UnicodeString testString1  = "Aapz5?";
754      int32_t  statusVals[10];
755      int32_t  numStatuses;
756      int32_t  pos;
757 
758      UErrorCode status=U_ZERO_ERROR;
759      UParseError    parseError;
760 
761      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
762      TEST_ASSERT_SUCCESS(status);
763      if (U_SUCCESS(status)) {
764          bi->setText(testString1);
765 
766          // A
767          pos = bi->next();
768          TEST_ASSERT(pos==1);
769          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
770          TEST_ASSERT_SUCCESS(status);
771          TEST_ASSERT(numStatuses == 2);
772          TEST_ASSERT(statusVals[0] == 100);
773          TEST_ASSERT(statusVals[1] == 300);
774 
775          // a
776          pos = bi->next();
777          TEST_ASSERT(pos==2);
778          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
779          TEST_ASSERT_SUCCESS(status);
780          TEST_ASSERT(numStatuses == 2);
781          TEST_ASSERT(statusVals[0] == 200);
782          TEST_ASSERT(statusVals[1] == 300);
783 
784          // p
785          pos = bi->next();
786          TEST_ASSERT(pos==3);
787          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
788          TEST_ASSERT_SUCCESS(status);
789          TEST_ASSERT(numStatuses == 2);
790          TEST_ASSERT(statusVals[0] == 200);
791          TEST_ASSERT(statusVals[1] == 300);
792 
793          // z
794          pos = bi->next();
795          TEST_ASSERT(pos==4);
796          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
797          TEST_ASSERT_SUCCESS(status);
798          TEST_ASSERT(numStatuses == 1);
799          TEST_ASSERT(statusVals[0] == 300);
800 
801          // 5
802          pos = bi->next();
803          TEST_ASSERT(pos==5);
804          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
805          TEST_ASSERT_SUCCESS(status);
806          TEST_ASSERT(numStatuses == 2);
807          TEST_ASSERT(statusVals[0] == 400);
808          TEST_ASSERT(statusVals[1] == 500);
809 
810          // ?
811          pos = bi->next();
812          TEST_ASSERT(pos==6);
813          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
814          TEST_ASSERT_SUCCESS(status);
815          TEST_ASSERT(numStatuses == 1);
816          TEST_ASSERT(statusVals[0] == 0);
817 
818          //
819          //  Check buffer overflow error handling.   Char == A
820          //
821          bi->first();
822          pos = bi->next();
823          TEST_ASSERT(pos==1);
824          memset(statusVals, -1, sizeof(statusVals));
825          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
826          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
827          TEST_ASSERT(numStatuses == 2);
828          TEST_ASSERT(statusVals[0] == -1);
829 
830          status = U_ZERO_ERROR;
831          memset(statusVals, -1, sizeof(statusVals));
832          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
833          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
834          TEST_ASSERT(numStatuses == 2);
835          TEST_ASSERT(statusVals[0] == 100);
836          TEST_ASSERT(statusVals[1] == -1);
837 
838          status = U_ZERO_ERROR;
839          memset(statusVals, -1, sizeof(statusVals));
840          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
841          TEST_ASSERT_SUCCESS(status);
842          TEST_ASSERT(numStatuses == 2);
843          TEST_ASSERT(statusVals[0] == 100);
844          TEST_ASSERT(statusVals[1] == 300);
845          TEST_ASSERT(statusVals[2] == -1);
846      }
847      delete bi;
848 
849 }
850 
851 //
852 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
853 //                               $variable reference
TestBug2190()854 void RBBIAPITest::TestBug2190() {
855      UnicodeString rulesString1 = "$aaa = abcd;\n"
856                                   "$bbb = $aaa;\n"
857                                   "$bbb;\n";
858      UnicodeString testString1  = "abcdabcd";
859                                 // 01234567890
860      int32_t bounds1[] = {0, 4, 8};
861      UErrorCode status=U_ZERO_ERROR;
862      UParseError    parseError;
863 
864      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
865      if(U_FAILURE(status)) {
866          errln("FAIL : in construction");
867      } else {
868          bi->setText(testString1);
869          doBoundaryTest(*bi, testString1, bounds1);
870      }
871      delete bi;
872 }
873 
874 
TestRegistration()875 void RBBIAPITest::TestRegistration() {
876 #if !UCONFIG_NO_SERVICE
877     UErrorCode status = U_ZERO_ERROR;
878     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
879 
880     // ok to not delete these if we exit because of error?
881     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
882     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
883     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
884 
885     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
886     {
887         if (ja_word && *ja_word == *root_word) {
888             errln("japan not different from root");
889         }
890     }
891 
892     {
893         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
894         UBool fail = TRUE;
895         if(result){
896             fail = *result != *ja_word;
897         }
898         delete result;
899         if (fail) {
900             errln("bad result for xx_XX/word");
901         }
902     }
903 
904     {
905         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
906         UBool fail = TRUE;
907         if(result){
908             fail = *result != *ja_char;
909         }
910         delete result;
911         if (fail) {
912             errln("bad result for ja_JP/char");
913         }
914     }
915 
916     {
917         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
918         UBool fail = TRUE;
919         if(result){
920             fail = *result != *root_char;
921         }
922         delete result;
923         if (fail) {
924             errln("bad result for xx_XX/char");
925         }
926     }
927 
928     {
929         StringEnumeration* avail = BreakIterator::getAvailableLocales();
930         UBool found = FALSE;
931         const UnicodeString* p;
932         while ((p = avail->snext(status))) {
933             if (p->compare("xx") == 0) {
934                 found = TRUE;
935                 break;
936             }
937         }
938         delete avail;
939         if (!found) {
940             errln("did not find test locale");
941         }
942     }
943 
944     {
945         UBool unreg = BreakIterator::unregister(key, status);
946         if (!unreg) {
947             errln("unable to unregister");
948         }
949     }
950 
951     {
952         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
953         BreakIterator* root = BreakIterator::createWordInstance("", status);
954         UBool fail = TRUE;
955         if(root){
956           fail = *root != *result;
957         }
958         delete root;
959         delete result;
960         if (fail) {
961             errln("did not get root break");
962         }
963     }
964 
965     {
966         StringEnumeration* avail = BreakIterator::getAvailableLocales();
967         UBool found = FALSE;
968         const UnicodeString* p;
969         while ((p = avail->snext(status))) {
970             if (p->compare("xx") == 0) {
971                 found = TRUE;
972                 break;
973             }
974         }
975         delete avail;
976         if (found) {
977             errln("found test locale");
978         }
979     }
980 
981     {
982         int32_t count;
983         UBool   foundLocale = FALSE;
984         const Locale *avail = BreakIterator::getAvailableLocales(count);
985         for (int i=0; i<count; i++) {
986             if (avail[i] == Locale::getEnglish()) {
987                 foundLocale = TRUE;
988                 break;
989             }
990         }
991         if (foundLocale == FALSE) {
992             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
993         }
994     }
995 
996 
997     // ja_word was adopted by factory
998     delete ja_char;
999     delete root_word;
1000     delete root_char;
1001 #endif
1002 }
1003 
RoundtripRule(const char * dataFile)1004 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1005     UErrorCode status = U_ZERO_ERROR;
1006     UParseError parseError;
1007     parseError.line = 0;
1008     parseError.offset = 0;
1009     UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status);
1010     uint32_t length;
1011     const UChar *builtSource;
1012     const uint8_t *rbbiRules;
1013     const uint8_t *builtRules;
1014 
1015     if (U_FAILURE(status)) {
1016         errln("Can't open \"%s\"", dataFile);
1017         return;
1018     }
1019 
1020     builtRules = (const uint8_t *)udata_getMemory(data);
1021     builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1022     RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1023     if (U_FAILURE(status)) {
1024         errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1025                 u_errorName(status), parseError.line, parseError.offset);
1026         return;
1027     };
1028     rbbiRules = brkItr->getBinaryRules(length);
1029     logln("Comparing \"%s\" len=%d", dataFile, length);
1030     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1031         errln("Built rules and rebuilt rules are different %s", dataFile);
1032         return;
1033     }
1034     delete brkItr;
1035     udata_close(data);
1036 }
1037 
TestRoundtripRules()1038 void RBBIAPITest::TestRoundtripRules() {
1039     RoundtripRule("word");
1040     RoundtripRule("title");
1041     RoundtripRule("sent");
1042     RoundtripRule("line");
1043     RoundtripRule("char");
1044     if (!quick) {
1045         RoundtripRule("word_ja");
1046         RoundtripRule("word_POSIX");
1047     }
1048 }
1049 
1050 //---------------------------------------------
1051 // runIndexedTest
1052 //---------------------------------------------
1053 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1054 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1055 {
1056     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1057     switch (index) {
1058      //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1059         case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1060         case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1061         case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1062         case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1063         case  4: name = "TestIteration"; if (exec) TestIteration(); break;
1064         case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1065         case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1066         case  7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1067         case  8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1068         case  9: name = "TestBug2190"; if (exec) TestBug2190(); break;
1069         case 10: name = "TestRegistration"; if (exec) TestRegistration(); break;
1070         case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1071         case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1072 
1073         default: name = ""; break; // needed to end loop
1074     }
1075 }
1076 
1077 //---------------------------------------------
1078 //Internal subroutines
1079 //---------------------------------------------
1080 
doBoundaryTest(RuleBasedBreakIterator & bi,UnicodeString & text,int32_t * boundaries)1081 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1082      logln((UnicodeString)"testIsBoundary():");
1083         int32_t p = 0;
1084         UBool isB;
1085         for (int32_t i = 0; i < text.length(); i++) {
1086             isB = bi.isBoundary(i);
1087             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1088 
1089             if (i == boundaries[p]) {
1090                 if (!isB)
1091                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1092                 p++;
1093             }
1094             else {
1095                 if (isB)
1096                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1097             }
1098         }
1099 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1100 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1101     UnicodeString selected;
1102     UnicodeString expected=CharsToUnicodeString(expectedString);
1103 
1104     if(gotoffset != expectedOffset)
1105          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1106     if(start <= gotoffset){
1107         testString.extractBetween(start, gotoffset, selected);
1108     }
1109     else{
1110         testString.extractBetween(gotoffset, start, selected);
1111     }
1112     if(selected.compare(expected) != 0)
1113          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1114     else
1115         logln(prettify("****selected \"" + selected + "\""));
1116 }
1117 
1118 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1119