1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33 * API Test the RuleBasedBreakIterator class
34 */
35
36
// Reports a data error naming the file, the line and the ICU error when
// `status` holds a failure code; expands to a brace-enclosed block.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
}
39
// Reports a test failure naming the file, the line and the literal text of
// `expr` when `expr` evaluates to false; expands to a brace-enclosed block.
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
42
TestCloneEquals()43 void RBBIAPITest::TestCloneEquals()
44 {
45
46 UErrorCode status=U_ZERO_ERROR;
47 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51 if(U_FAILURE(status)){
52 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53 return;
54 }
55
56
57 UnicodeString testString="Testing word break iterators's clone() and equals()";
58 bi1->setText(testString);
59 bi2->setText(testString);
60 biequal->setText(testString);
61
62 bi3->setText("hello");
63
64 logln((UnicodeString)"Testing equals()");
65
66 logln((UnicodeString)"Testing == and !=");
67 UBool b = (*bi1 != *biequal);
68 b |= *bi1 == *bi2;
69 b |= *bi1 == *bi3;
70 if (b) {
71 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
72 }
73
74 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
75 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
76
77
78 // Quick test of RulesBasedBreakIterator assignment -
79 // Check that
80 // two different iterators are !=
81 // they are == after assignment
82 // source and dest iterator produce the same next() after assignment.
83 // deleting one doesn't disable the other.
84 logln("Testing assignment");
85 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86 if(U_FAILURE(status)){
87 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88 return;
89 }
90
91 RuleBasedBreakIterator biDefault, biDefault2;
92 if(U_FAILURE(status)){
93 errln((UnicodeString)"FAIL : in construction of default iterator");
94 return;
95 }
96 if (biDefault == *bix) {
97 errln((UnicodeString)"ERROR: iterators should not compare ==");
98 return;
99 }
100 if (biDefault != biDefault2) {
101 errln((UnicodeString)"ERROR: iterators should compare ==");
102 return;
103 }
104
105
106 UnicodeString HelloString("Hello Kitty");
107 bix->setText(HelloString);
108 if (*bix == *bi2) {
109 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
110 }
111 *bix = *bi2;
112 if (*bix != *bi2) {
113 errln(UnicodeString("ERROR: strings should be equal before assignment."));
114 }
115
116 int bixnext = bix->next();
117 int bi2next = bi2->next();
118 if (! (bixnext == bi2next && bixnext == 7)) {
119 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
120 }
121 delete bix;
122 if (bi2->next() != 8) {
123 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
124 }
125
126
127
128 logln((UnicodeString)"Testing clone()");
129 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
130 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
131
132 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
133 *bi1clone == *bi3 || *bi1clone == *bi2)
134 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
135
136 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137 *bi2clone == *bi3 || *bi2clone != *bi2)
138 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
139
140 if(bi1->getText() != bi1clone->getText() ||
141 bi2clone->getText() != bi2->getText() ||
142 *bi2clone == *bi1clone )
143 errln((UnicodeString)"ERROR: RBBI's clone() method failed");
144
145 delete bi1clone;
146 delete bi2clone;
147 delete bi1;
148 delete bi3;
149 delete bi2;
150 delete biequal;
151 }
152
TestBoilerPlate()153 void RBBIAPITest::TestBoilerPlate()
154 {
155 UErrorCode status = U_ZERO_ERROR;
156 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158 if (U_FAILURE(status)) {
159 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160 return;
161 }
162 if(*a!=*b){
163 errln("Failed: boilerplate method operator!= does not return correct results");
164 }
165 // Japanese word break iterators are identical to root with
166 // a dictionary-based break iterator
167 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169 if(c && d){
170 if(*c!=*d){
171 errln("Failed: boilerplate method operator== does not return correct results");
172 }
173 }else{
174 errln("creation of break iterator failed");
175 }
176 delete a;
177 delete b;
178 delete c;
179 delete d;
180 }
181
TestgetRules()182 void RBBIAPITest::TestgetRules()
183 {
184 UErrorCode status=U_ZERO_ERROR;
185
186 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
187 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
188 if(U_FAILURE(status)){
189 errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
190 delete bi1;
191 delete bi2;
192 return;
193 }
194
195
196
197 logln((UnicodeString)"Testing toString()");
198
199 bi1->setText((UnicodeString)"Hello there");
200
201 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
202
203 UnicodeString temp=bi1->getRules();
204 UnicodeString temp2=bi2->getRules();
205 UnicodeString temp3=bi3->getRules();
206 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
207 errln((UnicodeString)"ERROR: error in getRules() method");
208
209 delete bi1;
210 delete bi2;
211 delete bi3;
212 }
TestHashCode()213 void RBBIAPITest::TestHashCode()
214 {
215 UErrorCode status=U_ZERO_ERROR;
216 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
217 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
219 if(U_FAILURE(status)){
220 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
221 delete bi1;
222 delete bi2;
223 delete bi3;
224 return;
225 }
226
227
228 logln((UnicodeString)"Testing hashCode()");
229
230 bi1->setText((UnicodeString)"Hash code");
231 bi2->setText((UnicodeString)"Hash code");
232 bi3->setText((UnicodeString)"Hash code");
233
234 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
235 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
236
237 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
238 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
239 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
240
241 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
242 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
243 errln((UnicodeString)"ERROR: different objects have same hashcodes");
244
245 delete bi1clone;
246 delete bi2clone;
247 delete bi1;
248 delete bi2;
249 delete bi3;
250
251 }
TestGetSetAdoptText()252 void RBBIAPITest::TestGetSetAdoptText()
253 {
254 logln((UnicodeString)"Testing getText setText ");
255 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
256 UnicodeString str1="first string.";
257 UnicodeString str2="Second string.";
258 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
259 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
260 if(status.isFailure()){
261 errcheckln(status, "Fail : in construction - %s", status.errorName());
262 return;
263 }
264
265
266 CharacterIterator* text1= new StringCharacterIterator(str1);
267 CharacterIterator* text1Clone = text1->clone();
268 CharacterIterator* text2= new StringCharacterIterator(str2);
269 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
270
271 wordIter1->setText(str1);
272 CharacterIterator *tci = &wordIter1->getText();
273 UnicodeString tstr;
274 tci->getText(tstr);
275 TEST_ASSERT(tstr == str1);
276 if(wordIter1->current() != 0)
277 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
278
279 wordIter1->next(2);
280
281 wordIter1->setText(str2);
282 if(wordIter1->current() != 0)
283 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
284
285
286 charIter1->adoptText(text1Clone);
287 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
288 tci = &wordIter1->getText();
289 tci->getText(tstr);
290 TEST_ASSERT(tstr == str2);
291 tci = &charIter1->getText();
292 tci->getText(tstr);
293 TEST_ASSERT(tstr == str1);
294
295
296 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
297 rb->adoptText(text1);
298 if(rb->getText() != *text1)
299 errln((UnicodeString)"ERROR:1 error in adoptText ");
300 rb->adoptText(text2);
301 if(rb->getText() != *text2)
302 errln((UnicodeString)"ERROR:2 error in adoptText ");
303
304 // Adopt where iterator range is less than the entire orignal source string.
305 // (With the change of the break engine to working with UText internally,
306 // CharacterIterators starting at positions other than zero are not supported)
307 rb->adoptText(text3);
308 TEST_ASSERT(rb->preceding(2) == 0);
309 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
310 //if(rb->preceding(2) != 3) {
311 // errln((UnicodeString)"ERROR:3 error in adoptText ");
312 //}
313 //if(rb->following(11) != BreakIterator::DONE) {
314 // errln((UnicodeString)"ERROR:4 error in adoptText ");
315 //}
316
317 // UText API
318 //
319 // Quick test to see if UText is working at all.
320 //
321 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
322 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
323 // 012345678901
324
325 status.reset();
326 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
327 wordIter1->setText(ut.getAlias(), status);
328 TEST_ASSERT_SUCCESS(status);
329
330 int32_t pos;
331 pos = wordIter1->first();
332 TEST_ASSERT(pos==0);
333 pos = wordIter1->next();
334 TEST_ASSERT(pos==5);
335 pos = wordIter1->next();
336 TEST_ASSERT(pos==6);
337 pos = wordIter1->next();
338 TEST_ASSERT(pos==11);
339 pos = wordIter1->next();
340 TEST_ASSERT(pos==UBRK_DONE);
341
342 status.reset();
343 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
344 TEST_ASSERT_SUCCESS(status);
345 wordIter1->setText(ut2.getAlias(), status);
346 TEST_ASSERT_SUCCESS(status);
347
348 pos = wordIter1->first();
349 TEST_ASSERT(pos==0);
350 pos = wordIter1->next();
351 TEST_ASSERT(pos==3);
352 pos = wordIter1->next();
353 TEST_ASSERT(pos==4);
354
355 pos = wordIter1->last();
356 TEST_ASSERT(pos==6);
357 pos = wordIter1->previous();
358 TEST_ASSERT(pos==4);
359 pos = wordIter1->previous();
360 TEST_ASSERT(pos==3);
361 pos = wordIter1->previous();
362 TEST_ASSERT(pos==0);
363 pos = wordIter1->previous();
364 TEST_ASSERT(pos==UBRK_DONE);
365
366 status.reset();
367 UnicodeString sEmpty;
368 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
369 wordIter1->getUText(gut2.getAlias(), status);
370 TEST_ASSERT_SUCCESS(status);
371 status.reset();
372 }
373
374
TestIteration()375 void RBBIAPITest::TestIteration()
376 {
377 // This test just verifies that the API is present.
378 // Testing for correct operation of the break rules happens elsewhere.
379
380 UErrorCode status=U_ZERO_ERROR;
381 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
382 if (U_FAILURE(status) || bi == NULL) {
383 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
384 }
385 delete bi;
386
387 status=U_ZERO_ERROR;
388 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
389 if (U_FAILURE(status) || bi == NULL) {
390 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
391 }
392 delete bi;
393
394 status=U_ZERO_ERROR;
395 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
396 if (U_FAILURE(status) || bi == NULL) {
397 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
398 }
399 delete bi;
400
401 status=U_ZERO_ERROR;
402 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
403 if (U_FAILURE(status) || bi == NULL) {
404 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
405 }
406 delete bi;
407
408 status=U_ZERO_ERROR;
409 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
410 if (U_FAILURE(status) || bi == NULL) {
411 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
412 }
413 delete bi;
414
415 status=U_ZERO_ERROR;
416 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
417 if (U_FAILURE(status) || bi == NULL) {
418 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
419 return; // Skip the rest of these tests.
420 }
421
422
423 UnicodeString testString="0123456789";
424 bi->setText(testString);
425
426 int32_t i;
427 i = bi->first();
428 if (i != 0) {
429 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);
430 }
431
432 i = bi->last();
433 if (i != 10) {
434 errln("Incorrect value from bi->last(). Expected 10, got %d", i);
435 }
436
437 //
438 // Previous
439 //
440 bi->last();
441 i = bi->previous();
442 if (i != 9) {
443 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);
444 }
445
446
447 bi->first();
448 i = bi->previous();
449 if (i != BreakIterator::DONE) {
450 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i);
451 }
452
453 //
454 // next()
455 //
456 bi->first();
457 i = bi->next();
458 if (i != 1) {
459 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);
460 }
461
462 bi->last();
463 i = bi->next();
464 if (i != BreakIterator::DONE) {
465 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i);
466 }
467
468
469 //
470 // current()
471 //
472 bi->first();
473 i = bi->current();
474 if (i != 0) {
475 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
476 }
477
478 bi->next();
479 i = bi->current();
480 if (i != 1) {
481 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);
482 }
483
484 bi->last();
485 bi->next();
486 i = bi->current();
487 if (i != 10) {
488 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);
489 }
490
491 bi->first();
492 bi->previous();
493 i = bi->current();
494 if (i != 0) {
495 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
496 }
497
498
499 //
500 // Following()
501 //
502 i = bi->following(4);
503 if (i != 5) {
504 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);
505 }
506
507 i = bi->following(9);
508 if (i != 10) {
509 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i);
510 }
511
512 i = bi->following(10);
513 if (i != BreakIterator::DONE) {
514 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);
515 }
516
517
518 //
519 // Preceding
520 //
521 i = bi->preceding(4);
522 if (i != 3) {
523 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);
524 }
525
526 i = bi->preceding(10);
527 if (i != 9) {
528 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);
529 }
530
531 i = bi->preceding(1);
532 if (i != 0) {
533 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);
534 }
535
536 i = bi->preceding(0);
537 if (i != BreakIterator::DONE) {
538 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);
539 }
540
541
542 //
543 // isBoundary()
544 //
545 bi->first();
546 if (bi->isBoundary(3) != TRUE) {
547 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);
548 }
549 i = bi->current();
550 if (i != 3) {
551 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i);
552 }
553
554
555 if (bi->isBoundary(11) != FALSE) {
556 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);
557 }
558 i = bi->current();
559 if (i != 10) {
560 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);
561 }
562
563 //
564 // next(n)
565 //
566 bi->first();
567 i = bi->next(4);
568 if (i != 4) {
569 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);
570 }
571
572 i = bi->next(6);
573 if (i != 10) {
574 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i);
575 }
576
577 bi->first();
578 i = bi->next(11);
579 if (i != BreakIterator::DONE) {
580 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i);
581 }
582
583 delete bi;
584
585 }
586
587
588
589
590
591
TestBuilder()592 void RBBIAPITest::TestBuilder() {
593 UnicodeString rulesString1 = "$Letters = [:L:];\n"
594 "$Numbers = [:N:];\n"
595 "$Letters+;\n"
596 "$Numbers+;\n"
597 "[^$Letters $Numbers];\n"
598 "!.*;\n";
599 UnicodeString testString1 = "abc123..abc";
600 // 01234567890
601 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
602 UErrorCode status=U_ZERO_ERROR;
603 UParseError parseError;
604
605 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
606 if(U_FAILURE(status)) {
607 dataerrln("Fail : in construction - %s", u_errorName(status));
608 } else {
609 bi->setText(testString1);
610 doBoundaryTest(*bi, testString1, bounds1);
611 }
612 delete bi;
613 }
614
615
616 //
617 // TestQuoteGrouping
618 // Single quotes within rules imply a grouping, so that a modifier
619 // following the quoted text (* or +) applies to all of the quoted chars.
620 //
TestQuoteGrouping()621 void RBBIAPITest::TestQuoteGrouping() {
622 UnicodeString rulesString1 = "#Here comes the rule...\n"
623 "'$@!'*;\n" // (\$\@\!)*
624 ".;\n";
625
626 UnicodeString testString1 = "$@!$@!X$@!!X";
627 // 0123456789012
628 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
629 UErrorCode status=U_ZERO_ERROR;
630 UParseError parseError;
631
632 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
633 if(U_FAILURE(status)) {
634 dataerrln("Fail : in construction - %s", u_errorName(status));
635 } else {
636 bi->setText(testString1);
637 doBoundaryTest(*bi, testString1, bounds1);
638 }
639 delete bi;
640 }
641
642 //
643 // TestRuleStatus
644 // Test word break rule status constants.
645 //
TestRuleStatus()646 void RBBIAPITest::TestRuleStatus() {
647 UChar str[30];
648 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
649 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
650 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
651 // 012345678901234567 8 9 0
652 // Katakana
653 str, 30);
654 UnicodeString testString1(str);
655 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
656 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
657 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
658 UBRK_WORD_IDEO, UBRK_WORD_NONE};
659
660 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
661 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
662 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
663
664 UErrorCode status=U_ZERO_ERROR;
665
666 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
667 if(U_FAILURE(status)) {
668 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
669 } else {
670 bi->setText(testString1);
671 // First test that the breaks are in the right spots.
672 doBoundaryTest(*bi, testString1, bounds1);
673
674 // Then go back and check tag values
675 int32_t i = 0;
676 int32_t pos, tag;
677 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
678 if (pos != bounds1[i]) {
679 errln("FAIL: unexpected word break at postion %d", pos);
680 break;
681 }
682 tag = bi->getRuleStatus();
683 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
684 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
685 break;
686 }
687
688 // Check that we get the same tag values from getRuleStatusVec()
689 int32_t vec[10];
690 int t = bi->getRuleStatusVec(vec, 10, status);
691 TEST_ASSERT_SUCCESS(status);
692 TEST_ASSERT(t==1);
693 TEST_ASSERT(vec[0] == tag);
694 }
695 }
696 delete bi;
697
698 // Now test line break status. This test mostly is to confirm that the status constants
699 // are correctly declared in the header.
700 testString1 = "test line. \n";
701 // break type s s h
702
703 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
704 if(U_FAILURE(status)) {
705 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
706 } else {
707 int32_t i = 0;
708 int32_t pos, tag;
709 UBool success;
710
711 bi->setText(testString1);
712 pos = bi->current();
713 tag = bi->getRuleStatus();
714 for (i=0; i<3; i++) {
715 switch (i) {
716 case 0:
717 success = pos==0 && tag==UBRK_LINE_SOFT; break;
718 case 1:
719 success = pos==5 && tag==UBRK_LINE_SOFT; break;
720 case 2:
721 success = pos==12 && tag==UBRK_LINE_HARD; break;
722 default:
723 success = FALSE; break;
724 }
725 if (success == FALSE) {
726 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
727 i, pos, tag);
728 break;
729 }
730 pos = bi->next();
731 tag = bi->getRuleStatus();
732 }
733 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
734 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
735 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
736 errln("UBRK_LINE_* constants from header are inconsistent.");
737 }
738 }
739 delete bi;
740
741 }
742
743
744 //
745 // TestRuleStatusVec
746 // Test the vector form of break rule status.
747 //
TestRuleStatusVec()748 void RBBIAPITest::TestRuleStatusVec() {
749 UnicodeString rulesString( "[A-N]{100}; \n"
750 "[a-w]{200}; \n"
751 "[\\p{L}]{300}; \n"
752 "[\\p{N}]{400}; \n"
753 "[0-5]{500}; \n"
754 "!.*;\n", -1, US_INV);
755 UnicodeString testString1 = "Aapz5?";
756 int32_t statusVals[10];
757 int32_t numStatuses;
758 int32_t pos;
759
760 UErrorCode status=U_ZERO_ERROR;
761 UParseError parseError;
762
763 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
764 if (U_FAILURE(status)) {
765 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
766 } else {
767 bi->setText(testString1);
768
769 // A
770 pos = bi->next();
771 TEST_ASSERT(pos==1);
772 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
773 TEST_ASSERT_SUCCESS(status);
774 TEST_ASSERT(numStatuses == 2);
775 TEST_ASSERT(statusVals[0] == 100);
776 TEST_ASSERT(statusVals[1] == 300);
777
778 // a
779 pos = bi->next();
780 TEST_ASSERT(pos==2);
781 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
782 TEST_ASSERT_SUCCESS(status);
783 TEST_ASSERT(numStatuses == 2);
784 TEST_ASSERT(statusVals[0] == 200);
785 TEST_ASSERT(statusVals[1] == 300);
786
787 // p
788 pos = bi->next();
789 TEST_ASSERT(pos==3);
790 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
791 TEST_ASSERT_SUCCESS(status);
792 TEST_ASSERT(numStatuses == 2);
793 TEST_ASSERT(statusVals[0] == 200);
794 TEST_ASSERT(statusVals[1] == 300);
795
796 // z
797 pos = bi->next();
798 TEST_ASSERT(pos==4);
799 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
800 TEST_ASSERT_SUCCESS(status);
801 TEST_ASSERT(numStatuses == 1);
802 TEST_ASSERT(statusVals[0] == 300);
803
804 // 5
805 pos = bi->next();
806 TEST_ASSERT(pos==5);
807 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
808 TEST_ASSERT_SUCCESS(status);
809 TEST_ASSERT(numStatuses == 2);
810 TEST_ASSERT(statusVals[0] == 400);
811 TEST_ASSERT(statusVals[1] == 500);
812
813 // ?
814 pos = bi->next();
815 TEST_ASSERT(pos==6);
816 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
817 TEST_ASSERT_SUCCESS(status);
818 TEST_ASSERT(numStatuses == 1);
819 TEST_ASSERT(statusVals[0] == 0);
820
821 //
822 // Check buffer overflow error handling. Char == A
823 //
824 bi->first();
825 pos = bi->next();
826 TEST_ASSERT(pos==1);
827 memset(statusVals, -1, sizeof(statusVals));
828 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
829 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
830 TEST_ASSERT(numStatuses == 2);
831 TEST_ASSERT(statusVals[0] == -1);
832
833 status = U_ZERO_ERROR;
834 memset(statusVals, -1, sizeof(statusVals));
835 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
836 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837 TEST_ASSERT(numStatuses == 2);
838 TEST_ASSERT(statusVals[0] == 100);
839 TEST_ASSERT(statusVals[1] == -1);
840
841 status = U_ZERO_ERROR;
842 memset(statusVals, -1, sizeof(statusVals));
843 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
844 TEST_ASSERT_SUCCESS(status);
845 TEST_ASSERT(numStatuses == 2);
846 TEST_ASSERT(statusVals[0] == 100);
847 TEST_ASSERT(statusVals[1] == 300);
848 TEST_ASSERT(statusVals[2] == -1);
849 }
850 delete bi;
851
852 }
853
854 //
855 // Bug 2190 Regression test. Builder crash on rule consisting of only a
856 // $variable reference
TestBug2190()857 void RBBIAPITest::TestBug2190() {
858 UnicodeString rulesString1 = "$aaa = abcd;\n"
859 "$bbb = $aaa;\n"
860 "$bbb;\n";
861 UnicodeString testString1 = "abcdabcd";
862 // 01234567890
863 int32_t bounds1[] = {0, 4, 8};
864 UErrorCode status=U_ZERO_ERROR;
865 UParseError parseError;
866
867 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
868 if(U_FAILURE(status)) {
869 dataerrln("Fail : in construction - %s", u_errorName(status));
870 } else {
871 bi->setText(testString1);
872 doBoundaryTest(*bi, testString1, bounds1);
873 }
874 delete bi;
875 }
876
877
TestRegistration()878 void RBBIAPITest::TestRegistration() {
879 #if !UCONFIG_NO_SERVICE
880 UErrorCode status = U_ZERO_ERROR;
881 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
882 // ok to not delete these if we exit because of error?
883 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
884 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
885 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
886
887 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
888 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
889
890 delete ja_word;
891 delete ja_char;
892 delete root_word;
893 delete root_char;
894
895 return;
896 }
897
898 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
899 {
900 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
901 if (ja_word && *ja_word == *root_word) {
902 errln("japan not different from root");
903 }
904 #endif
905 }
906
907 {
908 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
909 UBool fail = TRUE;
910 if(result){
911 fail = *result != *ja_word;
912 }
913 delete result;
914 if (fail) {
915 errln("bad result for xx_XX/word");
916 }
917 }
918
919 {
920 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
921 UBool fail = TRUE;
922 if(result){
923 fail = *result != *ja_char;
924 }
925 delete result;
926 if (fail) {
927 errln("bad result for ja_JP/char");
928 }
929 }
930
931 {
932 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
933 UBool fail = TRUE;
934 if(result){
935 fail = *result != *root_char;
936 }
937 delete result;
938 if (fail) {
939 errln("bad result for xx_XX/char");
940 }
941 }
942
943 {
944 StringEnumeration* avail = BreakIterator::getAvailableLocales();
945 UBool found = FALSE;
946 const UnicodeString* p;
947 while ((p = avail->snext(status))) {
948 if (p->compare("xx") == 0) {
949 found = TRUE;
950 break;
951 }
952 }
953 delete avail;
954 if (!found) {
955 errln("did not find test locale");
956 }
957 }
958
959 {
960 UBool unreg = BreakIterator::unregister(key, status);
961 if (!unreg) {
962 errln("unable to unregister");
963 }
964 }
965
966 {
967 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
968 BreakIterator* root = BreakIterator::createWordInstance("", status);
969 UBool fail = TRUE;
970 if(root){
971 fail = *root != *result;
972 }
973 delete root;
974 delete result;
975 if (fail) {
976 errln("did not get root break");
977 }
978 }
979
980 {
981 StringEnumeration* avail = BreakIterator::getAvailableLocales();
982 UBool found = FALSE;
983 const UnicodeString* p;
984 while ((p = avail->snext(status))) {
985 if (p->compare("xx") == 0) {
986 found = TRUE;
987 break;
988 }
989 }
990 delete avail;
991 if (found) {
992 errln("found test locale");
993 }
994 }
995
996 {
997 int32_t count;
998 UBool foundLocale = FALSE;
999 const Locale *avail = BreakIterator::getAvailableLocales(count);
1000 for (int i=0; i<count; i++) {
1001 if (avail[i] == Locale::getEnglish()) {
1002 foundLocale = TRUE;
1003 break;
1004 }
1005 }
1006 if (foundLocale == FALSE) {
1007 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1008 }
1009 }
1010
1011
1012 // ja_word was adopted by factory
1013 delete ja_char;
1014 delete root_word;
1015 delete root_char;
1016 #endif
1017 }
1018
RoundtripRule(const char * dataFile)1019 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1020 UErrorCode status = U_ZERO_ERROR;
1021 UParseError parseError;
1022 parseError.line = 0;
1023 parseError.offset = 0;
1024 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1025 uint32_t length;
1026 const UChar *builtSource;
1027 const uint8_t *rbbiRules;
1028 const uint8_t *builtRules;
1029
1030 if (U_FAILURE(status)) {
1031 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1032 return;
1033 }
1034
1035 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1036 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1037 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1038 if (U_FAILURE(status)) {
1039 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1040 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1041 errln(UnicodeString(builtSource));
1042 return;
1043 };
1044 rbbiRules = brkItr->getBinaryRules(length);
1045 logln("Comparing \"%s\" len=%d", dataFile, length);
1046 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1047 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1048 return;
1049 }
1050 delete brkItr;
1051 }
1052
TestRoundtripRules()1053 void RBBIAPITest::TestRoundtripRules() {
1054 RoundtripRule("word");
1055 RoundtripRule("title");
1056 RoundtripRule("sent");
1057 RoundtripRule("line");
1058 RoundtripRule("char");
1059 if (!quick) {
1060 RoundtripRule("word_POSIX");
1061 }
1062 }
1063
1064
1065 // Check getBinaryRules() and construction of a break iterator from those rules.
1066
TestGetBinaryRules()1067 void RBBIAPITest::TestGetBinaryRules() {
1068 UErrorCode status=U_ZERO_ERROR;
1069 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1070 TEST_ASSERT_SUCCESS(status);
1071 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1072 TEST_ASSERT(rbbi != NULL);
1073
1074 // Check that the new line break iterator is nominally functional.
1075 UnicodeString helloWorld("Hello, World!");
1076 rbbi->setText(helloWorld);
1077 int n = 0;
1078 while (bi->next() != UBRK_DONE) {
1079 ++n;
1080 }
1081 TEST_ASSERT(n == 2);
1082
1083 // Extract the binary rules as a uint8_t blob.
1084 uint32_t ruleLength;
1085 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1086 TEST_ASSERT(ruleLength > 0);
1087 TEST_ASSERT(binRules != NULL);
1088
1089 // Clone the binary rules, and create a break iterator from that.
1090 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1091 uint8_t *clonedRules = new uint8_t[ruleLength];
1092 memcpy(clonedRules, binRules, ruleLength);
1093 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1094 TEST_ASSERT_SUCCESS(status);
1095
1096 // Check that the cloned line break iterator is nominally alive.
1097 clonedBI.setText(helloWorld);
1098 n = 0;
1099 while (clonedBI.next() != UBRK_DONE) {
1100 ++n;
1101 }
1102 TEST_ASSERT(n == 2);
1103
1104 delete[] clonedRules;
1105 }
1106
1107
TestRefreshInputText()1108 void RBBIAPITest::TestRefreshInputText() {
1109 /*
1110 * RefreshInput changes out the input of a Break Iterator without
1111 * changing anything else in the iterator's state. Used with Java JNI,
1112 * when Java moves the underlying string storage. This test
1113 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1114 * The right set of boundaries should still be found.
1115 */
1116 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1117 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1118 UErrorCode status = U_ZERO_ERROR;
1119 UText ut1 = UTEXT_INITIALIZER;
1120 UText ut2 = UTEXT_INITIALIZER;
1121 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1122 TEST_ASSERT_SUCCESS(status);
1123
1124 utext_openUChars(&ut1, testStr, -1, &status);
1125 TEST_ASSERT_SUCCESS(status);
1126
1127 if (U_SUCCESS(status)) {
1128 bi->setText(&ut1, status);
1129 TEST_ASSERT_SUCCESS(status);
1130
1131 /* Line boundaries will occur before each letter in the original string */
1132 TEST_ASSERT(1 == bi->next());
1133 TEST_ASSERT(3 == bi->next());
1134
1135 /* Move the string, kill the original string. */
1136 u_strcpy(movedStr, testStr);
1137 u_memset(testStr, 0x20, u_strlen(testStr));
1138 utext_openUChars(&ut2, movedStr, -1, &status);
1139 TEST_ASSERT_SUCCESS(status);
1140 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1141 TEST_ASSERT_SUCCESS(status);
1142 TEST_ASSERT(bi == returnedBI);
1143
1144 /* Find the following matches, now working in the moved string. */
1145 TEST_ASSERT(5 == bi->next());
1146 TEST_ASSERT(7 == bi->next());
1147 TEST_ASSERT(8 == bi->next());
1148 TEST_ASSERT(UBRK_DONE == bi->next());
1149
1150 utext_close(&ut1);
1151 utext_close(&ut2);
1152 }
1153 delete bi;
1154
1155 }
1156
1157 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1158 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1159 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1160 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1161
1162 int32_t *pos = new int32_t[ustr.length()];
1163 int32_t posCount = 0;
1164
1165 // calculate breaks up front, so we can print out
1166 // sans any debugging
1167 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1168 pos[posCount++] = n;
1169 if(posCount>=ustr.length()) {
1170 it.errln("brk count exceeds string length!");
1171 return;
1172 }
1173 }
1174 UnicodeString out;
1175 out.append((UChar)CHSTR);
1176 int32_t prev = 0;
1177 for(int32_t i=0;i<posCount;i++) {
1178 int32_t n=pos[i];
1179 out.append(ustr.tempSubString(prev,n-prev));
1180 out.append((UChar)PILCROW);
1181 prev=n;
1182 }
1183 out.append(ustr.tempSubString(prev,ustr.length()-prev));
1184 out.append((UChar)CHEND);
1185 it.logln(out);
1186
1187 out.remove();
1188 for(int32_t i=0;i<posCount;i++) {
1189 char tmp[100];
1190 sprintf(tmp,"%d ",pos[i]);
1191 out.append(UnicodeString(tmp));
1192 }
1193 it.logln(out);
1194 delete [] pos;
1195 }
1196 #endif
1197
TestFilteredBreakIteratorBuilder()1198 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1199 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1200 UErrorCode status = U_ZERO_ERROR;
1201 LocalPointer<FilteredBreakIteratorBuilder> builder;
1202 LocalPointer<BreakIterator> baseBI;
1203 LocalPointer<BreakIterator> filteredBI;
1204 LocalPointer<BreakIterator> frenchBI;
1205
1206 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1207 const UnicodeString ABBR_MR("Mr.");
1208 const UnicodeString ABBR_CAPT("Capt.");
1209
1210 {
1211 logln("Constructing empty builder\n");
1212 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1213 TEST_ASSERT_SUCCESS(status);
1214
1215 logln("Constructing base BI\n");
1216 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1217 TEST_ASSERT_SUCCESS(status);
1218
1219 logln("Building new BI\n");
1220 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1221 TEST_ASSERT_SUCCESS(status);
1222
1223 if (U_SUCCESS(status)) {
1224 logln("Testing:");
1225 filteredBI->setText(text);
1226 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1227 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1228 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1229 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1230 TEST_ASSERT(278 == filteredBI->next()); // charge.
1231 filteredBI->first();
1232 prtbrks(filteredBI.getAlias(), text, *this);
1233 }
1234 }
1235
1236 {
1237 logln("Constructing empty builder\n");
1238 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1239 TEST_ASSERT_SUCCESS(status);
1240
1241 if (U_SUCCESS(status)) {
1242 logln("Adding Mr. as an exception\n");
1243 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1244 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1245 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1246 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1247 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1248 TEST_ASSERT_SUCCESS(status);
1249
1250 logln("Constructing base BI\n");
1251 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1252 TEST_ASSERT_SUCCESS(status);
1253
1254 logln("Building new BI\n");
1255 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1256 TEST_ASSERT_SUCCESS(status);
1257
1258 logln("Testing:");
1259 filteredBI->setText(text);
1260 TEST_ASSERT(84 == filteredBI->next());
1261 TEST_ASSERT(90 == filteredBI->next());// Capt.
1262 TEST_ASSERT(278 == filteredBI->next());
1263 filteredBI->first();
1264 prtbrks(filteredBI.getAlias(), text, *this);
1265 }
1266 }
1267
1268
1269 {
1270 logln("Constructing empty builder\n");
1271 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1272 TEST_ASSERT_SUCCESS(status);
1273
1274 if (U_SUCCESS(status)) {
1275 logln("Adding Mr. and Capt as an exception\n");
1276 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1277 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1278 TEST_ASSERT_SUCCESS(status);
1279
1280 logln("Constructing base BI\n");
1281 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1282 TEST_ASSERT_SUCCESS(status);
1283
1284 logln("Building new BI\n");
1285 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1286 TEST_ASSERT_SUCCESS(status);
1287
1288 logln("Testing:");
1289 filteredBI->setText(text);
1290 TEST_ASSERT(84 == filteredBI->next());
1291 TEST_ASSERT(278 == filteredBI->next());
1292 filteredBI->first();
1293 prtbrks(filteredBI.getAlias(), text, *this);
1294 }
1295 }
1296
1297
1298 {
1299 logln("Constructing English builder\n");
1300 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1301 TEST_ASSERT_SUCCESS(status);
1302
1303 logln("Constructing base BI\n");
1304 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1305 TEST_ASSERT_SUCCESS(status);
1306
1307 if (U_SUCCESS(status)) {
1308 logln("unsuppressing 'Capt'");
1309 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1310
1311 logln("Building new BI\n");
1312 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1313 TEST_ASSERT_SUCCESS(status);
1314
1315 if(filteredBI.isValid()) {
1316 logln("Testing:");
1317 filteredBI->setText(text);
1318 TEST_ASSERT(84 == filteredBI->next());
1319 TEST_ASSERT(90 == filteredBI->next());
1320 TEST_ASSERT(278 == filteredBI->next());
1321 filteredBI->first();
1322 prtbrks(filteredBI.getAlias(), text, *this);
1323 }
1324 }
1325 }
1326
1327
1328 {
1329 logln("Constructing English builder\n");
1330 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1331 TEST_ASSERT_SUCCESS(status);
1332
1333 logln("Constructing base BI\n");
1334 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1335 TEST_ASSERT_SUCCESS(status);
1336
1337 if (U_SUCCESS(status)) {
1338 logln("Building new BI\n");
1339 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1340 TEST_ASSERT_SUCCESS(status);
1341
1342 if(filteredBI.isValid()) {
1343 logln("Testing:");
1344 filteredBI->setText(text);
1345 TEST_ASSERT(84 == filteredBI->next());
1346 TEST_ASSERT(278 == filteredBI->next());
1347 filteredBI->first();
1348 prtbrks(filteredBI.getAlias(), text, *this);
1349 }
1350 }
1351 }
1352
1353 // reenable once french is in
1354 {
1355 logln("Constructing French builder");
1356 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1357 TEST_ASSERT_SUCCESS(status);
1358
1359 logln("Constructing base BI\n");
1360 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1361 TEST_ASSERT_SUCCESS(status);
1362
1363 if (U_SUCCESS(status)) {
1364 logln("Building new BI\n");
1365 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1366 TEST_ASSERT_SUCCESS(status);
1367 }
1368
1369 if(frenchBI.isValid()) {
1370 logln("Testing:");
1371 UnicodeString frText("C'est MM. Duval.");
1372 frenchBI->setText(frText);
1373 TEST_ASSERT(16 == frenchBI->next());
1374 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1375 frenchBI->first();
1376 prtbrks(frenchBI.getAlias(), frText, *this);
1377 logln("Testing against English:");
1378 filteredBI->setText(frText);
1379 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1380 TEST_ASSERT(16 == filteredBI->next());
1381 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1382 filteredBI->first();
1383 prtbrks(filteredBI.getAlias(), frText, *this);
1384
1385 // Verify ==
1386 TEST_ASSERT_TRUE(*frenchBI == *frenchBI);
1387 TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1388 TEST_ASSERT_TRUE(*frenchBI != *filteredBI);
1389 } else {
1390 dataerrln("French BI: not valid.");
1391 }
1392 }
1393
1394 #else
1395 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1396 #endif
1397 }
1398
1399 //---------------------------------------------
1400 // runIndexedTest
1401 //---------------------------------------------
1402
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1403 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1404 {
1405 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1406 TESTCASE_AUTO_BEGIN;
1407 #if !UCONFIG_NO_FILE_IO
1408 TESTCASE_AUTO(TestCloneEquals);
1409 TESTCASE_AUTO(TestgetRules);
1410 TESTCASE_AUTO(TestHashCode);
1411 TESTCASE_AUTO(TestGetSetAdoptText);
1412 TESTCASE_AUTO(TestIteration);
1413 #endif
1414 TESTCASE_AUTO(TestBuilder);
1415 TESTCASE_AUTO(TestQuoteGrouping);
1416 TESTCASE_AUTO(TestRuleStatusVec);
1417 TESTCASE_AUTO(TestBug2190);
1418 #if !UCONFIG_NO_FILE_IO
1419 TESTCASE_AUTO(TestRegistration);
1420 TESTCASE_AUTO(TestBoilerPlate);
1421 TESTCASE_AUTO(TestRuleStatus);
1422 TESTCASE_AUTO(TestRoundtripRules);
1423 TESTCASE_AUTO(TestGetBinaryRules);
1424 #endif
1425 TESTCASE_AUTO(TestRefreshInputText);
1426 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1427 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1428 #endif
1429 TESTCASE_AUTO_END;
1430 }
1431
1432
1433 //---------------------------------------------
1434 //Internal subroutines
1435 //---------------------------------------------
1436
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1437 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1438 logln((UnicodeString)"testIsBoundary():");
1439 int32_t p = 0;
1440 UBool isB;
1441 for (int32_t i = 0; i < text.length(); i++) {
1442 isB = bi.isBoundary(i);
1443 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1444
1445 if (i == boundaries[p]) {
1446 if (!isB)
1447 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1448 p++;
1449 }
1450 else {
1451 if (isB)
1452 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1453 }
1454 }
1455 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1456 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1457 UnicodeString selected;
1458 UnicodeString expected=CharsToUnicodeString(expectedString);
1459
1460 if(gotoffset != expectedOffset)
1461 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1462 if(start <= gotoffset){
1463 testString.extractBetween(start, gotoffset, selected);
1464 }
1465 else{
1466 testString.extractBetween(gotoffset, start, selected);
1467 }
1468 if(selected.compare(expected) != 0)
1469 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1470 else
1471 logln(prettify("****selected \"" + selected + "\""));
1472 }
1473
1474 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1475