1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33 * API Test the RuleBasedBreakIterator class
34 */
35
36
/**
 * TEST_ASSERT_SUCCESS(status)
 *   When U_FAILURE(status) holds, reports the failure through dataerrln(),
 *   including the source file, the line number and the name of the error code.
 *   Expands to a single statement, so it is safe inside unbraced if/else.
 */
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * TEST_ASSERT(expr)
 *   When expr evaluates to false, reports a test failure through errln(),
 *   including the source file, the line number and the text of the expression.
 *   Expands to a single statement, so it is safe inside unbraced if/else.
 */
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
48
TestCloneEquals()49 void RBBIAPITest::TestCloneEquals()
50 {
51
52 UErrorCode status=U_ZERO_ERROR;
53 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
54 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
55 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
56 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
57 if(U_FAILURE(status)){
58 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
59 return;
60 }
61
62
63 UnicodeString testString="Testing word break iterators's clone() and equals()";
64 bi1->setText(testString);
65 bi2->setText(testString);
66 biequal->setText(testString);
67
68 bi3->setText("hello");
69
70 logln((UnicodeString)"Testing equals()");
71
72 logln((UnicodeString)"Testing == and !=");
73 bool b = (*bi1 != *biequal);
74 b |= *bi1 == *bi2;
75 b |= *bi1 == *bi3;
76 if (b) {
77 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
78 }
79
80 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
81 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__, __LINE__);
82
83
84 // Quick test of RulesBasedBreakIterator assignment -
85 // Check that
86 // two different iterators are !=
87 // they are == after assignment
88 // source and dest iterator produce the same next() after assignment.
89 // deleting one doesn't disable the other.
90 logln("Testing assignment");
91 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
92 if(U_FAILURE(status)){
93 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
94 return;
95 }
96
97 RuleBasedBreakIterator biDefault, biDefault2;
98 if(U_FAILURE(status)){
99 errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
100 return;
101 }
102 if (biDefault == *bix) {
103 errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
104 return;
105 }
106 if (biDefault != biDefault2) {
107 errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
108 return;
109 }
110
111
112 UnicodeString HelloString("Hello Kitty");
113 bix->setText(HelloString);
114 if (*bix == *bi2) {
115 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
116 }
117 *bix = *bi2;
118 if (*bix != *bi2) {
119 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
120 }
121
122 int bixnext = bix->next();
123 int bi2next = bi2->next();
124 if (! (bixnext == bi2next && bixnext == 7)) {
125 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
126 }
127 delete bix;
128 if (bi2->next() != 8) {
129 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
130 }
131
132
133
134 logln((UnicodeString)"Testing clone()");
135 RuleBasedBreakIterator* bi1clone = bi1->clone();
136 RuleBasedBreakIterator* bi2clone = bi2->clone();
137
138 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
139 *bi1clone == *bi3 || *bi1clone == *bi2)
140 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
141
142 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
143 *bi2clone == *bi3 || *bi2clone != *bi2)
144 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
145
146 if(bi1->getText() != bi1clone->getText() ||
147 bi2clone->getText() != bi2->getText() ||
148 *bi2clone == *bi1clone )
149 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
150
151 delete bi1clone;
152 delete bi2clone;
153 delete bi1;
154 delete bi3;
155 delete bi2;
156 delete biequal;
157 }
158
TestBoilerPlate()159 void RBBIAPITest::TestBoilerPlate()
160 {
161 UErrorCode status = U_ZERO_ERROR;
162 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
163 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
164 if (U_FAILURE(status)) {
165 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
166 return;
167 }
168 if(*a!=*b){
169 errln("Failed: boilerplate method operator!= does not return correct results");
170 }
171 // Japanese word break iterators are identical to root with
172 // a dictionary-based break iterator
173 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
174 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
175 if(c && d){
176 if(*c!=*d){
177 errln("Failed: boilerplate method operator== does not return correct results");
178 }
179 }else{
180 errln("creation of break iterator failed");
181 }
182 delete a;
183 delete b;
184 delete c;
185 delete d;
186 }
187
TestgetRules()188 void RBBIAPITest::TestgetRules()
189 {
190 UErrorCode status=U_ZERO_ERROR;
191
192 LocalPointer<RuleBasedBreakIterator> bi1(
193 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
194 LocalPointer<RuleBasedBreakIterator> bi2(
195 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
196 if(U_FAILURE(status)){
197 errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
198 return;
199 }
200
201 logln((UnicodeString)"Testing getRules()");
202
203 UnicodeString text(u"Hello there");
204 bi1->setText(text);
205
206 LocalPointer <RuleBasedBreakIterator> bi3(bi1->clone());
207
208 UnicodeString temp=bi1->getRules();
209 UnicodeString temp2=bi2->getRules();
210 UnicodeString temp3=bi3->getRules();
211 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
212 errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
213
214 RuleBasedBreakIterator bi4; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
215 if (!bi4.getRules().isEmpty()) {
216 errln("%s:%d Empty string expected.", __FILE__, __LINE__);
217 }
218 }
219
TestHashCode()220 void RBBIAPITest::TestHashCode()
221 {
222 UErrorCode status=U_ZERO_ERROR;
223 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
224 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
225 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
226 if(U_FAILURE(status)){
227 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
228 delete bi1;
229 delete bi2;
230 delete bi3;
231 return;
232 }
233
234
235 logln((UnicodeString)"Testing hashCode()");
236
237 bi1->setText((UnicodeString)"Hash code");
238 bi2->setText((UnicodeString)"Hash code");
239 bi3->setText((UnicodeString)"Hash code");
240
241 RuleBasedBreakIterator* bi1clone= bi1->clone();
242 RuleBasedBreakIterator* bi2clone= bi2->clone();
243
244 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
245 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
246 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
247
248 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
249 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
250 errln((UnicodeString)"ERROR: different objects have same hashcodes");
251
252 delete bi1clone;
253 delete bi2clone;
254 delete bi1;
255 delete bi2;
256 delete bi3;
257
258 }
TestGetSetAdoptText()259 void RBBIAPITest::TestGetSetAdoptText()
260 {
261 logln((UnicodeString)"Testing getText setText ");
262 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
263 UnicodeString str1="first string.";
264 UnicodeString str2="Second string.";
265 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
266 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
267 if(status.isFailure()){
268 errcheckln(status, "Fail : in construction - %s", status.errorName());
269 return;
270 }
271
272
273 CharacterIterator* text1= new StringCharacterIterator(str1);
274 CharacterIterator* text1Clone = text1->clone();
275 CharacterIterator* text2= new StringCharacterIterator(str2);
276 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
277
278 wordIter1->setText(str1);
279 CharacterIterator *tci = &wordIter1->getText();
280 UnicodeString tstr;
281 tci->getText(tstr);
282 TEST_ASSERT(tstr == str1);
283 if(wordIter1->current() != 0)
284 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285
286 wordIter1->next(2);
287
288 wordIter1->setText(str2);
289 if(wordIter1->current() != 0)
290 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
291
292
293 charIter1->adoptText(text1Clone);
294 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
295 tci = &wordIter1->getText();
296 tci->getText(tstr);
297 TEST_ASSERT(tstr == str2);
298 tci = &charIter1->getText();
299 tci->getText(tstr);
300 TEST_ASSERT(tstr == str1);
301
302
303 LocalPointer<RuleBasedBreakIterator> rb(wordIter1->clone());
304 rb->adoptText(text1);
305 if(rb->getText() != *text1)
306 errln((UnicodeString)"ERROR:1 error in adoptText ");
307 rb->adoptText(text2);
308 if(rb->getText() != *text2)
309 errln((UnicodeString)"ERROR:2 error in adoptText ");
310
311 // Adopt where iterator range is less than the entire original source string.
312 // (With the change of the break engine to working with UText internally,
313 // CharacterIterators starting at positions other than zero are not supported)
314 rb->adoptText(text3);
315 TEST_ASSERT(rb->preceding(2) == 0);
316 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
317 //if(rb->preceding(2) != 3) {
318 // errln((UnicodeString)"ERROR:3 error in adoptText ");
319 //}
320 //if(rb->following(11) != BreakIterator::DONE) {
321 // errln((UnicodeString)"ERROR:4 error in adoptText ");
322 //}
323
324 // UText API
325 //
326 // Quick test to see if UText is working at all.
327 //
328 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
329 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
330 // 012345678901
331
332 status.reset();
333 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
334 wordIter1->setText(ut.getAlias(), status);
335 TEST_ASSERT_SUCCESS(status);
336
337 int32_t pos;
338 pos = wordIter1->first();
339 TEST_ASSERT(pos==0);
340 pos = wordIter1->next();
341 TEST_ASSERT(pos==5);
342 pos = wordIter1->next();
343 TEST_ASSERT(pos==6);
344 pos = wordIter1->next();
345 TEST_ASSERT(pos==11);
346 pos = wordIter1->next();
347 TEST_ASSERT(pos==UBRK_DONE);
348
349 status.reset();
350 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
351 TEST_ASSERT_SUCCESS(status);
352 wordIter1->setText(ut2.getAlias(), status);
353 TEST_ASSERT_SUCCESS(status);
354
355 pos = wordIter1->first();
356 TEST_ASSERT(pos==0);
357 pos = wordIter1->next();
358 TEST_ASSERT(pos==3);
359 pos = wordIter1->next();
360 TEST_ASSERT(pos==4);
361
362 pos = wordIter1->last();
363 TEST_ASSERT(pos==6);
364 pos = wordIter1->previous();
365 TEST_ASSERT(pos==4);
366 pos = wordIter1->previous();
367 TEST_ASSERT(pos==3);
368 pos = wordIter1->previous();
369 TEST_ASSERT(pos==0);
370 pos = wordIter1->previous();
371 TEST_ASSERT(pos==UBRK_DONE);
372
373 status.reset();
374 UnicodeString sEmpty;
375 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
376 wordIter1->getUText(gut2.getAlias(), status);
377 TEST_ASSERT_SUCCESS(status);
378 status.reset();
379 }
380
381
TestIteration()382 void RBBIAPITest::TestIteration()
383 {
384 // This test just verifies that the API is present.
385 // Testing for correct operation of the break rules happens elsewhere.
386
387 UErrorCode status=U_ZERO_ERROR;
388 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
389 if (U_FAILURE(status) || bi == NULL) {
390 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
391 }
392 delete bi;
393
394 status=U_ZERO_ERROR;
395 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
396 if (U_FAILURE(status) || bi == NULL) {
397 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
398 }
399 delete bi;
400
401 status=U_ZERO_ERROR;
402 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
403 if (U_FAILURE(status) || bi == NULL) {
404 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
405 }
406 delete bi;
407
408 status=U_ZERO_ERROR;
409 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
410 if (U_FAILURE(status) || bi == NULL) {
411 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
412 }
413 delete bi;
414
415 status=U_ZERO_ERROR;
416 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
417 if (U_FAILURE(status) || bi == NULL) {
418 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
419 }
420 delete bi;
421
422 status=U_ZERO_ERROR;
423 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
424 if (U_FAILURE(status) || bi == NULL) {
425 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
426 return; // Skip the rest of these tests.
427 }
428
429
430 UnicodeString testString="0123456789";
431 bi->setText(testString);
432
433 int32_t i;
434 i = bi->first();
435 if (i != 0) {
436 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__, __LINE__, i);
437 }
438
439 i = bi->last();
440 if (i != 10) {
441 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__, __LINE__, i);
442 }
443
444 //
445 // Previous
446 //
447 bi->last();
448 i = bi->previous();
449 if (i != 9) {
450 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__, __LINE__, i);
451 }
452
453
454 bi->first();
455 i = bi->previous();
456 if (i != BreakIterator::DONE) {
457 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__, __LINE__, i);
458 }
459
460 //
461 // next()
462 //
463 bi->first();
464 i = bi->next();
465 if (i != 1) {
466 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__, __LINE__, i);
467 }
468
469 bi->last();
470 i = bi->next();
471 if (i != BreakIterator::DONE) {
472 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__, __LINE__, i);
473 }
474
475
476 //
477 // current()
478 //
479 bi->first();
480 i = bi->current();
481 if (i != 0) {
482 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
483 }
484
485 bi->next();
486 i = bi->current();
487 if (i != 1) {
488 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__, __LINE__, i);
489 }
490
491 bi->last();
492 bi->next();
493 i = bi->current();
494 if (i != 10) {
495 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
496 }
497
498 bi->first();
499 bi->previous();
500 i = bi->current();
501 if (i != 0) {
502 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
503 }
504
505
506 //
507 // Following()
508 //
509 i = bi->following(4);
510 if (i != 5) {
511 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__, __LINE__, i);
512 }
513
514 i = bi->following(9);
515 if (i != 10) {
516 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__, __LINE__, i);
517 }
518
519 i = bi->following(10);
520 if (i != BreakIterator::DONE) {
521 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__, __LINE__, i);
522 }
523
524
525 //
526 // Preceding
527 //
528 i = bi->preceding(4);
529 if (i != 3) {
530 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__, __LINE__, i);
531 }
532
533 i = bi->preceding(10);
534 if (i != 9) {
535 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__, __LINE__, i);
536 }
537
538 i = bi->preceding(1);
539 if (i != 0) {
540 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__, __LINE__, i);
541 }
542
543 i = bi->preceding(0);
544 if (i != BreakIterator::DONE) {
545 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__, __LINE__, i);
546 }
547
548
549 //
550 // isBoundary()
551 //
552 bi->first();
553 if (bi->isBoundary(3) != true) {
554 errln("%s:%d Incorrect value from bi->isBoundary(). Expected true, got false", __FILE__, __LINE__, i);
555 }
556 i = bi->current();
557 if (i != 3) {
558 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__, __LINE__, i);
559 }
560
561
562 if (bi->isBoundary(11) != false) {
563 errln("%s:%d Incorrect value from bi->isBoundary(). Expected false, got true", __FILE__, __LINE__, i);
564 }
565 i = bi->current();
566 if (i != 10) {
567 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
568 }
569
570 //
571 // next(n)
572 //
573 bi->first();
574 i = bi->next(4);
575 if (i != 4) {
576 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__, __LINE__, i);
577 }
578
579 i = bi->next(6);
580 if (i != 10) {
581 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__, __LINE__, i);
582 }
583
584 bi->first();
585 i = bi->next(11);
586 if (i != BreakIterator::DONE) {
587 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
588 }
589
590 delete bi;
591
592 }
593
594
595
596
597
598
TestBuilder()599 void RBBIAPITest::TestBuilder() {
600 UnicodeString rulesString1 = "$Letters = [:L:];\n"
601 "$Numbers = [:N:];\n"
602 "$Letters+;\n"
603 "$Numbers+;\n"
604 "[^$Letters $Numbers];\n"
605 "!.*;\n";
606 UnicodeString testString1 = "abc123..abc";
607 // 01234567890
608 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
609 UErrorCode status=U_ZERO_ERROR;
610 UParseError parseError;
611
612 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
613 if(U_FAILURE(status)) {
614 dataerrln("Fail : in construction - %s", u_errorName(status));
615 } else {
616 bi->setText(testString1);
617 doBoundaryTest(*bi, testString1, bounds1);
618 }
619 delete bi;
620 }
621
622
623 //
624 // TestQuoteGrouping
625 // Single quotes within rules imply a grouping, so that a modifier
626 // following the quoted text (* or +) applies to all of the quoted chars.
627 //
TestQuoteGrouping()628 void RBBIAPITest::TestQuoteGrouping() {
629 UnicodeString rulesString1 = "#Here comes the rule...\n"
630 "'$@!'*;\n" // (\$\@\!)*
631 ".;\n";
632
633 UnicodeString testString1 = "$@!$@!X$@!!X";
634 // 0123456789012
635 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
636 UErrorCode status=U_ZERO_ERROR;
637 UParseError parseError;
638
639 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
640 if(U_FAILURE(status)) {
641 dataerrln("Fail : in construction - %s", u_errorName(status));
642 } else {
643 bi->setText(testString1);
644 doBoundaryTest(*bi, testString1, bounds1);
645 }
646 delete bi;
647 }
648
649 //
650 // TestRuleStatus
651 // Test word break rule status constants.
652 //
TestRuleStatus()653 void RBBIAPITest::TestRuleStatus() {
654 UChar str[30];
655 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
656 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
657 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
658 // 012345678901234567 8 9 0
659 // Katakana
660 str, 30);
661 UnicodeString testString1(str);
662 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
663 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
664 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
665 UBRK_WORD_IDEO, UBRK_WORD_NONE};
666
667 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
668 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
669 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
670
671 UErrorCode status=U_ZERO_ERROR;
672
673 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
674 if(U_FAILURE(status)) {
675 errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
676 } else {
677 bi->setText(testString1);
678 // First test that the breaks are in the right spots.
679 doBoundaryTest(*bi, testString1, bounds1);
680
681 // Then go back and check tag values
682 int32_t i = 0;
683 int32_t pos, tag;
684 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
685 if (pos != bounds1[i]) {
686 errln("%s:%d FAIL: unexpected word break at position %d", __FILE__, __LINE__, pos);
687 break;
688 }
689 tag = bi->getRuleStatus();
690 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
691 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
692 break;
693 }
694
695 // Check that we get the same tag values from getRuleStatusVec()
696 int32_t vec[10];
697 int t = bi->getRuleStatusVec(vec, 10, status);
698 TEST_ASSERT_SUCCESS(status);
699 TEST_ASSERT(t==1);
700 TEST_ASSERT(vec[0] == tag);
701 }
702 }
703 delete bi;
704
705 // Now test line break status. This test mostly is to confirm that the status constants
706 // are correctly declared in the header.
707 testString1 = "test line. \n";
708 // break type s s h
709
710 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
711 if(U_FAILURE(status)) {
712 errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
713 } else {
714 int32_t i = 0;
715 int32_t pos, tag;
716 UBool success;
717
718 bi->setText(testString1);
719 pos = bi->current();
720 tag = bi->getRuleStatus();
721 for (i=0; i<3; i++) {
722 switch (i) {
723 case 0:
724 success = pos==0 && tag==UBRK_LINE_SOFT; break;
725 case 1:
726 success = pos==5 && tag==UBRK_LINE_SOFT; break;
727 case 2:
728 success = pos==12 && tag==UBRK_LINE_HARD; break;
729 default:
730 success = false; break;
731 }
732 if (success == false) {
733 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d",
734 __FILE__, __LINE__, i, pos, tag);
735 break;
736 }
737 pos = bi->next();
738 tag = bi->getRuleStatus();
739 }
740 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
741 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
742 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
743 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
744 }
745 }
746 delete bi;
747
748 }
749
750
751 //
752 // TestRuleStatusVec
753 // Test the vector form of break rule status.
754 //
TestRuleStatusVec()755 void RBBIAPITest::TestRuleStatusVec() {
756 UnicodeString rulesString( "[A-N]{100}; \n"
757 "[a-w]{200}; \n"
758 "[\\p{L}]{300}; \n"
759 "[\\p{N}]{400}; \n"
760 "[0-5]{500}; \n"
761 "!.*;\n", -1, US_INV);
762 UnicodeString testString1 = "Aapz5?";
763 int32_t statusVals[10];
764 int32_t numStatuses;
765 int32_t pos;
766
767 UErrorCode status=U_ZERO_ERROR;
768 UParseError parseError;
769
770 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
771 if (U_FAILURE(status)) {
772 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
773 } else {
774 bi->setText(testString1);
775
776 // A
777 pos = bi->next();
778 TEST_ASSERT(pos==1);
779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780 TEST_ASSERT_SUCCESS(status);
781 TEST_ASSERT(numStatuses == 2);
782 TEST_ASSERT(statusVals[0] == 100);
783 TEST_ASSERT(statusVals[1] == 300);
784
785 // a
786 pos = bi->next();
787 TEST_ASSERT(pos==2);
788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789 TEST_ASSERT_SUCCESS(status);
790 TEST_ASSERT(numStatuses == 2);
791 TEST_ASSERT(statusVals[0] == 200);
792 TEST_ASSERT(statusVals[1] == 300);
793
794 // p
795 pos = bi->next();
796 TEST_ASSERT(pos==3);
797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798 TEST_ASSERT_SUCCESS(status);
799 TEST_ASSERT(numStatuses == 2);
800 TEST_ASSERT(statusVals[0] == 200);
801 TEST_ASSERT(statusVals[1] == 300);
802
803 // z
804 pos = bi->next();
805 TEST_ASSERT(pos==4);
806 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
807 TEST_ASSERT_SUCCESS(status);
808 TEST_ASSERT(numStatuses == 1);
809 TEST_ASSERT(statusVals[0] == 300);
810
811 // 5
812 pos = bi->next();
813 TEST_ASSERT(pos==5);
814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815 TEST_ASSERT_SUCCESS(status);
816 TEST_ASSERT(numStatuses == 2);
817 TEST_ASSERT(statusVals[0] == 400);
818 TEST_ASSERT(statusVals[1] == 500);
819
820 // ?
821 pos = bi->next();
822 TEST_ASSERT(pos==6);
823 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
824 TEST_ASSERT_SUCCESS(status);
825 TEST_ASSERT(numStatuses == 1);
826 TEST_ASSERT(statusVals[0] == 0);
827
828 //
829 // Check buffer overflow error handling. Char == A
830 //
831 bi->first();
832 pos = bi->next();
833 TEST_ASSERT(pos==1);
834 memset(statusVals, -1, sizeof(statusVals));
835 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
836 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837 TEST_ASSERT(numStatuses == 2);
838 TEST_ASSERT(statusVals[0] == -1);
839
840 status = U_ZERO_ERROR;
841 memset(statusVals, -1, sizeof(statusVals));
842 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
843 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
844 TEST_ASSERT(numStatuses == 2);
845 TEST_ASSERT(statusVals[0] == 100);
846 TEST_ASSERT(statusVals[1] == -1);
847
848 status = U_ZERO_ERROR;
849 memset(statusVals, -1, sizeof(statusVals));
850 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
851 TEST_ASSERT_SUCCESS(status);
852 TEST_ASSERT(numStatuses == 2);
853 TEST_ASSERT(statusVals[0] == 100);
854 TEST_ASSERT(statusVals[1] == 300);
855 TEST_ASSERT(statusVals[2] == -1);
856 }
857 delete bi;
858
859 }
860
861 //
862 // Bug 2190 Regression test. Builder crash on rule consisting of only a
863 // $variable reference
TestBug2190()864 void RBBIAPITest::TestBug2190() {
865 UnicodeString rulesString1 = "$aaa = abcd;\n"
866 "$bbb = $aaa;\n"
867 "$bbb;\n";
868 UnicodeString testString1 = "abcdabcd";
869 // 01234567890
870 int32_t bounds1[] = {0, 4, 8};
871 UErrorCode status=U_ZERO_ERROR;
872 UParseError parseError;
873
874 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
875 if(U_FAILURE(status)) {
876 dataerrln("Fail : in construction - %s", u_errorName(status));
877 } else {
878 bi->setText(testString1);
879 doBoundaryTest(*bi, testString1, bounds1);
880 }
881 delete bi;
882 }
883
884
TestRegistration()885 void RBBIAPITest::TestRegistration() {
886 #if !UCONFIG_NO_SERVICE
887 UErrorCode status = U_ZERO_ERROR;
888 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
889 // ok to not delete these if we exit because of error?
890 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
891 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
892 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
893
894 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
895 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
896
897 delete ja_word;
898 delete ja_char;
899 delete root_word;
900 delete root_char;
901
902 return;
903 }
904
905 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
906 {
907 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
908 if (ja_word && *ja_word == *root_word) {
909 errln("japan not different from root");
910 }
911 #endif
912 }
913
914 {
915 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
916 UBool fail = true;
917 if(result){
918 fail = *result != *ja_word;
919 }
920 delete result;
921 if (fail) {
922 errln("bad result for xx_XX/word");
923 }
924 }
925
926 {
927 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
928 UBool fail = true;
929 if(result){
930 fail = *result != *ja_char;
931 }
932 delete result;
933 if (fail) {
934 errln("bad result for ja_JP/char");
935 }
936 }
937
938 {
939 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
940 UBool fail = true;
941 if(result){
942 fail = *result != *root_char;
943 }
944 delete result;
945 if (fail) {
946 errln("bad result for xx_XX/char");
947 }
948 }
949
950 {
951 StringEnumeration* avail = BreakIterator::getAvailableLocales();
952 UBool found = false;
953 const UnicodeString* p;
954 while ((p = avail->snext(status))) {
955 if (p->compare("xx") == 0) {
956 found = true;
957 break;
958 }
959 }
960 delete avail;
961 if (!found) {
962 errln("did not find test locale");
963 }
964 }
965
966 {
967 UBool unreg = BreakIterator::unregister(key, status);
968 if (!unreg) {
969 errln("unable to unregister");
970 }
971 }
972
973 {
974 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
975 BreakIterator* root = BreakIterator::createWordInstance("", status);
976 UBool fail = true;
977 if(root){
978 fail = *root != *result;
979 }
980 delete root;
981 delete result;
982 if (fail) {
983 errln("did not get root break");
984 }
985 }
986
987 {
988 StringEnumeration* avail = BreakIterator::getAvailableLocales();
989 UBool found = false;
990 const UnicodeString* p;
991 while ((p = avail->snext(status))) {
992 if (p->compare("xx") == 0) {
993 found = true;
994 break;
995 }
996 }
997 delete avail;
998 if (found) {
999 errln("found test locale");
1000 }
1001 }
1002
1003 {
1004 int32_t count;
1005 UBool foundLocale = false;
1006 const Locale *avail = BreakIterator::getAvailableLocales(count);
1007 for (int i=0; i<count; i++) {
1008 if (avail[i] == Locale::getEnglish()) {
1009 foundLocale = true;
1010 break;
1011 }
1012 }
1013 if (foundLocale == false) {
1014 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1015 }
1016 }
1017
1018
1019 // ja_word was adopted by factory
1020 delete ja_char;
1021 delete root_word;
1022 delete root_char;
1023 #endif
1024 }
1025
RoundtripRule(const char * dataFile)1026 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1027 UErrorCode status = U_ZERO_ERROR;
1028 UParseError parseError;
1029 parseError.line = 0;
1030 parseError.offset = 0;
1031 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1032 uint32_t length;
1033 const char *builtSource;
1034 const uint8_t *rbbiRules;
1035 const uint8_t *builtRules;
1036
1037 if (U_FAILURE(status)) {
1038 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1039 return;
1040 }
1041
1042 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1043 builtSource = (const char *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1044 LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
1045 if (U_FAILURE(status)) {
1046 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1047 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1048 errln(UnicodeString(builtSource));
1049 return;
1050 }
1051 rbbiRules = brkItr->getBinaryRules(length);
1052 logln("Comparing \"%s\" len=%d", dataFile, length);
1053 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1054 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1055 return;
1056 }
1057 }
1058
TestRoundtripRules()1059 void RBBIAPITest::TestRoundtripRules() {
1060 RoundtripRule("word");
1061 RoundtripRule("title");
1062 RoundtripRule("sent");
1063 RoundtripRule("line");
1064 RoundtripRule("char");
1065 if (!quick) {
1066 RoundtripRule("word_POSIX");
1067 }
1068 }
1069
1070
1071 // Check getBinaryRules() and construction of a break iterator from those rules.
1072
TestGetBinaryRules()1073 void RBBIAPITest::TestGetBinaryRules() {
1074 UErrorCode status=U_ZERO_ERROR;
1075 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1076 if (U_FAILURE(status)) {
1077 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1078 return;
1079 }
1080 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1081 if (rbbi == NULL) {
1082 dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1083 return;
1084 }
1085
1086 // Check that the new line break iterator is nominally functional.
1087 UnicodeString helloWorld("Hello, World!");
1088 rbbi->setText(helloWorld);
1089 int n = 0;
1090 while (bi->next() != UBRK_DONE) {
1091 ++n;
1092 }
1093 TEST_ASSERT(n == 2);
1094
1095 // Extract the binary rules as a uint8_t blob.
1096 uint32_t ruleLength;
1097 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1098 TEST_ASSERT(ruleLength > 0);
1099 TEST_ASSERT(binRules != NULL);
1100
1101 // Clone the binary rules, and create a break iterator from that.
1102 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1103 uint8_t *clonedRules = new uint8_t[ruleLength];
1104 memcpy(clonedRules, binRules, ruleLength);
1105 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1106 TEST_ASSERT_SUCCESS(status);
1107
1108 // Check that the cloned line break iterator is nominally alive.
1109 clonedBI.setText(helloWorld);
1110 n = 0;
1111 while (clonedBI.next() != UBRK_DONE) {
1112 ++n;
1113 }
1114 TEST_ASSERT(n == 2);
1115
1116 delete[] clonedRules;
1117 }
1118
1119
TestRefreshInputText()1120 void RBBIAPITest::TestRefreshInputText() {
1121 /*
1122 * RefreshInput changes out the input of a Break Iterator without
1123 * changing anything else in the iterator's state. Used with Java JNI,
1124 * when Java moves the underlying string storage. This test
1125 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1126 * The right set of boundaries should still be found.
1127 */
1128 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1129 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1130 UErrorCode status = U_ZERO_ERROR;
1131 UText ut1 = UTEXT_INITIALIZER;
1132 UText ut2 = UTEXT_INITIALIZER;
1133 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1134 TEST_ASSERT_SUCCESS(status);
1135
1136 utext_openUChars(&ut1, testStr, -1, &status);
1137 TEST_ASSERT_SUCCESS(status);
1138
1139 if (U_SUCCESS(status)) {
1140 bi->setText(&ut1, status);
1141 TEST_ASSERT_SUCCESS(status);
1142
1143 /* Line boundaries will occur before each letter in the original string */
1144 TEST_ASSERT(1 == bi->next());
1145 TEST_ASSERT(3 == bi->next());
1146
1147 /* Move the string, kill the original string. */
1148 u_strcpy(movedStr, testStr);
1149 u_memset(testStr, 0x20, u_strlen(testStr));
1150 utext_openUChars(&ut2, movedStr, -1, &status);
1151 TEST_ASSERT_SUCCESS(status);
1152 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1153 TEST_ASSERT_SUCCESS(status);
1154 TEST_ASSERT(bi == returnedBI);
1155
1156 /* Find the following matches, now working in the moved string. */
1157 TEST_ASSERT(5 == bi->next());
1158 TEST_ASSERT(7 == bi->next());
1159 TEST_ASSERT(8 == bi->next());
1160 TEST_ASSERT(UBRK_DONE == bi->next());
1161
1162 utext_close(&ut1);
1163 utext_close(&ut2);
1164 }
1165 delete bi;
1166
1167 }
1168
1169 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1170 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1171 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1172 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1173
1174 int32_t *pos = new int32_t[ustr.length()];
1175 int32_t posCount = 0;
1176
1177 // calculate breaks up front, so we can print out
1178 // sans any debugging
1179 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1180 pos[posCount++] = n;
1181 if(posCount>=ustr.length()) {
1182 it.errln("brk count exceeds string length!");
1183 return;
1184 }
1185 }
1186 UnicodeString out;
1187 out.append((UChar)CHSTR);
1188 int32_t prev = 0;
1189 for(int32_t i=0;i<posCount;i++) {
1190 int32_t n=pos[i];
1191 out.append(ustr.tempSubString(prev,n-prev));
1192 out.append((UChar)PILCROW);
1193 prev=n;
1194 }
1195 out.append(ustr.tempSubString(prev,ustr.length()-prev));
1196 out.append((UChar)CHEND);
1197 it.logln(out);
1198
1199 out.remove();
1200 for(int32_t i=0;i<posCount;i++) {
1201 char tmp[100];
1202 sprintf(tmp,"%d ",pos[i]);
1203 out.append(UnicodeString(tmp));
1204 }
1205 it.logln(out);
1206 delete [] pos;
1207 }
1208 #endif
1209
TestFilteredBreakIteratorBuilder()1210 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1211 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1212 UErrorCode status = U_ZERO_ERROR;
1213 LocalPointer<FilteredBreakIteratorBuilder> builder;
1214 LocalPointer<BreakIterator> baseBI;
1215 LocalPointer<BreakIterator> filteredBI;
1216 LocalPointer<BreakIterator> frenchBI;
1217
1218 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1219 const UnicodeString ABBR_MR("Mr.");
1220 const UnicodeString ABBR_CAPT("Capt.");
1221
1222 {
1223 logln("Constructing empty builder\n");
1224 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1225 TEST_ASSERT_SUCCESS(status);
1226
1227 logln("Constructing base BI\n");
1228 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1229 TEST_ASSERT_SUCCESS(status);
1230
1231 logln("Building new BI\n");
1232 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1233 TEST_ASSERT_SUCCESS(status);
1234
1235 if (U_SUCCESS(status)) {
1236 logln("Testing:");
1237 filteredBI->setText(text);
1238 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1239 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1240 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1241 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1242 TEST_ASSERT(278 == filteredBI->next()); // charge.
1243 filteredBI->first();
1244 prtbrks(filteredBI.getAlias(), text, *this);
1245 }
1246 }
1247
1248 {
1249 logln("Constructing empty builder\n");
1250 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1251 TEST_ASSERT_SUCCESS(status);
1252
1253 if (U_SUCCESS(status)) {
1254 logln("Adding Mr. as an exception\n");
1255 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1256 TEST_ASSERT(false == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1257 TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_MR, status));
1258 TEST_ASSERT(false == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1259 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1260 TEST_ASSERT_SUCCESS(status);
1261
1262 logln("Constructing base BI\n");
1263 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1264 TEST_ASSERT_SUCCESS(status);
1265
1266 logln("Building new BI\n");
1267 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1268 TEST_ASSERT_SUCCESS(status);
1269
1270 logln("Testing:");
1271 filteredBI->setText(text);
1272 TEST_ASSERT(84 == filteredBI->next());
1273 TEST_ASSERT(90 == filteredBI->next());// Capt.
1274 TEST_ASSERT(278 == filteredBI->next());
1275 filteredBI->first();
1276 prtbrks(filteredBI.getAlias(), text, *this);
1277 }
1278 }
1279
1280
1281 {
1282 logln("Constructing empty builder\n");
1283 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1284 TEST_ASSERT_SUCCESS(status);
1285
1286 if (U_SUCCESS(status)) {
1287 logln("Adding Mr. and Capt as an exception\n");
1288 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1289 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_CAPT, status));
1290 TEST_ASSERT_SUCCESS(status);
1291
1292 logln("Constructing base BI\n");
1293 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1294 TEST_ASSERT_SUCCESS(status);
1295
1296 logln("Building new BI\n");
1297 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1298 TEST_ASSERT_SUCCESS(status);
1299
1300 logln("Testing:");
1301 filteredBI->setText(text);
1302 TEST_ASSERT(84 == filteredBI->next());
1303 TEST_ASSERT(278 == filteredBI->next());
1304 filteredBI->first();
1305 prtbrks(filteredBI.getAlias(), text, *this);
1306 }
1307 }
1308
1309
1310 {
1311 logln("Constructing English builder\n");
1312 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1313 TEST_ASSERT_SUCCESS(status);
1314
1315 logln("Constructing base BI\n");
1316 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1317 TEST_ASSERT_SUCCESS(status);
1318
1319 if (U_SUCCESS(status)) {
1320 logln("unsuppressing 'Capt'");
1321 TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1322
1323 logln("Building new BI\n");
1324 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1325 TEST_ASSERT_SUCCESS(status);
1326
1327 if(filteredBI.isValid()) {
1328 logln("Testing:");
1329 filteredBI->setText(text);
1330 TEST_ASSERT(84 == filteredBI->next());
1331 TEST_ASSERT(90 == filteredBI->next());
1332 TEST_ASSERT(278 == filteredBI->next());
1333 filteredBI->first();
1334 prtbrks(filteredBI.getAlias(), text, *this);
1335 }
1336 }
1337 }
1338
1339
1340 {
1341 logln("Constructing English builder\n");
1342 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1343 TEST_ASSERT_SUCCESS(status);
1344
1345 logln("Constructing base BI\n");
1346 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1347 TEST_ASSERT_SUCCESS(status);
1348
1349 if (U_SUCCESS(status)) {
1350 logln("Building new BI\n");
1351 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1352 TEST_ASSERT_SUCCESS(status);
1353
1354 if(filteredBI.isValid()) {
1355 logln("Testing:");
1356 filteredBI->setText(text);
1357 TEST_ASSERT(84 == filteredBI->next());
1358 TEST_ASSERT(278 == filteredBI->next());
1359 filteredBI->first();
1360 prtbrks(filteredBI.getAlias(), text, *this);
1361 }
1362 }
1363 }
1364
1365 // reenable once french is in
1366 {
1367 logln("Constructing French builder");
1368 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1369 TEST_ASSERT_SUCCESS(status);
1370
1371 logln("Constructing base BI\n");
1372 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1373 TEST_ASSERT_SUCCESS(status);
1374
1375 if (U_SUCCESS(status)) {
1376 logln("Building new BI\n");
1377 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1378 TEST_ASSERT_SUCCESS(status);
1379 }
1380
1381 if(frenchBI.isValid()) {
1382 logln("Testing:");
1383 UnicodeString frText("C'est MM. Duval.");
1384 frenchBI->setText(frText);
1385 TEST_ASSERT(16 == frenchBI->next());
1386 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1387 frenchBI->first();
1388 prtbrks(frenchBI.getAlias(), frText, *this);
1389 logln("Testing against English:");
1390 filteredBI->setText(frText);
1391 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1392 TEST_ASSERT(16 == filteredBI->next());
1393 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1394 filteredBI->first();
1395 prtbrks(filteredBI.getAlias(), frText, *this);
1396
1397 // Verify ==
1398 assertTrue(WHERE, *frenchBI == *frenchBI);
1399 assertTrue(WHERE, *filteredBI != *frenchBI);
1400 assertTrue(WHERE, *frenchBI != *filteredBI);
1401 } else {
1402 dataerrln("French BI: not valid.");
1403 }
1404 }
1405
1406 #else
1407 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1408 #endif
1409 }
1410
1411 //---------------------------------------------
1412 // runIndexedTest
1413 //---------------------------------------------
1414
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1415 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1416 {
1417 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1418 TESTCASE_AUTO_BEGIN;
1419 #if !UCONFIG_NO_FILE_IO
1420 TESTCASE_AUTO(TestCloneEquals);
1421 TESTCASE_AUTO(TestgetRules);
1422 TESTCASE_AUTO(TestHashCode);
1423 TESTCASE_AUTO(TestGetSetAdoptText);
1424 TESTCASE_AUTO(TestIteration);
1425 #endif
1426 TESTCASE_AUTO(TestBuilder);
1427 TESTCASE_AUTO(TestQuoteGrouping);
1428 TESTCASE_AUTO(TestRuleStatusVec);
1429 TESTCASE_AUTO(TestBug2190);
1430 #if !UCONFIG_NO_FILE_IO
1431 TESTCASE_AUTO(TestRegistration);
1432 TESTCASE_AUTO(TestBoilerPlate);
1433 TESTCASE_AUTO(TestRuleStatus);
1434 TESTCASE_AUTO(TestRoundtripRules);
1435 TESTCASE_AUTO(TestGetBinaryRules);
1436 #endif
1437 TESTCASE_AUTO(TestRefreshInputText);
1438 #if !UCONFIG_NO_BREAK_ITERATION
1439 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1440 #endif
1441 TESTCASE_AUTO_END;
1442 }
1443
1444
1445 //---------------------------------------------
1446 //Internal subroutines
1447 //---------------------------------------------
1448
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1449 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1450 logln((UnicodeString)"testIsBoundary():");
1451 int32_t p = 0;
1452 UBool isB;
1453 for (int32_t i = 0; i < text.length(); i++) {
1454 isB = bi.isBoundary(i);
1455 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1456
1457 if (i == boundaries[p]) {
1458 if (!isB)
1459 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1460 p++;
1461 }
1462 else {
1463 if (isB)
1464 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1465 }
1466 }
1467 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1468 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1469 UnicodeString selected;
1470 UnicodeString expected=CharsToUnicodeString(expectedString);
1471
1472 if(gotoffset != expectedOffset)
1473 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1474 if(start <= gotoffset){
1475 testString.extractBetween(start, gotoffset, selected);
1476 }
1477 else{
1478 testString.extractBetween(gotoffset, start, selected);
1479 }
1480 if(selected.compare(expected) != 0)
1481 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1482 else
1483 logln(prettify("****selected \"" + selected + "\""));
1484 }
1485
1486 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1487