1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33 * API Test the RuleBasedBreakIterator class
34 */
35
36
// TEST_ASSERT_SUCCESS(status)
//   If status is a failure code, reports it through dataerrln() together with
//   the file and line of the expansion site. It only reports: it neither
//   returns from the enclosing function nor resets status.
//   Expands to a call of dataerrln(), so it must be used where that name is
//   callable (presumably inside IntlTest member functions - confirm).
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
    dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
39
// TEST_ASSERT(expr)
//   If expr compares equal to FALSE, reports a test error through errln() with
//   the file, line and the stringified text of expr. Because the expression
//   text is stringified (#expr), renaming a variable inside the argument
//   changes the failure message. Execution continues after a failure.
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
42
TestCloneEquals()43 void RBBIAPITest::TestCloneEquals()
44 {
45
46 UErrorCode status=U_ZERO_ERROR;
47 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51 if(U_FAILURE(status)){
52 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53 return;
54 }
55
56
57 UnicodeString testString="Testing word break iterators's clone() and equals()";
58 bi1->setText(testString);
59 bi2->setText(testString);
60 biequal->setText(testString);
61
62 bi3->setText("hello");
63
64 logln((UnicodeString)"Testing equals()");
65
66 logln((UnicodeString)"Testing == and !=");
67 UBool b = (*bi1 != *biequal);
68 b |= *bi1 == *bi2;
69 b |= *bi1 == *bi3;
70 if (b) {
71 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
72 }
73
74 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
75 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__, __LINE__);
76
77
78 // Quick test of RulesBasedBreakIterator assignment -
79 // Check that
80 // two different iterators are !=
81 // they are == after assignment
82 // source and dest iterator produce the same next() after assignment.
83 // deleting one doesn't disable the other.
84 logln("Testing assignment");
85 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86 if(U_FAILURE(status)){
87 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88 return;
89 }
90
91 RuleBasedBreakIterator biDefault, biDefault2;
92 if(U_FAILURE(status)){
93 errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
94 return;
95 }
96 if (biDefault == *bix) {
97 errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
98 return;
99 }
100 if (biDefault != biDefault2) {
101 errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
102 return;
103 }
104
105
106 UnicodeString HelloString("Hello Kitty");
107 bix->setText(HelloString);
108 if (*bix == *bi2) {
109 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
110 }
111 *bix = *bi2;
112 if (*bix != *bi2) {
113 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
114 }
115
116 int bixnext = bix->next();
117 int bi2next = bi2->next();
118 if (! (bixnext == bi2next && bixnext == 7)) {
119 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
120 }
121 delete bix;
122 if (bi2->next() != 8) {
123 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
124 }
125
126
127
128 logln((UnicodeString)"Testing clone()");
129 RuleBasedBreakIterator* bi1clone = dynamic_cast<RuleBasedBreakIterator *>(bi1->clone());
130 RuleBasedBreakIterator* bi2clone = dynamic_cast<RuleBasedBreakIterator *>(bi2->clone());
131
132 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
133 *bi1clone == *bi3 || *bi1clone == *bi2)
134 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
135
136 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137 *bi2clone == *bi3 || *bi2clone != *bi2)
138 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
139
140 if(bi1->getText() != bi1clone->getText() ||
141 bi2clone->getText() != bi2->getText() ||
142 *bi2clone == *bi1clone )
143 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
144
145 delete bi1clone;
146 delete bi2clone;
147 delete bi1;
148 delete bi3;
149 delete bi2;
150 delete biequal;
151 }
152
TestBoilerPlate()153 void RBBIAPITest::TestBoilerPlate()
154 {
155 UErrorCode status = U_ZERO_ERROR;
156 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158 if (U_FAILURE(status)) {
159 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160 return;
161 }
162 if(*a!=*b){
163 errln("Failed: boilerplate method operator!= does not return correct results");
164 }
165 // Japanese word break iterators are identical to root with
166 // a dictionary-based break iterator
167 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169 if(c && d){
170 if(*c!=*d){
171 errln("Failed: boilerplate method operator== does not return correct results");
172 }
173 }else{
174 errln("creation of break iterator failed");
175 }
176 delete a;
177 delete b;
178 delete c;
179 delete d;
180 }
181
TestgetRules()182 void RBBIAPITest::TestgetRules()
183 {
184 UErrorCode status=U_ZERO_ERROR;
185
186 LocalPointer<RuleBasedBreakIterator> bi1(
187 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
188 LocalPointer<RuleBasedBreakIterator> bi2(
189 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
190 if(U_FAILURE(status)){
191 errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
192 return;
193 }
194
195 logln((UnicodeString)"Testing getRules()");
196
197 UnicodeString text(u"Hello there");
198 bi1->setText(text);
199
200 LocalPointer <RuleBasedBreakIterator> bi3((RuleBasedBreakIterator*)bi1->clone());
201
202 UnicodeString temp=bi1->getRules();
203 UnicodeString temp2=bi2->getRules();
204 UnicodeString temp3=bi3->getRules();
205 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
206 errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
207
208 RuleBasedBreakIterator bi4; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
209 if (!bi4.getRules().isEmpty()) {
210 errln("%s:%d Empty string expected.", __FILE__, __LINE__);
211 }
212 }
213
TestHashCode()214 void RBBIAPITest::TestHashCode()
215 {
216 UErrorCode status=U_ZERO_ERROR;
217 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
219 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
220 if(U_FAILURE(status)){
221 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
222 delete bi1;
223 delete bi2;
224 delete bi3;
225 return;
226 }
227
228
229 logln((UnicodeString)"Testing hashCode()");
230
231 bi1->setText((UnicodeString)"Hash code");
232 bi2->setText((UnicodeString)"Hash code");
233 bi3->setText((UnicodeString)"Hash code");
234
235 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
236 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
237
238 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
239 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
240 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
241
242 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
243 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
244 errln((UnicodeString)"ERROR: different objects have same hashcodes");
245
246 delete bi1clone;
247 delete bi2clone;
248 delete bi1;
249 delete bi2;
250 delete bi3;
251
252 }
TestGetSetAdoptText()253 void RBBIAPITest::TestGetSetAdoptText()
254 {
255 logln((UnicodeString)"Testing getText setText ");
256 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
257 UnicodeString str1="first string.";
258 UnicodeString str2="Second string.";
259 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
260 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
261 if(status.isFailure()){
262 errcheckln(status, "Fail : in construction - %s", status.errorName());
263 return;
264 }
265
266
267 CharacterIterator* text1= new StringCharacterIterator(str1);
268 CharacterIterator* text1Clone = text1->clone();
269 CharacterIterator* text2= new StringCharacterIterator(str2);
270 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
271
272 wordIter1->setText(str1);
273 CharacterIterator *tci = &wordIter1->getText();
274 UnicodeString tstr;
275 tci->getText(tstr);
276 TEST_ASSERT(tstr == str1);
277 if(wordIter1->current() != 0)
278 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
279
280 wordIter1->next(2);
281
282 wordIter1->setText(str2);
283 if(wordIter1->current() != 0)
284 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285
286
287 charIter1->adoptText(text1Clone);
288 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
289 tci = &wordIter1->getText();
290 tci->getText(tstr);
291 TEST_ASSERT(tstr == str2);
292 tci = &charIter1->getText();
293 tci->getText(tstr);
294 TEST_ASSERT(tstr == str1);
295
296
297 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
298 rb->adoptText(text1);
299 if(rb->getText() != *text1)
300 errln((UnicodeString)"ERROR:1 error in adoptText ");
301 rb->adoptText(text2);
302 if(rb->getText() != *text2)
303 errln((UnicodeString)"ERROR:2 error in adoptText ");
304
305 // Adopt where iterator range is less than the entire orignal source string.
306 // (With the change of the break engine to working with UText internally,
307 // CharacterIterators starting at positions other than zero are not supported)
308 rb->adoptText(text3);
309 TEST_ASSERT(rb->preceding(2) == 0);
310 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
311 //if(rb->preceding(2) != 3) {
312 // errln((UnicodeString)"ERROR:3 error in adoptText ");
313 //}
314 //if(rb->following(11) != BreakIterator::DONE) {
315 // errln((UnicodeString)"ERROR:4 error in adoptText ");
316 //}
317
318 // UText API
319 //
320 // Quick test to see if UText is working at all.
321 //
322 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
323 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
324 // 012345678901
325
326 status.reset();
327 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
328 wordIter1->setText(ut.getAlias(), status);
329 TEST_ASSERT_SUCCESS(status);
330
331 int32_t pos;
332 pos = wordIter1->first();
333 TEST_ASSERT(pos==0);
334 pos = wordIter1->next();
335 TEST_ASSERT(pos==5);
336 pos = wordIter1->next();
337 TEST_ASSERT(pos==6);
338 pos = wordIter1->next();
339 TEST_ASSERT(pos==11);
340 pos = wordIter1->next();
341 TEST_ASSERT(pos==UBRK_DONE);
342
343 status.reset();
344 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
345 TEST_ASSERT_SUCCESS(status);
346 wordIter1->setText(ut2.getAlias(), status);
347 TEST_ASSERT_SUCCESS(status);
348
349 pos = wordIter1->first();
350 TEST_ASSERT(pos==0);
351 pos = wordIter1->next();
352 TEST_ASSERT(pos==3);
353 pos = wordIter1->next();
354 TEST_ASSERT(pos==4);
355
356 pos = wordIter1->last();
357 TEST_ASSERT(pos==6);
358 pos = wordIter1->previous();
359 TEST_ASSERT(pos==4);
360 pos = wordIter1->previous();
361 TEST_ASSERT(pos==3);
362 pos = wordIter1->previous();
363 TEST_ASSERT(pos==0);
364 pos = wordIter1->previous();
365 TEST_ASSERT(pos==UBRK_DONE);
366
367 status.reset();
368 UnicodeString sEmpty;
369 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
370 wordIter1->getUText(gut2.getAlias(), status);
371 TEST_ASSERT_SUCCESS(status);
372 status.reset();
373 }
374
375
TestIteration()376 void RBBIAPITest::TestIteration()
377 {
378 // This test just verifies that the API is present.
379 // Testing for correct operation of the break rules happens elsewhere.
380
381 UErrorCode status=U_ZERO_ERROR;
382 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
383 if (U_FAILURE(status) || bi == NULL) {
384 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
385 }
386 delete bi;
387
388 status=U_ZERO_ERROR;
389 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
390 if (U_FAILURE(status) || bi == NULL) {
391 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
392 }
393 delete bi;
394
395 status=U_ZERO_ERROR;
396 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
397 if (U_FAILURE(status) || bi == NULL) {
398 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
399 }
400 delete bi;
401
402 status=U_ZERO_ERROR;
403 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
404 if (U_FAILURE(status) || bi == NULL) {
405 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
406 }
407 delete bi;
408
409 status=U_ZERO_ERROR;
410 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
411 if (U_FAILURE(status) || bi == NULL) {
412 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
413 }
414 delete bi;
415
416 status=U_ZERO_ERROR;
417 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
418 if (U_FAILURE(status) || bi == NULL) {
419 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
420 return; // Skip the rest of these tests.
421 }
422
423
424 UnicodeString testString="0123456789";
425 bi->setText(testString);
426
427 int32_t i;
428 i = bi->first();
429 if (i != 0) {
430 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__, __LINE__, i);
431 }
432
433 i = bi->last();
434 if (i != 10) {
435 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__, __LINE__, i);
436 }
437
438 //
439 // Previous
440 //
441 bi->last();
442 i = bi->previous();
443 if (i != 9) {
444 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__, __LINE__, i);
445 }
446
447
448 bi->first();
449 i = bi->previous();
450 if (i != BreakIterator::DONE) {
451 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__, __LINE__, i);
452 }
453
454 //
455 // next()
456 //
457 bi->first();
458 i = bi->next();
459 if (i != 1) {
460 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__, __LINE__, i);
461 }
462
463 bi->last();
464 i = bi->next();
465 if (i != BreakIterator::DONE) {
466 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__, __LINE__, i);
467 }
468
469
470 //
471 // current()
472 //
473 bi->first();
474 i = bi->current();
475 if (i != 0) {
476 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
477 }
478
479 bi->next();
480 i = bi->current();
481 if (i != 1) {
482 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__, __LINE__, i);
483 }
484
485 bi->last();
486 bi->next();
487 i = bi->current();
488 if (i != 10) {
489 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
490 }
491
492 bi->first();
493 bi->previous();
494 i = bi->current();
495 if (i != 0) {
496 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
497 }
498
499
500 //
501 // Following()
502 //
503 i = bi->following(4);
504 if (i != 5) {
505 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__, __LINE__, i);
506 }
507
508 i = bi->following(9);
509 if (i != 10) {
510 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__, __LINE__, i);
511 }
512
513 i = bi->following(10);
514 if (i != BreakIterator::DONE) {
515 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__, __LINE__, i);
516 }
517
518
519 //
520 // Preceding
521 //
522 i = bi->preceding(4);
523 if (i != 3) {
524 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__, __LINE__, i);
525 }
526
527 i = bi->preceding(10);
528 if (i != 9) {
529 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__, __LINE__, i);
530 }
531
532 i = bi->preceding(1);
533 if (i != 0) {
534 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__, __LINE__, i);
535 }
536
537 i = bi->preceding(0);
538 if (i != BreakIterator::DONE) {
539 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__, __LINE__, i);
540 }
541
542
543 //
544 // isBoundary()
545 //
546 bi->first();
547 if (bi->isBoundary(3) != TRUE) {
548 errln("%s:%d Incorrect value from bi->isBoudary(). Expected TRUE, got FALSE", __FILE__, __LINE__, i);
549 }
550 i = bi->current();
551 if (i != 3) {
552 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__, __LINE__, i);
553 }
554
555
556 if (bi->isBoundary(11) != FALSE) {
557 errln("%s:%d Incorrect value from bi->isBoudary(). Expected FALSE, got TRUE", __FILE__, __LINE__, i);
558 }
559 i = bi->current();
560 if (i != 10) {
561 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
562 }
563
564 //
565 // next(n)
566 //
567 bi->first();
568 i = bi->next(4);
569 if (i != 4) {
570 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__, __LINE__, i);
571 }
572
573 i = bi->next(6);
574 if (i != 10) {
575 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__, __LINE__, i);
576 }
577
578 bi->first();
579 i = bi->next(11);
580 if (i != BreakIterator::DONE) {
581 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
582 }
583
584 delete bi;
585
586 }
587
588
589
590
591
592
TestBuilder()593 void RBBIAPITest::TestBuilder() {
594 UnicodeString rulesString1 = "$Letters = [:L:];\n"
595 "$Numbers = [:N:];\n"
596 "$Letters+;\n"
597 "$Numbers+;\n"
598 "[^$Letters $Numbers];\n"
599 "!.*;\n";
600 UnicodeString testString1 = "abc123..abc";
601 // 01234567890
602 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
603 UErrorCode status=U_ZERO_ERROR;
604 UParseError parseError;
605
606 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
607 if(U_FAILURE(status)) {
608 dataerrln("Fail : in construction - %s", u_errorName(status));
609 } else {
610 bi->setText(testString1);
611 doBoundaryTest(*bi, testString1, bounds1);
612 }
613 delete bi;
614 }
615
616
617 //
618 // TestQuoteGrouping
619 // Single quotes within rules imply a grouping, so that a modifier
620 // following the quoted text (* or +) applies to all of the quoted chars.
621 //
TestQuoteGrouping()622 void RBBIAPITest::TestQuoteGrouping() {
623 UnicodeString rulesString1 = "#Here comes the rule...\n"
624 "'$@!'*;\n" // (\$\@\!)*
625 ".;\n";
626
627 UnicodeString testString1 = "$@!$@!X$@!!X";
628 // 0123456789012
629 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
630 UErrorCode status=U_ZERO_ERROR;
631 UParseError parseError;
632
633 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
634 if(U_FAILURE(status)) {
635 dataerrln("Fail : in construction - %s", u_errorName(status));
636 } else {
637 bi->setText(testString1);
638 doBoundaryTest(*bi, testString1, bounds1);
639 }
640 delete bi;
641 }
642
643 //
644 // TestRuleStatus
645 // Test word break rule status constants.
646 //
TestRuleStatus()647 void RBBIAPITest::TestRuleStatus() {
648 UChar str[30];
649 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
650 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
651 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
652 // 012345678901234567 8 9 0
653 // Katakana
654 str, 30);
655 UnicodeString testString1(str);
656 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
657 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
658 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
659 UBRK_WORD_IDEO, UBRK_WORD_NONE};
660
661 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
662 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
663 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
664
665 UErrorCode status=U_ZERO_ERROR;
666
667 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
668 if(U_FAILURE(status)) {
669 errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
670 } else {
671 bi->setText(testString1);
672 // First test that the breaks are in the right spots.
673 doBoundaryTest(*bi, testString1, bounds1);
674
675 // Then go back and check tag values
676 int32_t i = 0;
677 int32_t pos, tag;
678 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
679 if (pos != bounds1[i]) {
680 errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__, __LINE__, pos);
681 break;
682 }
683 tag = bi->getRuleStatus();
684 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
685 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
686 break;
687 }
688
689 // Check that we get the same tag values from getRuleStatusVec()
690 int32_t vec[10];
691 int t = bi->getRuleStatusVec(vec, 10, status);
692 TEST_ASSERT_SUCCESS(status);
693 TEST_ASSERT(t==1);
694 TEST_ASSERT(vec[0] == tag);
695 }
696 }
697 delete bi;
698
699 // Now test line break status. This test mostly is to confirm that the status constants
700 // are correctly declared in the header.
701 testString1 = "test line. \n";
702 // break type s s h
703
704 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
705 if(U_FAILURE(status)) {
706 errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
707 } else {
708 int32_t i = 0;
709 int32_t pos, tag;
710 UBool success;
711
712 bi->setText(testString1);
713 pos = bi->current();
714 tag = bi->getRuleStatus();
715 for (i=0; i<3; i++) {
716 switch (i) {
717 case 0:
718 success = pos==0 && tag==UBRK_LINE_SOFT; break;
719 case 1:
720 success = pos==5 && tag==UBRK_LINE_SOFT; break;
721 case 2:
722 success = pos==12 && tag==UBRK_LINE_HARD; break;
723 default:
724 success = FALSE; break;
725 }
726 if (success == FALSE) {
727 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d",
728 __FILE__, __LINE__, i, pos, tag);
729 break;
730 }
731 pos = bi->next();
732 tag = bi->getRuleStatus();
733 }
734 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
735 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
736 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
737 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
738 }
739 }
740 delete bi;
741
742 }
743
744
745 //
746 // TestRuleStatusVec
747 // Test the vector form of break rule status.
748 //
TestRuleStatusVec()749 void RBBIAPITest::TestRuleStatusVec() {
750 UnicodeString rulesString( "[A-N]{100}; \n"
751 "[a-w]{200}; \n"
752 "[\\p{L}]{300}; \n"
753 "[\\p{N}]{400}; \n"
754 "[0-5]{500}; \n"
755 "!.*;\n", -1, US_INV);
756 UnicodeString testString1 = "Aapz5?";
757 int32_t statusVals[10];
758 int32_t numStatuses;
759 int32_t pos;
760
761 UErrorCode status=U_ZERO_ERROR;
762 UParseError parseError;
763
764 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
765 if (U_FAILURE(status)) {
766 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
767 } else {
768 bi->setText(testString1);
769
770 // A
771 pos = bi->next();
772 TEST_ASSERT(pos==1);
773 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
774 TEST_ASSERT_SUCCESS(status);
775 TEST_ASSERT(numStatuses == 2);
776 TEST_ASSERT(statusVals[0] == 100);
777 TEST_ASSERT(statusVals[1] == 300);
778
779 // a
780 pos = bi->next();
781 TEST_ASSERT(pos==2);
782 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
783 TEST_ASSERT_SUCCESS(status);
784 TEST_ASSERT(numStatuses == 2);
785 TEST_ASSERT(statusVals[0] == 200);
786 TEST_ASSERT(statusVals[1] == 300);
787
788 // p
789 pos = bi->next();
790 TEST_ASSERT(pos==3);
791 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
792 TEST_ASSERT_SUCCESS(status);
793 TEST_ASSERT(numStatuses == 2);
794 TEST_ASSERT(statusVals[0] == 200);
795 TEST_ASSERT(statusVals[1] == 300);
796
797 // z
798 pos = bi->next();
799 TEST_ASSERT(pos==4);
800 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
801 TEST_ASSERT_SUCCESS(status);
802 TEST_ASSERT(numStatuses == 1);
803 TEST_ASSERT(statusVals[0] == 300);
804
805 // 5
806 pos = bi->next();
807 TEST_ASSERT(pos==5);
808 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
809 TEST_ASSERT_SUCCESS(status);
810 TEST_ASSERT(numStatuses == 2);
811 TEST_ASSERT(statusVals[0] == 400);
812 TEST_ASSERT(statusVals[1] == 500);
813
814 // ?
815 pos = bi->next();
816 TEST_ASSERT(pos==6);
817 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
818 TEST_ASSERT_SUCCESS(status);
819 TEST_ASSERT(numStatuses == 1);
820 TEST_ASSERT(statusVals[0] == 0);
821
822 //
823 // Check buffer overflow error handling. Char == A
824 //
825 bi->first();
826 pos = bi->next();
827 TEST_ASSERT(pos==1);
828 memset(statusVals, -1, sizeof(statusVals));
829 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
830 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
831 TEST_ASSERT(numStatuses == 2);
832 TEST_ASSERT(statusVals[0] == -1);
833
834 status = U_ZERO_ERROR;
835 memset(statusVals, -1, sizeof(statusVals));
836 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
837 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
838 TEST_ASSERT(numStatuses == 2);
839 TEST_ASSERT(statusVals[0] == 100);
840 TEST_ASSERT(statusVals[1] == -1);
841
842 status = U_ZERO_ERROR;
843 memset(statusVals, -1, sizeof(statusVals));
844 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
845 TEST_ASSERT_SUCCESS(status);
846 TEST_ASSERT(numStatuses == 2);
847 TEST_ASSERT(statusVals[0] == 100);
848 TEST_ASSERT(statusVals[1] == 300);
849 TEST_ASSERT(statusVals[2] == -1);
850 }
851 delete bi;
852
853 }
854
855 //
856 // Bug 2190 Regression test. Builder crash on rule consisting of only a
857 // $variable reference
TestBug2190()858 void RBBIAPITest::TestBug2190() {
859 UnicodeString rulesString1 = "$aaa = abcd;\n"
860 "$bbb = $aaa;\n"
861 "$bbb;\n";
862 UnicodeString testString1 = "abcdabcd";
863 // 01234567890
864 int32_t bounds1[] = {0, 4, 8};
865 UErrorCode status=U_ZERO_ERROR;
866 UParseError parseError;
867
868 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
869 if(U_FAILURE(status)) {
870 dataerrln("Fail : in construction - %s", u_errorName(status));
871 } else {
872 bi->setText(testString1);
873 doBoundaryTest(*bi, testString1, bounds1);
874 }
875 delete bi;
876 }
877
878
TestRegistration()879 void RBBIAPITest::TestRegistration() {
880 #if !UCONFIG_NO_SERVICE
881 UErrorCode status = U_ZERO_ERROR;
882 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
883 // ok to not delete these if we exit because of error?
884 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
885 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
886 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
887
888 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
889 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
890
891 delete ja_word;
892 delete ja_char;
893 delete root_word;
894 delete root_char;
895
896 return;
897 }
898
899 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
900 {
901 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
902 if (ja_word && *ja_word == *root_word) {
903 errln("japan not different from root");
904 }
905 #endif
906 }
907
908 {
909 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
910 UBool fail = TRUE;
911 if(result){
912 fail = *result != *ja_word;
913 }
914 delete result;
915 if (fail) {
916 errln("bad result for xx_XX/word");
917 }
918 }
919
920 {
921 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
922 UBool fail = TRUE;
923 if(result){
924 fail = *result != *ja_char;
925 }
926 delete result;
927 if (fail) {
928 errln("bad result for ja_JP/char");
929 }
930 }
931
932 {
933 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
934 UBool fail = TRUE;
935 if(result){
936 fail = *result != *root_char;
937 }
938 delete result;
939 if (fail) {
940 errln("bad result for xx_XX/char");
941 }
942 }
943
944 {
945 StringEnumeration* avail = BreakIterator::getAvailableLocales();
946 UBool found = FALSE;
947 const UnicodeString* p;
948 while ((p = avail->snext(status))) {
949 if (p->compare("xx") == 0) {
950 found = TRUE;
951 break;
952 }
953 }
954 delete avail;
955 if (!found) {
956 errln("did not find test locale");
957 }
958 }
959
960 {
961 UBool unreg = BreakIterator::unregister(key, status);
962 if (!unreg) {
963 errln("unable to unregister");
964 }
965 }
966
967 {
968 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
969 BreakIterator* root = BreakIterator::createWordInstance("", status);
970 UBool fail = TRUE;
971 if(root){
972 fail = *root != *result;
973 }
974 delete root;
975 delete result;
976 if (fail) {
977 errln("did not get root break");
978 }
979 }
980
981 {
982 StringEnumeration* avail = BreakIterator::getAvailableLocales();
983 UBool found = FALSE;
984 const UnicodeString* p;
985 while ((p = avail->snext(status))) {
986 if (p->compare("xx") == 0) {
987 found = TRUE;
988 break;
989 }
990 }
991 delete avail;
992 if (found) {
993 errln("found test locale");
994 }
995 }
996
997 {
998 int32_t count;
999 UBool foundLocale = FALSE;
1000 const Locale *avail = BreakIterator::getAvailableLocales(count);
1001 for (int i=0; i<count; i++) {
1002 if (avail[i] == Locale::getEnglish()) {
1003 foundLocale = TRUE;
1004 break;
1005 }
1006 }
1007 if (foundLocale == FALSE) {
1008 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1009 }
1010 }
1011
1012
1013 // ja_word was adopted by factory
1014 delete ja_char;
1015 delete root_word;
1016 delete root_char;
1017 #endif
1018 }
1019
RoundtripRule(const char * dataFile)1020 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1021 UErrorCode status = U_ZERO_ERROR;
1022 UParseError parseError;
1023 parseError.line = 0;
1024 parseError.offset = 0;
1025 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1026 uint32_t length;
1027 const UChar *builtSource;
1028 const uint8_t *rbbiRules;
1029 const uint8_t *builtRules;
1030
1031 if (U_FAILURE(status)) {
1032 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1033 return;
1034 }
1035
1036 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1037 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1038 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1039 if (U_FAILURE(status)) {
1040 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1041 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1042 errln(UnicodeString(builtSource));
1043 return;
1044 };
1045 rbbiRules = brkItr->getBinaryRules(length);
1046 logln("Comparing \"%s\" len=%d", dataFile, length);
1047 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1048 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1049 return;
1050 }
1051 delete brkItr;
1052 }
1053
TestRoundtripRules()1054 void RBBIAPITest::TestRoundtripRules() {
1055 RoundtripRule("word");
1056 RoundtripRule("title");
1057 RoundtripRule("sent");
1058 RoundtripRule("line");
1059 RoundtripRule("char");
1060 if (!quick) {
1061 RoundtripRule("word_POSIX");
1062 }
1063 }
1064
1065
1066 // Check getBinaryRules() and construction of a break iterator from those rules.
1067
TestGetBinaryRules()1068 void RBBIAPITest::TestGetBinaryRules() {
1069 UErrorCode status=U_ZERO_ERROR;
1070 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1071 if (U_FAILURE(status)) {
1072 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1073 return;
1074 }
1075 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1076 if (rbbi == NULL) {
1077 dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1078 return;
1079 }
1080
1081 // Check that the new line break iterator is nominally functional.
1082 UnicodeString helloWorld("Hello, World!");
1083 rbbi->setText(helloWorld);
1084 int n = 0;
1085 while (bi->next() != UBRK_DONE) {
1086 ++n;
1087 }
1088 TEST_ASSERT(n == 2);
1089
1090 // Extract the binary rules as a uint8_t blob.
1091 uint32_t ruleLength;
1092 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1093 TEST_ASSERT(ruleLength > 0);
1094 TEST_ASSERT(binRules != NULL);
1095
1096 // Clone the binary rules, and create a break iterator from that.
1097 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1098 uint8_t *clonedRules = new uint8_t[ruleLength];
1099 memcpy(clonedRules, binRules, ruleLength);
1100 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1101 TEST_ASSERT_SUCCESS(status);
1102
1103 // Check that the cloned line break iterator is nominally alive.
1104 clonedBI.setText(helloWorld);
1105 n = 0;
1106 while (clonedBI.next() != UBRK_DONE) {
1107 ++n;
1108 }
1109 TEST_ASSERT(n == 2);
1110
1111 delete[] clonedRules;
1112 }
1113
1114
TestRefreshInputText()1115 void RBBIAPITest::TestRefreshInputText() {
1116 /*
1117 * RefreshInput changes out the input of a Break Iterator without
1118 * changing anything else in the iterator's state. Used with Java JNI,
1119 * when Java moves the underlying string storage. This test
1120 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1121 * The right set of boundaries should still be found.
1122 */
1123 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1124 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1125 UErrorCode status = U_ZERO_ERROR;
1126 UText ut1 = UTEXT_INITIALIZER;
1127 UText ut2 = UTEXT_INITIALIZER;
1128 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1129 TEST_ASSERT_SUCCESS(status);
1130
1131 utext_openUChars(&ut1, testStr, -1, &status);
1132 TEST_ASSERT_SUCCESS(status);
1133
1134 if (U_SUCCESS(status)) {
1135 bi->setText(&ut1, status);
1136 TEST_ASSERT_SUCCESS(status);
1137
1138 /* Line boundaries will occur before each letter in the original string */
1139 TEST_ASSERT(1 == bi->next());
1140 TEST_ASSERT(3 == bi->next());
1141
1142 /* Move the string, kill the original string. */
1143 u_strcpy(movedStr, testStr);
1144 u_memset(testStr, 0x20, u_strlen(testStr));
1145 utext_openUChars(&ut2, movedStr, -1, &status);
1146 TEST_ASSERT_SUCCESS(status);
1147 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1148 TEST_ASSERT_SUCCESS(status);
1149 TEST_ASSERT(bi == returnedBI);
1150
1151 /* Find the following matches, now working in the moved string. */
1152 TEST_ASSERT(5 == bi->next());
1153 TEST_ASSERT(7 == bi->next());
1154 TEST_ASSERT(8 == bi->next());
1155 TEST_ASSERT(UBRK_DONE == bi->next());
1156
1157 utext_close(&ut1);
1158 utext_close(&ut2);
1159 }
1160 delete bi;
1161
1162 }
1163
1164 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1165 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1166 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1167 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1168
1169 int32_t *pos = new int32_t[ustr.length()];
1170 int32_t posCount = 0;
1171
1172 // calculate breaks up front, so we can print out
1173 // sans any debugging
1174 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1175 pos[posCount++] = n;
1176 if(posCount>=ustr.length()) {
1177 it.errln("brk count exceeds string length!");
1178 return;
1179 }
1180 }
1181 UnicodeString out;
1182 out.append((UChar)CHSTR);
1183 int32_t prev = 0;
1184 for(int32_t i=0;i<posCount;i++) {
1185 int32_t n=pos[i];
1186 out.append(ustr.tempSubString(prev,n-prev));
1187 out.append((UChar)PILCROW);
1188 prev=n;
1189 }
1190 out.append(ustr.tempSubString(prev,ustr.length()-prev));
1191 out.append((UChar)CHEND);
1192 it.logln(out);
1193
1194 out.remove();
1195 for(int32_t i=0;i<posCount;i++) {
1196 char tmp[100];
1197 sprintf(tmp,"%d ",pos[i]);
1198 out.append(UnicodeString(tmp));
1199 }
1200 it.logln(out);
1201 delete [] pos;
1202 }
1203 #endif
1204
TestFilteredBreakIteratorBuilder()1205 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1206 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1207 UErrorCode status = U_ZERO_ERROR;
1208 LocalPointer<FilteredBreakIteratorBuilder> builder;
1209 LocalPointer<BreakIterator> baseBI;
1210 LocalPointer<BreakIterator> filteredBI;
1211 LocalPointer<BreakIterator> frenchBI;
1212
1213 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1214 const UnicodeString ABBR_MR("Mr.");
1215 const UnicodeString ABBR_CAPT("Capt.");
1216
1217 {
1218 logln("Constructing empty builder\n");
1219 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1220 TEST_ASSERT_SUCCESS(status);
1221
1222 logln("Constructing base BI\n");
1223 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1224 TEST_ASSERT_SUCCESS(status);
1225
1226 logln("Building new BI\n");
1227 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1228 TEST_ASSERT_SUCCESS(status);
1229
1230 if (U_SUCCESS(status)) {
1231 logln("Testing:");
1232 filteredBI->setText(text);
1233 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1234 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1235 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1236 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1237 TEST_ASSERT(278 == filteredBI->next()); // charge.
1238 filteredBI->first();
1239 prtbrks(filteredBI.getAlias(), text, *this);
1240 }
1241 }
1242
1243 {
1244 logln("Constructing empty builder\n");
1245 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1246 TEST_ASSERT_SUCCESS(status);
1247
1248 if (U_SUCCESS(status)) {
1249 logln("Adding Mr. as an exception\n");
1250 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1251 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1252 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1253 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1254 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1255 TEST_ASSERT_SUCCESS(status);
1256
1257 logln("Constructing base BI\n");
1258 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1259 TEST_ASSERT_SUCCESS(status);
1260
1261 logln("Building new BI\n");
1262 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1263 TEST_ASSERT_SUCCESS(status);
1264
1265 logln("Testing:");
1266 filteredBI->setText(text);
1267 TEST_ASSERT(84 == filteredBI->next());
1268 TEST_ASSERT(90 == filteredBI->next());// Capt.
1269 TEST_ASSERT(278 == filteredBI->next());
1270 filteredBI->first();
1271 prtbrks(filteredBI.getAlias(), text, *this);
1272 }
1273 }
1274
1275
1276 {
1277 logln("Constructing empty builder\n");
1278 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1279 TEST_ASSERT_SUCCESS(status);
1280
1281 if (U_SUCCESS(status)) {
1282 logln("Adding Mr. and Capt as an exception\n");
1283 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1284 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1285 TEST_ASSERT_SUCCESS(status);
1286
1287 logln("Constructing base BI\n");
1288 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1289 TEST_ASSERT_SUCCESS(status);
1290
1291 logln("Building new BI\n");
1292 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1293 TEST_ASSERT_SUCCESS(status);
1294
1295 logln("Testing:");
1296 filteredBI->setText(text);
1297 TEST_ASSERT(84 == filteredBI->next());
1298 TEST_ASSERT(278 == filteredBI->next());
1299 filteredBI->first();
1300 prtbrks(filteredBI.getAlias(), text, *this);
1301 }
1302 }
1303
1304
1305 {
1306 logln("Constructing English builder\n");
1307 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1308 TEST_ASSERT_SUCCESS(status);
1309
1310 logln("Constructing base BI\n");
1311 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1312 TEST_ASSERT_SUCCESS(status);
1313
1314 if (U_SUCCESS(status)) {
1315 logln("unsuppressing 'Capt'");
1316 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1317
1318 logln("Building new BI\n");
1319 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1320 TEST_ASSERT_SUCCESS(status);
1321
1322 if(filteredBI.isValid()) {
1323 logln("Testing:");
1324 filteredBI->setText(text);
1325 TEST_ASSERT(84 == filteredBI->next());
1326 TEST_ASSERT(90 == filteredBI->next());
1327 TEST_ASSERT(278 == filteredBI->next());
1328 filteredBI->first();
1329 prtbrks(filteredBI.getAlias(), text, *this);
1330 }
1331 }
1332 }
1333
1334
1335 {
1336 logln("Constructing English builder\n");
1337 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1338 TEST_ASSERT_SUCCESS(status);
1339
1340 logln("Constructing base BI\n");
1341 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1342 TEST_ASSERT_SUCCESS(status);
1343
1344 if (U_SUCCESS(status)) {
1345 logln("Building new BI\n");
1346 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1347 TEST_ASSERT_SUCCESS(status);
1348
1349 if(filteredBI.isValid()) {
1350 logln("Testing:");
1351 filteredBI->setText(text);
1352 TEST_ASSERT(84 == filteredBI->next());
1353 TEST_ASSERT(278 == filteredBI->next());
1354 filteredBI->first();
1355 prtbrks(filteredBI.getAlias(), text, *this);
1356 }
1357 }
1358 }
1359
1360 // reenable once french is in
1361 {
1362 logln("Constructing French builder");
1363 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1364 TEST_ASSERT_SUCCESS(status);
1365
1366 logln("Constructing base BI\n");
1367 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1368 TEST_ASSERT_SUCCESS(status);
1369
1370 if (U_SUCCESS(status)) {
1371 logln("Building new BI\n");
1372 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1373 TEST_ASSERT_SUCCESS(status);
1374 }
1375
1376 if(frenchBI.isValid()) {
1377 logln("Testing:");
1378 UnicodeString frText("C'est MM. Duval.");
1379 frenchBI->setText(frText);
1380 TEST_ASSERT(16 == frenchBI->next());
1381 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1382 frenchBI->first();
1383 prtbrks(frenchBI.getAlias(), frText, *this);
1384 logln("Testing against English:");
1385 filteredBI->setText(frText);
1386 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1387 TEST_ASSERT(16 == filteredBI->next());
1388 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1389 filteredBI->first();
1390 prtbrks(filteredBI.getAlias(), frText, *this);
1391
1392 // Verify ==
1393 TEST_ASSERT_TRUE(*frenchBI == *frenchBI);
1394 TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1395 TEST_ASSERT_TRUE(*frenchBI != *filteredBI);
1396 } else {
1397 dataerrln("French BI: not valid.");
1398 }
1399 }
1400
1401 #else
1402 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1403 #endif
1404 }
1405
1406 //---------------------------------------------
1407 // runIndexedTest
1408 //---------------------------------------------
1409
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1410 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1411 {
1412 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1413 TESTCASE_AUTO_BEGIN;
1414 #if !UCONFIG_NO_FILE_IO
1415 TESTCASE_AUTO(TestCloneEquals);
1416 TESTCASE_AUTO(TestgetRules);
1417 TESTCASE_AUTO(TestHashCode);
1418 TESTCASE_AUTO(TestGetSetAdoptText);
1419 TESTCASE_AUTO(TestIteration);
1420 #endif
1421 TESTCASE_AUTO(TestBuilder);
1422 TESTCASE_AUTO(TestQuoteGrouping);
1423 TESTCASE_AUTO(TestRuleStatusVec);
1424 TESTCASE_AUTO(TestBug2190);
1425 #if !UCONFIG_NO_FILE_IO
1426 TESTCASE_AUTO(TestRegistration);
1427 TESTCASE_AUTO(TestBoilerPlate);
1428 TESTCASE_AUTO(TestRuleStatus);
1429 TESTCASE_AUTO(TestRoundtripRules);
1430 TESTCASE_AUTO(TestGetBinaryRules);
1431 #endif
1432 TESTCASE_AUTO(TestRefreshInputText);
1433 #if !UCONFIG_NO_BREAK_ITERATION
1434 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1435 #endif
1436 TESTCASE_AUTO_END;
1437 }
1438
1439
1440 //---------------------------------------------
1441 //Internal subroutines
1442 //---------------------------------------------
1443
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1444 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1445 logln((UnicodeString)"testIsBoundary():");
1446 int32_t p = 0;
1447 UBool isB;
1448 for (int32_t i = 0; i < text.length(); i++) {
1449 isB = bi.isBoundary(i);
1450 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1451
1452 if (i == boundaries[p]) {
1453 if (!isB)
1454 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1455 p++;
1456 }
1457 else {
1458 if (isB)
1459 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1460 }
1461 }
1462 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1463 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1464 UnicodeString selected;
1465 UnicodeString expected=CharsToUnicodeString(expectedString);
1466
1467 if(gotoffset != expectedOffset)
1468 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1469 if(start <= gotoffset){
1470 testString.extractBetween(start, gotoffset, selected);
1471 }
1472 else{
1473 testString.extractBetween(gotoffset, start, selected);
1474 }
1475 if(selected.compare(expected) != 0)
1476 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1477 else
1478 logln(prettify("****selected \"" + selected + "\""));
1479 }
1480
1481 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1482