1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33 * API Test the RuleBasedBreakIterator class
34 */
35
36
// Reports a data error (file, line and ICU error name) when `status` holds a failure code.
// Only reports; control continues with the statement after the macro.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Reports a test error that quotes the source text of `expr` when `expr` evaluates to false.
// Only reports; control continues with the statement after the macro.
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
}
42
TestCloneEquals()43 void RBBIAPITest::TestCloneEquals()
44 {
45
46 UErrorCode status=U_ZERO_ERROR;
47 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51 if(U_FAILURE(status)){
52 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53 return;
54 }
55
56
57 UnicodeString testString="Testing word break iterators's clone() and equals()";
58 bi1->setText(testString);
59 bi2->setText(testString);
60 biequal->setText(testString);
61
62 bi3->setText("hello");
63
64 logln((UnicodeString)"Testing equals()");
65
66 logln((UnicodeString)"Testing == and !=");
67 UBool b = (*bi1 != *biequal);
68 b |= *bi1 == *bi2;
69 b |= *bi1 == *bi3;
70 if (b) {
71 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
72 }
73
74 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
75 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__, __LINE__);
76
77
78 // Quick test of RulesBasedBreakIterator assignment -
79 // Check that
80 // two different iterators are !=
81 // they are == after assignment
82 // source and dest iterator produce the same next() after assignment.
83 // deleting one doesn't disable the other.
84 logln("Testing assignment");
85 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86 if(U_FAILURE(status)){
87 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88 return;
89 }
90
91 RuleBasedBreakIterator biDefault, biDefault2;
92 if(U_FAILURE(status)){
93 errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
94 return;
95 }
96 if (biDefault == *bix) {
97 errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
98 return;
99 }
100 if (biDefault != biDefault2) {
101 errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
102 return;
103 }
104
105
106 UnicodeString HelloString("Hello Kitty");
107 bix->setText(HelloString);
108 if (*bix == *bi2) {
109 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
110 }
111 *bix = *bi2;
112 if (*bix != *bi2) {
113 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
114 }
115
116 int bixnext = bix->next();
117 int bi2next = bi2->next();
118 if (! (bixnext == bi2next && bixnext == 7)) {
119 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
120 }
121 delete bix;
122 if (bi2->next() != 8) {
123 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
124 }
125
126
127
128 logln((UnicodeString)"Testing clone()");
129 RuleBasedBreakIterator* bi1clone = dynamic_cast<RuleBasedBreakIterator *>(bi1->clone());
130 RuleBasedBreakIterator* bi2clone = dynamic_cast<RuleBasedBreakIterator *>(bi2->clone());
131
132 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
133 *bi1clone == *bi3 || *bi1clone == *bi2)
134 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
135
136 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137 *bi2clone == *bi3 || *bi2clone != *bi2)
138 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
139
140 if(bi1->getText() != bi1clone->getText() ||
141 bi2clone->getText() != bi2->getText() ||
142 *bi2clone == *bi1clone )
143 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
144
145 delete bi1clone;
146 delete bi2clone;
147 delete bi1;
148 delete bi3;
149 delete bi2;
150 delete biequal;
151 }
152
TestBoilerPlate()153 void RBBIAPITest::TestBoilerPlate()
154 {
155 UErrorCode status = U_ZERO_ERROR;
156 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158 if (U_FAILURE(status)) {
159 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160 return;
161 }
162 if(*a!=*b){
163 errln("Failed: boilerplate method operator!= does not return correct results");
164 }
165 // Japanese word break iterators are identical to root with
166 // a dictionary-based break iterator
167 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169 if(c && d){
170 if(*c!=*d){
171 errln("Failed: boilerplate method operator== does not return correct results");
172 }
173 }else{
174 errln("creation of break iterator failed");
175 }
176 delete a;
177 delete b;
178 delete c;
179 delete d;
180 }
181
TestgetRules()182 void RBBIAPITest::TestgetRules()
183 {
184 UErrorCode status=U_ZERO_ERROR;
185
186 LocalPointer<RuleBasedBreakIterator> bi1(
187 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
188 LocalPointer<RuleBasedBreakIterator> bi2(
189 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
190 if(U_FAILURE(status)){
191 errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
192 return;
193 }
194
195 logln((UnicodeString)"Testing getRules()");
196
197 UnicodeString text(u"Hello there");
198 bi1->setText(text);
199
200 LocalPointer <RuleBasedBreakIterator> bi3((RuleBasedBreakIterator*)bi1->clone());
201
202 UnicodeString temp=bi1->getRules();
203 UnicodeString temp2=bi2->getRules();
204 UnicodeString temp3=bi3->getRules();
205 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
206 errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
207
208 RuleBasedBreakIterator bi4; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
209 if (!bi4.getRules().isEmpty()) {
210 errln("%s:%d Empty string expected.", __FILE__, __LINE__);
211 }
212 }
213
TestHashCode()214 void RBBIAPITest::TestHashCode()
215 {
216 UErrorCode status=U_ZERO_ERROR;
217 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
219 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
220 if(U_FAILURE(status)){
221 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
222 delete bi1;
223 delete bi2;
224 delete bi3;
225 return;
226 }
227
228
229 logln((UnicodeString)"Testing hashCode()");
230
231 bi1->setText((UnicodeString)"Hash code");
232 bi2->setText((UnicodeString)"Hash code");
233 bi3->setText((UnicodeString)"Hash code");
234
235 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
236 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
237
238 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
239 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
240 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
241
242 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
243 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
244 errln((UnicodeString)"ERROR: different objects have same hashcodes");
245
246 delete bi1clone;
247 delete bi2clone;
248 delete bi1;
249 delete bi2;
250 delete bi3;
251
252 }
TestGetSetAdoptText()253 void RBBIAPITest::TestGetSetAdoptText()
254 {
255 logln((UnicodeString)"Testing getText setText ");
256 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
257 UnicodeString str1="first string.";
258 UnicodeString str2="Second string.";
259 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
260 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
261 if(status.isFailure()){
262 errcheckln(status, "Fail : in construction - %s", status.errorName());
263 return;
264 }
265
266
267 CharacterIterator* text1= new StringCharacterIterator(str1);
268 CharacterIterator* text1Clone = text1->clone();
269 CharacterIterator* text2= new StringCharacterIterator(str2);
270 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
271
272 wordIter1->setText(str1);
273 CharacterIterator *tci = &wordIter1->getText();
274 UnicodeString tstr;
275 tci->getText(tstr);
276 TEST_ASSERT(tstr == str1);
277 if(wordIter1->current() != 0)
278 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
279
280 wordIter1->next(2);
281
282 wordIter1->setText(str2);
283 if(wordIter1->current() != 0)
284 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285
286
287 charIter1->adoptText(text1Clone);
288 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
289 tci = &wordIter1->getText();
290 tci->getText(tstr);
291 TEST_ASSERT(tstr == str2);
292 tci = &charIter1->getText();
293 tci->getText(tstr);
294 TEST_ASSERT(tstr == str1);
295
296
297 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
298 rb->adoptText(text1);
299 if(rb->getText() != *text1)
300 errln((UnicodeString)"ERROR:1 error in adoptText ");
301 rb->adoptText(text2);
302 if(rb->getText() != *text2)
303 errln((UnicodeString)"ERROR:2 error in adoptText ");
304
305 // Adopt where iterator range is less than the entire orignal source string.
306 // (With the change of the break engine to working with UText internally,
307 // CharacterIterators starting at positions other than zero are not supported)
308 rb->adoptText(text3);
309 TEST_ASSERT(rb->preceding(2) == 0);
310 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
311 //if(rb->preceding(2) != 3) {
312 // errln((UnicodeString)"ERROR:3 error in adoptText ");
313 //}
314 //if(rb->following(11) != BreakIterator::DONE) {
315 // errln((UnicodeString)"ERROR:4 error in adoptText ");
316 //}
317
318 // UText API
319 //
320 // Quick test to see if UText is working at all.
321 //
322 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
323 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
324 // 012345678901
325
326 status.reset();
327 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
328 wordIter1->setText(ut.getAlias(), status);
329 TEST_ASSERT_SUCCESS(status);
330
331 int32_t pos;
332 pos = wordIter1->first();
333 TEST_ASSERT(pos==0);
334 pos = wordIter1->next();
335 TEST_ASSERT(pos==5);
336 pos = wordIter1->next();
337 TEST_ASSERT(pos==6);
338 pos = wordIter1->next();
339 TEST_ASSERT(pos==11);
340 pos = wordIter1->next();
341 TEST_ASSERT(pos==UBRK_DONE);
342
343 status.reset();
344 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
345 TEST_ASSERT_SUCCESS(status);
346 wordIter1->setText(ut2.getAlias(), status);
347 TEST_ASSERT_SUCCESS(status);
348
349 pos = wordIter1->first();
350 TEST_ASSERT(pos==0);
351 pos = wordIter1->next();
352 TEST_ASSERT(pos==3);
353 pos = wordIter1->next();
354 TEST_ASSERT(pos==4);
355
356 pos = wordIter1->last();
357 TEST_ASSERT(pos==6);
358 pos = wordIter1->previous();
359 TEST_ASSERT(pos==4);
360 pos = wordIter1->previous();
361 TEST_ASSERT(pos==3);
362 pos = wordIter1->previous();
363 TEST_ASSERT(pos==0);
364 pos = wordIter1->previous();
365 TEST_ASSERT(pos==UBRK_DONE);
366
367 status.reset();
368 UnicodeString sEmpty;
369 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
370 wordIter1->getUText(gut2.getAlias(), status);
371 TEST_ASSERT_SUCCESS(status);
372 status.reset();
373 }
374
375
TestIteration()376 void RBBIAPITest::TestIteration()
377 {
378 // This test just verifies that the API is present.
379 // Testing for correct operation of the break rules happens elsewhere.
380
381 UErrorCode status=U_ZERO_ERROR;
382 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
383 if (U_FAILURE(status) || bi == NULL) {
384 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
385 }
386 delete bi;
387
388 status=U_ZERO_ERROR;
389 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
390 if (U_FAILURE(status) || bi == NULL) {
391 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
392 }
393 delete bi;
394
395 status=U_ZERO_ERROR;
396 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
397 if (U_FAILURE(status) || bi == NULL) {
398 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
399 }
400 delete bi;
401
402 status=U_ZERO_ERROR;
403 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
404 if (U_FAILURE(status) || bi == NULL) {
405 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
406 }
407 delete bi;
408
409 status=U_ZERO_ERROR;
410 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
411 if (U_FAILURE(status) || bi == NULL) {
412 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
413 }
414 delete bi;
415
416 status=U_ZERO_ERROR;
417 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
418 if (U_FAILURE(status) || bi == NULL) {
419 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
420 return; // Skip the rest of these tests.
421 }
422
423
424 UnicodeString testString="0123456789";
425 bi->setText(testString);
426
427 int32_t i;
428 i = bi->first();
429 if (i != 0) {
430 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__, __LINE__, i);
431 }
432
433 i = bi->last();
434 if (i != 10) {
435 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__, __LINE__, i);
436 }
437
438 //
439 // Previous
440 //
441 bi->last();
442 i = bi->previous();
443 if (i != 9) {
444 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__, __LINE__, i);
445 }
446
447
448 bi->first();
449 i = bi->previous();
450 if (i != BreakIterator::DONE) {
451 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__, __LINE__, i);
452 }
453
454 //
455 // next()
456 //
457 bi->first();
458 i = bi->next();
459 if (i != 1) {
460 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__, __LINE__, i);
461 }
462
463 bi->last();
464 i = bi->next();
465 if (i != BreakIterator::DONE) {
466 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__, __LINE__, i);
467 }
468
469
470 //
471 // current()
472 //
473 bi->first();
474 i = bi->current();
475 if (i != 0) {
476 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
477 }
478
479 bi->next();
480 i = bi->current();
481 if (i != 1) {
482 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__, __LINE__, i);
483 }
484
485 bi->last();
486 bi->next();
487 i = bi->current();
488 if (i != 10) {
489 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
490 }
491
492 bi->first();
493 bi->previous();
494 i = bi->current();
495 if (i != 0) {
496 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
497 }
498
499
500 //
501 // Following()
502 //
503 i = bi->following(4);
504 if (i != 5) {
505 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__, __LINE__, i);
506 }
507
508 i = bi->following(9);
509 if (i != 10) {
510 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__, __LINE__, i);
511 }
512
513 i = bi->following(10);
514 if (i != BreakIterator::DONE) {
515 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__, __LINE__, i);
516 }
517
518
519 //
520 // Preceding
521 //
522 i = bi->preceding(4);
523 if (i != 3) {
524 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__, __LINE__, i);
525 }
526
527 i = bi->preceding(10);
528 if (i != 9) {
529 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__, __LINE__, i);
530 }
531
532 i = bi->preceding(1);
533 if (i != 0) {
534 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__, __LINE__, i);
535 }
536
537 i = bi->preceding(0);
538 if (i != BreakIterator::DONE) {
539 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__, __LINE__, i);
540 }
541
542
543 //
544 // isBoundary()
545 //
546 bi->first();
547 if (bi->isBoundary(3) != TRUE) {
548 errln("%s:%d Incorrect value from bi->isBoudary(). Expected TRUE, got FALSE", __FILE__, __LINE__, i);
549 }
550 i = bi->current();
551 if (i != 3) {
552 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__, __LINE__, i);
553 }
554
555
556 if (bi->isBoundary(11) != FALSE) {
557 errln("%s:%d Incorrect value from bi->isBoudary(). Expected FALSE, got TRUE", __FILE__, __LINE__, i);
558 }
559 i = bi->current();
560 if (i != 10) {
561 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
562 }
563
564 //
565 // next(n)
566 //
567 bi->first();
568 i = bi->next(4);
569 if (i != 4) {
570 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__, __LINE__, i);
571 }
572
573 i = bi->next(6);
574 if (i != 10) {
575 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__, __LINE__, i);
576 }
577
578 bi->first();
579 i = bi->next(11);
580 if (i != BreakIterator::DONE) {
581 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
582 }
583
584 delete bi;
585
586 }
587
588
589
590
591
592
TestBuilder()593 void RBBIAPITest::TestBuilder() {
594 UnicodeString rulesString1 = "$Letters = [:L:];\n"
595 "$Numbers = [:N:];\n"
596 "$Letters+;\n"
597 "$Numbers+;\n"
598 "[^$Letters $Numbers];\n"
599 "!.*;\n";
600 UnicodeString testString1 = "abc123..abc";
601 // 01234567890
602 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
603 UErrorCode status=U_ZERO_ERROR;
604 UParseError parseError;
605
606 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
607 if(U_FAILURE(status)) {
608 dataerrln("Fail : in construction - %s", u_errorName(status));
609 } else {
610 bi->setText(testString1);
611 doBoundaryTest(*bi, testString1, bounds1);
612 }
613 delete bi;
614 }
615
616
617 //
618 // TestQuoteGrouping
619 // Single quotes within rules imply a grouping, so that a modifier
620 // following the quoted text (* or +) applies to all of the quoted chars.
621 //
TestQuoteGrouping()622 void RBBIAPITest::TestQuoteGrouping() {
623 UnicodeString rulesString1 = "#Here comes the rule...\n"
624 "'$@!'*;\n" // (\$\@\!)*
625 ".;\n";
626
627 UnicodeString testString1 = "$@!$@!X$@!!X";
628 // 0123456789012
629 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
630 UErrorCode status=U_ZERO_ERROR;
631 UParseError parseError;
632
633 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
634 if(U_FAILURE(status)) {
635 dataerrln("Fail : in construction - %s", u_errorName(status));
636 } else {
637 bi->setText(testString1);
638 doBoundaryTest(*bi, testString1, bounds1);
639 }
640 delete bi;
641 }
642
643 //
644 // TestRuleStatus
645 // Test word break rule status constants.
646 //
TestRuleStatus()647 void RBBIAPITest::TestRuleStatus() {
648 UChar str[30];
649 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
650 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
651 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
652 // 012345678901234567 8 9 0
653 // Katakana
654 str, 30);
655 UnicodeString testString1(str);
656 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
657 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
658 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
659 UBRK_WORD_IDEO, UBRK_WORD_NONE};
660
661 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
662 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
663 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
664
665 UErrorCode status=U_ZERO_ERROR;
666
667 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
668 if(U_FAILURE(status)) {
669 errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
670 } else {
671 bi->setText(testString1);
672 // First test that the breaks are in the right spots.
673 doBoundaryTest(*bi, testString1, bounds1);
674
675 // Then go back and check tag values
676 int32_t i = 0;
677 int32_t pos, tag;
678 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
679 if (pos != bounds1[i]) {
680 errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__, __LINE__, pos);
681 break;
682 }
683 tag = bi->getRuleStatus();
684 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
685 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
686 break;
687 }
688
689 // Check that we get the same tag values from getRuleStatusVec()
690 int32_t vec[10];
691 int t = bi->getRuleStatusVec(vec, 10, status);
692 TEST_ASSERT_SUCCESS(status);
693 TEST_ASSERT(t==1);
694 TEST_ASSERT(vec[0] == tag);
695 }
696 }
697 delete bi;
698
699 // Now test line break status. This test mostly is to confirm that the status constants
700 // are correctly declared in the header.
701 testString1 = "test line. \n";
702 // break type s s h
703
704 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
705 if(U_FAILURE(status)) {
706 errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
707 } else {
708 int32_t i = 0;
709 int32_t pos, tag;
710 UBool success;
711
712 bi->setText(testString1);
713 pos = bi->current();
714 tag = bi->getRuleStatus();
715 for (i=0; i<3; i++) {
716 switch (i) {
717 case 0:
718 success = pos==0 && tag==UBRK_LINE_SOFT; break;
719 case 1:
720 success = pos==5 && tag==UBRK_LINE_SOFT; break;
721 case 2:
722 success = pos==12 && tag==UBRK_LINE_HARD; break;
723 default:
724 success = FALSE; break;
725 }
726 if (success == FALSE) {
727 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d",
728 __FILE__, __LINE__, i, pos, tag);
729 break;
730 }
731 pos = bi->next();
732 tag = bi->getRuleStatus();
733 }
734 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
735 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
736 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
737 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
738 }
739 }
740 delete bi;
741
742 }
743
744
745 //
746 // TestRuleStatusVec
747 // Test the vector form of break rule status.
748 //
TestRuleStatusVec()749 void RBBIAPITest::TestRuleStatusVec() {
750 UnicodeString rulesString( "[A-N]{100}; \n"
751 "[a-w]{200}; \n"
752 "[\\p{L}]{300}; \n"
753 "[\\p{N}]{400}; \n"
754 "[0-5]{500}; \n"
755 "!.*;\n", -1, US_INV);
756 UnicodeString testString1 = "Aapz5?";
757 int32_t statusVals[10];
758 int32_t numStatuses;
759 int32_t pos;
760
761 UErrorCode status=U_ZERO_ERROR;
762 UParseError parseError;
763
764 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
765 if (U_FAILURE(status)) {
766 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
767 } else {
768 bi->setText(testString1);
769
770 // A
771 pos = bi->next();
772 TEST_ASSERT(pos==1);
773 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
774 TEST_ASSERT_SUCCESS(status);
775 TEST_ASSERT(numStatuses == 2);
776 TEST_ASSERT(statusVals[0] == 100);
777 TEST_ASSERT(statusVals[1] == 300);
778
779 // a
780 pos = bi->next();
781 TEST_ASSERT(pos==2);
782 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
783 TEST_ASSERT_SUCCESS(status);
784 TEST_ASSERT(numStatuses == 2);
785 TEST_ASSERT(statusVals[0] == 200);
786 TEST_ASSERT(statusVals[1] == 300);
787
788 // p
789 pos = bi->next();
790 TEST_ASSERT(pos==3);
791 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
792 TEST_ASSERT_SUCCESS(status);
793 TEST_ASSERT(numStatuses == 2);
794 TEST_ASSERT(statusVals[0] == 200);
795 TEST_ASSERT(statusVals[1] == 300);
796
797 // z
798 pos = bi->next();
799 TEST_ASSERT(pos==4);
800 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
801 TEST_ASSERT_SUCCESS(status);
802 TEST_ASSERT(numStatuses == 1);
803 TEST_ASSERT(statusVals[0] == 300);
804
805 // 5
806 pos = bi->next();
807 TEST_ASSERT(pos==5);
808 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
809 TEST_ASSERT_SUCCESS(status);
810 TEST_ASSERT(numStatuses == 2);
811 TEST_ASSERT(statusVals[0] == 400);
812 TEST_ASSERT(statusVals[1] == 500);
813
814 // ?
815 pos = bi->next();
816 TEST_ASSERT(pos==6);
817 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
818 TEST_ASSERT_SUCCESS(status);
819 TEST_ASSERT(numStatuses == 1);
820 TEST_ASSERT(statusVals[0] == 0);
821
822 //
823 // Check buffer overflow error handling. Char == A
824 //
825 bi->first();
826 pos = bi->next();
827 TEST_ASSERT(pos==1);
828 memset(statusVals, -1, sizeof(statusVals));
829 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
830 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
831 TEST_ASSERT(numStatuses == 2);
832 TEST_ASSERT(statusVals[0] == -1);
833
834 status = U_ZERO_ERROR;
835 memset(statusVals, -1, sizeof(statusVals));
836 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
837 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
838 TEST_ASSERT(numStatuses == 2);
839 TEST_ASSERT(statusVals[0] == 100);
840 TEST_ASSERT(statusVals[1] == -1);
841
842 status = U_ZERO_ERROR;
843 memset(statusVals, -1, sizeof(statusVals));
844 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
845 TEST_ASSERT_SUCCESS(status);
846 TEST_ASSERT(numStatuses == 2);
847 TEST_ASSERT(statusVals[0] == 100);
848 TEST_ASSERT(statusVals[1] == 300);
849 TEST_ASSERT(statusVals[2] == -1);
850 }
851 delete bi;
852
853 }
854
855 //
856 // Bug 2190 Regression test. Builder crash on rule consisting of only a
857 // $variable reference
TestBug2190()858 void RBBIAPITest::TestBug2190() {
859 UnicodeString rulesString1 = "$aaa = abcd;\n"
860 "$bbb = $aaa;\n"
861 "$bbb;\n";
862 UnicodeString testString1 = "abcdabcd";
863 // 01234567890
864 int32_t bounds1[] = {0, 4, 8};
865 UErrorCode status=U_ZERO_ERROR;
866 UParseError parseError;
867
868 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
869 if(U_FAILURE(status)) {
870 dataerrln("Fail : in construction - %s", u_errorName(status));
871 } else {
872 bi->setText(testString1);
873 doBoundaryTest(*bi, testString1, bounds1);
874 }
875 delete bi;
876 }
877
878
TestRegistration()879 void RBBIAPITest::TestRegistration() {
880 #if !UCONFIG_NO_SERVICE
881 UErrorCode status = U_ZERO_ERROR;
882 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
883 // ok to not delete these if we exit because of error?
884 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
885 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
886 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
887
888 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
889 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
890
891 delete ja_word;
892 delete ja_char;
893 delete root_word;
894 delete root_char;
895
896 return;
897 }
898
899 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
900 {
901 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
902 if (ja_word && *ja_word == *root_word) {
903 errln("japan not different from root");
904 }
905 #endif
906 }
907
908 {
909 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
910 UBool fail = TRUE;
911 if(result){
912 fail = *result != *ja_word;
913 }
914 delete result;
915 if (fail) {
916 errln("bad result for xx_XX/word");
917 }
918 }
919
920 {
921 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
922 UBool fail = TRUE;
923 if(result){
924 fail = *result != *ja_char;
925 }
926 delete result;
927 if (fail) {
928 errln("bad result for ja_JP/char");
929 }
930 }
931
932 {
933 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
934 UBool fail = TRUE;
935 if(result){
936 fail = *result != *root_char;
937 }
938 delete result;
939 if (fail) {
940 errln("bad result for xx_XX/char");
941 }
942 }
943
944 {
945 StringEnumeration* avail = BreakIterator::getAvailableLocales();
946 UBool found = FALSE;
947 const UnicodeString* p;
948 while ((p = avail->snext(status))) {
949 if (p->compare("xx") == 0) {
950 found = TRUE;
951 break;
952 }
953 }
954 delete avail;
955 if (!found) {
956 errln("did not find test locale");
957 }
958 }
959
960 {
961 UBool unreg = BreakIterator::unregister(key, status);
962 if (!unreg) {
963 errln("unable to unregister");
964 }
965 }
966
967 {
968 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
969 BreakIterator* root = BreakIterator::createWordInstance("", status);
970 UBool fail = TRUE;
971 if(root){
972 fail = *root != *result;
973 }
974 delete root;
975 delete result;
976 if (fail) {
977 errln("did not get root break");
978 }
979 }
980
981 {
982 StringEnumeration* avail = BreakIterator::getAvailableLocales();
983 UBool found = FALSE;
984 const UnicodeString* p;
985 while ((p = avail->snext(status))) {
986 if (p->compare("xx") == 0) {
987 found = TRUE;
988 break;
989 }
990 }
991 delete avail;
992 if (found) {
993 errln("found test locale");
994 }
995 }
996
997 {
998 int32_t count;
999 UBool foundLocale = FALSE;
1000 const Locale *avail = BreakIterator::getAvailableLocales(count);
1001 for (int i=0; i<count; i++) {
1002 if (avail[i] == Locale::getEnglish()) {
1003 foundLocale = TRUE;
1004 break;
1005 }
1006 }
1007 if (foundLocale == FALSE) {
1008 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1009 }
1010 }
1011
1012
1013 // ja_word was adopted by factory
1014 delete ja_char;
1015 delete root_word;
1016 delete root_char;
1017 #endif
1018 }
1019
RoundtripRule(const char * dataFile)1020 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1021 UErrorCode status = U_ZERO_ERROR;
1022 UParseError parseError;
1023 parseError.line = 0;
1024 parseError.offset = 0;
1025 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1026 uint32_t length;
1027 const UChar *builtSource;
1028 const uint8_t *rbbiRules;
1029 const uint8_t *builtRules;
1030
1031 if (U_FAILURE(status)) {
1032 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1033 return;
1034 }
1035
1036 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1037 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1038 LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
1039 if (U_FAILURE(status)) {
1040 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1041 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1042 errln(UnicodeString(builtSource));
1043 return;
1044 };
1045 rbbiRules = brkItr->getBinaryRules(length);
1046 logln("Comparing \"%s\" len=%d", dataFile, length);
1047 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1048 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1049 return;
1050 }
1051 }
1052
TestRoundtripRules()1053 void RBBIAPITest::TestRoundtripRules() {
1054 RoundtripRule("word");
1055 RoundtripRule("title");
1056 RoundtripRule("sent");
1057 RoundtripRule("line");
1058 RoundtripRule("char");
1059 if (!quick) {
1060 RoundtripRule("word_POSIX");
1061 }
1062 }
1063
1064
1065 // Check getBinaryRules() and construction of a break iterator from those rules.
1066
TestGetBinaryRules()1067 void RBBIAPITest::TestGetBinaryRules() {
1068 UErrorCode status=U_ZERO_ERROR;
1069 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1070 if (U_FAILURE(status)) {
1071 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1072 return;
1073 }
1074 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1075 if (rbbi == NULL) {
1076 dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1077 return;
1078 }
1079
1080 // Check that the new line break iterator is nominally functional.
1081 UnicodeString helloWorld("Hello, World!");
1082 rbbi->setText(helloWorld);
1083 int n = 0;
1084 while (bi->next() != UBRK_DONE) {
1085 ++n;
1086 }
1087 TEST_ASSERT(n == 2);
1088
1089 // Extract the binary rules as a uint8_t blob.
1090 uint32_t ruleLength;
1091 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1092 TEST_ASSERT(ruleLength > 0);
1093 TEST_ASSERT(binRules != NULL);
1094
1095 // Clone the binary rules, and create a break iterator from that.
1096 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1097 uint8_t *clonedRules = new uint8_t[ruleLength];
1098 memcpy(clonedRules, binRules, ruleLength);
1099 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1100 TEST_ASSERT_SUCCESS(status);
1101
1102 // Check that the cloned line break iterator is nominally alive.
1103 clonedBI.setText(helloWorld);
1104 n = 0;
1105 while (clonedBI.next() != UBRK_DONE) {
1106 ++n;
1107 }
1108 TEST_ASSERT(n == 2);
1109
1110 delete[] clonedRules;
1111 }
1112
1113
TestRefreshInputText()1114 void RBBIAPITest::TestRefreshInputText() {
1115 /*
1116 * RefreshInput changes out the input of a Break Iterator without
1117 * changing anything else in the iterator's state. Used with Java JNI,
1118 * when Java moves the underlying string storage. This test
1119 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1120 * The right set of boundaries should still be found.
1121 */
1122 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1123 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1124 UErrorCode status = U_ZERO_ERROR;
1125 UText ut1 = UTEXT_INITIALIZER;
1126 UText ut2 = UTEXT_INITIALIZER;
1127 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1128 TEST_ASSERT_SUCCESS(status);
1129
1130 utext_openUChars(&ut1, testStr, -1, &status);
1131 TEST_ASSERT_SUCCESS(status);
1132
1133 if (U_SUCCESS(status)) {
1134 bi->setText(&ut1, status);
1135 TEST_ASSERT_SUCCESS(status);
1136
1137 /* Line boundaries will occur before each letter in the original string */
1138 TEST_ASSERT(1 == bi->next());
1139 TEST_ASSERT(3 == bi->next());
1140
1141 /* Move the string, kill the original string. */
1142 u_strcpy(movedStr, testStr);
1143 u_memset(testStr, 0x20, u_strlen(testStr));
1144 utext_openUChars(&ut2, movedStr, -1, &status);
1145 TEST_ASSERT_SUCCESS(status);
1146 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1147 TEST_ASSERT_SUCCESS(status);
1148 TEST_ASSERT(bi == returnedBI);
1149
1150 /* Find the following matches, now working in the moved string. */
1151 TEST_ASSERT(5 == bi->next());
1152 TEST_ASSERT(7 == bi->next());
1153 TEST_ASSERT(8 == bi->next());
1154 TEST_ASSERT(UBRK_DONE == bi->next());
1155
1156 utext_close(&ut1);
1157 utext_close(&ut2);
1158 }
1159 delete bi;
1160
1161 }
1162
1163 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1164 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1165 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1166 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1167
1168 int32_t *pos = new int32_t[ustr.length()];
1169 int32_t posCount = 0;
1170
1171 // calculate breaks up front, so we can print out
1172 // sans any debugging
1173 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1174 pos[posCount++] = n;
1175 if(posCount>=ustr.length()) {
1176 it.errln("brk count exceeds string length!");
1177 return;
1178 }
1179 }
1180 UnicodeString out;
1181 out.append((UChar)CHSTR);
1182 int32_t prev = 0;
1183 for(int32_t i=0;i<posCount;i++) {
1184 int32_t n=pos[i];
1185 out.append(ustr.tempSubString(prev,n-prev));
1186 out.append((UChar)PILCROW);
1187 prev=n;
1188 }
1189 out.append(ustr.tempSubString(prev,ustr.length()-prev));
1190 out.append((UChar)CHEND);
1191 it.logln(out);
1192
1193 out.remove();
1194 for(int32_t i=0;i<posCount;i++) {
1195 char tmp[100];
1196 sprintf(tmp,"%d ",pos[i]);
1197 out.append(UnicodeString(tmp));
1198 }
1199 it.logln(out);
1200 delete [] pos;
1201 }
1202 #endif
1203
TestFilteredBreakIteratorBuilder()1204 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1205 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1206 UErrorCode status = U_ZERO_ERROR;
1207 LocalPointer<FilteredBreakIteratorBuilder> builder;
1208 LocalPointer<BreakIterator> baseBI;
1209 LocalPointer<BreakIterator> filteredBI;
1210 LocalPointer<BreakIterator> frenchBI;
1211
1212 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1213 const UnicodeString ABBR_MR("Mr.");
1214 const UnicodeString ABBR_CAPT("Capt.");
1215
1216 {
1217 logln("Constructing empty builder\n");
1218 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1219 TEST_ASSERT_SUCCESS(status);
1220
1221 logln("Constructing base BI\n");
1222 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1223 TEST_ASSERT_SUCCESS(status);
1224
1225 logln("Building new BI\n");
1226 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1227 TEST_ASSERT_SUCCESS(status);
1228
1229 if (U_SUCCESS(status)) {
1230 logln("Testing:");
1231 filteredBI->setText(text);
1232 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1233 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1234 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1235 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1236 TEST_ASSERT(278 == filteredBI->next()); // charge.
1237 filteredBI->first();
1238 prtbrks(filteredBI.getAlias(), text, *this);
1239 }
1240 }
1241
1242 {
1243 logln("Constructing empty builder\n");
1244 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1245 TEST_ASSERT_SUCCESS(status);
1246
1247 if (U_SUCCESS(status)) {
1248 logln("Adding Mr. as an exception\n");
1249 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1250 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1251 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1252 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1253 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1254 TEST_ASSERT_SUCCESS(status);
1255
1256 logln("Constructing base BI\n");
1257 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1258 TEST_ASSERT_SUCCESS(status);
1259
1260 logln("Building new BI\n");
1261 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1262 TEST_ASSERT_SUCCESS(status);
1263
1264 logln("Testing:");
1265 filteredBI->setText(text);
1266 TEST_ASSERT(84 == filteredBI->next());
1267 TEST_ASSERT(90 == filteredBI->next());// Capt.
1268 TEST_ASSERT(278 == filteredBI->next());
1269 filteredBI->first();
1270 prtbrks(filteredBI.getAlias(), text, *this);
1271 }
1272 }
1273
1274
1275 {
1276 logln("Constructing empty builder\n");
1277 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1278 TEST_ASSERT_SUCCESS(status);
1279
1280 if (U_SUCCESS(status)) {
1281 logln("Adding Mr. and Capt as an exception\n");
1282 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1283 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1284 TEST_ASSERT_SUCCESS(status);
1285
1286 logln("Constructing base BI\n");
1287 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1288 TEST_ASSERT_SUCCESS(status);
1289
1290 logln("Building new BI\n");
1291 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1292 TEST_ASSERT_SUCCESS(status);
1293
1294 logln("Testing:");
1295 filteredBI->setText(text);
1296 TEST_ASSERT(84 == filteredBI->next());
1297 TEST_ASSERT(278 == filteredBI->next());
1298 filteredBI->first();
1299 prtbrks(filteredBI.getAlias(), text, *this);
1300 }
1301 }
1302
1303
1304 {
1305 logln("Constructing English builder\n");
1306 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1307 TEST_ASSERT_SUCCESS(status);
1308
1309 logln("Constructing base BI\n");
1310 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1311 TEST_ASSERT_SUCCESS(status);
1312
1313 if (U_SUCCESS(status)) {
1314 logln("unsuppressing 'Capt'");
1315 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1316
1317 logln("Building new BI\n");
1318 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1319 TEST_ASSERT_SUCCESS(status);
1320
1321 if(filteredBI.isValid()) {
1322 logln("Testing:");
1323 filteredBI->setText(text);
1324 TEST_ASSERT(84 == filteredBI->next());
1325 TEST_ASSERT(90 == filteredBI->next());
1326 TEST_ASSERT(278 == filteredBI->next());
1327 filteredBI->first();
1328 prtbrks(filteredBI.getAlias(), text, *this);
1329 }
1330 }
1331 }
1332
1333
1334 {
1335 logln("Constructing English builder\n");
1336 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1337 TEST_ASSERT_SUCCESS(status);
1338
1339 logln("Constructing base BI\n");
1340 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1341 TEST_ASSERT_SUCCESS(status);
1342
1343 if (U_SUCCESS(status)) {
1344 logln("Building new BI\n");
1345 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1346 TEST_ASSERT_SUCCESS(status);
1347
1348 if(filteredBI.isValid()) {
1349 logln("Testing:");
1350 filteredBI->setText(text);
1351 TEST_ASSERT(84 == filteredBI->next());
1352 TEST_ASSERT(278 == filteredBI->next());
1353 filteredBI->first();
1354 prtbrks(filteredBI.getAlias(), text, *this);
1355 }
1356 }
1357 }
1358
1359 // reenable once french is in
1360 {
1361 logln("Constructing French builder");
1362 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1363 TEST_ASSERT_SUCCESS(status);
1364
1365 logln("Constructing base BI\n");
1366 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1367 TEST_ASSERT_SUCCESS(status);
1368
1369 if (U_SUCCESS(status)) {
1370 logln("Building new BI\n");
1371 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1372 TEST_ASSERT_SUCCESS(status);
1373 }
1374
1375 if(frenchBI.isValid()) {
1376 logln("Testing:");
1377 UnicodeString frText("C'est MM. Duval.");
1378 frenchBI->setText(frText);
1379 TEST_ASSERT(16 == frenchBI->next());
1380 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1381 frenchBI->first();
1382 prtbrks(frenchBI.getAlias(), frText, *this);
1383 logln("Testing against English:");
1384 filteredBI->setText(frText);
1385 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1386 TEST_ASSERT(16 == filteredBI->next());
1387 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1388 filteredBI->first();
1389 prtbrks(filteredBI.getAlias(), frText, *this);
1390
1391 // Verify ==
1392 TEST_ASSERT_TRUE(*frenchBI == *frenchBI);
1393 TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1394 TEST_ASSERT_TRUE(*frenchBI != *filteredBI);
1395 } else {
1396 dataerrln("French BI: not valid.");
1397 }
1398 }
1399
1400 #else
1401 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1402 #endif
1403 }
1404
1405 //---------------------------------------------
1406 // runIndexedTest
1407 //---------------------------------------------
1408
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1409 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1410 {
1411 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1412 TESTCASE_AUTO_BEGIN;
1413 #if !UCONFIG_NO_FILE_IO
1414 TESTCASE_AUTO(TestCloneEquals);
1415 TESTCASE_AUTO(TestgetRules);
1416 TESTCASE_AUTO(TestHashCode);
1417 TESTCASE_AUTO(TestGetSetAdoptText);
1418 TESTCASE_AUTO(TestIteration);
1419 #endif
1420 TESTCASE_AUTO(TestBuilder);
1421 TESTCASE_AUTO(TestQuoteGrouping);
1422 TESTCASE_AUTO(TestRuleStatusVec);
1423 TESTCASE_AUTO(TestBug2190);
1424 #if !UCONFIG_NO_FILE_IO
1425 TESTCASE_AUTO(TestRegistration);
1426 TESTCASE_AUTO(TestBoilerPlate);
1427 TESTCASE_AUTO(TestRuleStatus);
1428 TESTCASE_AUTO(TestRoundtripRules);
1429 TESTCASE_AUTO(TestGetBinaryRules);
1430 #endif
1431 TESTCASE_AUTO(TestRefreshInputText);
1432 #if !UCONFIG_NO_BREAK_ITERATION
1433 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1434 #endif
1435 TESTCASE_AUTO_END;
1436 }
1437
1438
1439 //---------------------------------------------
1440 //Internal subroutines
1441 //---------------------------------------------
1442
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1443 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1444 logln((UnicodeString)"testIsBoundary():");
1445 int32_t p = 0;
1446 UBool isB;
1447 for (int32_t i = 0; i < text.length(); i++) {
1448 isB = bi.isBoundary(i);
1449 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1450
1451 if (i == boundaries[p]) {
1452 if (!isB)
1453 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1454 p++;
1455 }
1456 else {
1457 if (isB)
1458 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1459 }
1460 }
1461 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1462 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1463 UnicodeString selected;
1464 UnicodeString expected=CharsToUnicodeString(expectedString);
1465
1466 if(gotoffset != expectedOffset)
1467 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1468 if(start <= gotoffset){
1469 testString.extractBetween(start, gotoffset, selected);
1470 }
1471 else{
1472 testString.extractBetween(gotoffset, start, selected);
1473 }
1474 if(selected.compare(expected) != 0)
1475 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1476 else
1477 logln(prettify("****selected \"" + selected + "\""));
1478 }
1479
1480 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1481