1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucharstrietest.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010nov16
12 * created by: Markus W. Scherer
13 */
14
15 #include <string.h>
16
17 #include "unicode/utypes.h"
18 #include "unicode/appendable.h"
19 #include "unicode/localpointer.h"
20 #include "unicode/ucharstrie.h"
21 #include "unicode/ucharstriebuilder.h"
22 #include "unicode/uniset.h"
23 #include "unicode/unistr.h"
24 #include "intltest.h"
25
26 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
27
28 struct StringAndValue {
29 const char *s;
30 int32_t value;
31 };
32
33 class UCharsTrieTest : public IntlTest {
34 public:
35 UCharsTrieTest();
36 virtual ~UCharsTrieTest();
37
38 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
39 void TestBuilder();
40 void TestEmpty();
41 void Test_a();
42 void Test_a_ab();
43 void TestShortestBranch();
44 void TestBranches();
45 void TestLongSequence();
46 void TestLongBranch();
47 void TestValuesForState();
48 void TestCompact();
49 void TestFirstForCodePoint();
50 void TestNextForCodePoint();
51
52 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
53 void TestLargeTrie();
54
55 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
56 void TestHasUniqueValue();
57 void TestGetNextUChars();
58 void TestIteratorFromBranch();
59 void TestIteratorFromLinearMatch();
60 void TestTruncatingIteratorFromRoot();
61 void TestTruncatingIteratorFromLinearMatchShort();
62 void TestTruncatingIteratorFromLinearMatchLong();
63 void TestIteratorFromUChars();
64
65 void checkData(const StringAndValue data[], int32_t dataLength);
66 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
67 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
68 UStringTrieBuildOption buildOption);
69 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
70 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
71 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
74 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
75
76 private:
77 UCharsTrieBuilder *builder_;
78 };
79
createUCharsTrieTest()80 extern IntlTest *createUCharsTrieTest() {
81 return new UCharsTrieTest();
82 }
83
UCharsTrieTest()84 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
85 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
86 builder_=new UCharsTrieBuilder(errorCode);
87 }
88
~UCharsTrieTest()89 UCharsTrieTest::~UCharsTrieTest() {
90 delete builder_;
91 }
92
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)93 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
94 if(exec) {
95 logln("TestSuite UCharsTrieTest: ");
96 }
97 TESTCASE_AUTO_BEGIN;
98 TESTCASE_AUTO(TestBuilder);
99 TESTCASE_AUTO(TestEmpty);
100 TESTCASE_AUTO(Test_a);
101 TESTCASE_AUTO(Test_a_ab);
102 TESTCASE_AUTO(TestShortestBranch);
103 TESTCASE_AUTO(TestBranches);
104 TESTCASE_AUTO(TestLongSequence);
105 TESTCASE_AUTO(TestLongBranch);
106 TESTCASE_AUTO(TestValuesForState);
107 TESTCASE_AUTO(TestCompact);
108 TESTCASE_AUTO(TestFirstForCodePoint);
109 TESTCASE_AUTO(TestNextForCodePoint);
110 TESTCASE_AUTO(TestLargeTrie);
111 TESTCASE_AUTO(TestHasUniqueValue);
112 TESTCASE_AUTO(TestGetNextUChars);
113 TESTCASE_AUTO(TestIteratorFromBranch);
114 TESTCASE_AUTO(TestIteratorFromLinearMatch);
115 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
116 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
117 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
118 TESTCASE_AUTO(TestIteratorFromUChars);
119 TESTCASE_AUTO_END;
120 }
121
TestBuilder()122 void UCharsTrieTest::TestBuilder() {
123 IcuTestErrorCode errorCode(*this, "TestBuilder()");
124 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
125 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
126 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
127 return;
128 }
129 // TODO: remove .build(...) once add() checks for duplicates.
130 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
131 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
132 errln("UCharsTrieBuilder.add() did not detect duplicates");
133 return;
134 }
135 }
136
TestEmpty()137 void UCharsTrieTest::TestEmpty() {
138 static const StringAndValue data[]={
139 { "", 0 }
140 };
141 checkData(data, LENGTHOF(data));
142 }
143
Test_a()144 void UCharsTrieTest::Test_a() {
145 static const StringAndValue data[]={
146 { "a", 1 }
147 };
148 checkData(data, LENGTHOF(data));
149 }
150
Test_a_ab()151 void UCharsTrieTest::Test_a_ab() {
152 static const StringAndValue data[]={
153 { "a", 1 },
154 { "ab", 100 }
155 };
156 checkData(data, LENGTHOF(data));
157 }
158
TestShortestBranch()159 void UCharsTrieTest::TestShortestBranch() {
160 static const StringAndValue data[]={
161 { "a", 1000 },
162 { "b", 2000 }
163 };
164 checkData(data, LENGTHOF(data));
165 }
166
TestBranches()167 void UCharsTrieTest::TestBranches() {
168 static const StringAndValue data[]={
169 { "a", 0x10 },
170 { "cc", 0x40 },
171 { "e", 0x100 },
172 { "ggg", 0x400 },
173 { "i", 0x1000 },
174 { "kkkk", 0x4000 },
175 { "n", 0x10000 },
176 { "ppppp", 0x40000 },
177 { "r", 0x100000 },
178 { "sss", 0x200000 },
179 { "t", 0x400000 },
180 { "uu", 0x800000 },
181 { "vv", 0x7fffffff },
182 { "zz", (int32_t)0x80000000 }
183 };
184 for(int32_t length=2; length<=LENGTHOF(data); ++length) {
185 infoln("TestBranches length=%d", (int)length);
186 checkData(data, length);
187 }
188 }
189
TestLongSequence()190 void UCharsTrieTest::TestLongSequence() {
191 static const StringAndValue data[]={
192 { "a", -1 },
193 // sequence of linear-match nodes
194 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
195 // more than 256 units
196 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
202 };
203 checkData(data, LENGTHOF(data));
204 }
205
TestLongBranch()206 void UCharsTrieTest::TestLongBranch() {
207 // Split-branch and interesting compact-integer values.
208 static const StringAndValue data[]={
209 { "a", -2 },
210 { "b", -1 },
211 { "c", 0 },
212 { "d2", 1 },
213 { "f", 0x3f },
214 { "g", 0x40 },
215 { "h", 0x41 },
216 { "j23", 0x1900 },
217 { "j24", 0x19ff },
218 { "j25", 0x1a00 },
219 { "k2", 0x1a80 },
220 { "k3", 0x1aff },
221 { "l234567890", 0x1b00 },
222 { "l234567890123", 0x1b01 },
223 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
224 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
225 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
226 { "r", 0x333333 },
227 { "s2345", 0x4444444 },
228 { "t234567890", 0x77777777 },
229 { "z", (int32_t)0x80000001 }
230 };
231 checkData(data, LENGTHOF(data));
232 }
233
TestValuesForState()234 void UCharsTrieTest::TestValuesForState() {
235 // Check that saveState() and resetToState() interact properly
236 // with next() and current().
237 static const StringAndValue data[]={
238 { "a", -1 },
239 { "ab", -2 },
240 { "abc", -3 },
241 { "abcd", -4 },
242 { "abcde", -5 },
243 { "abcdef", -6 }
244 };
245 checkData(data, LENGTHOF(data));
246 }
247
TestCompact()248 void UCharsTrieTest::TestCompact() {
249 // Duplicate trailing strings and values provide opportunities for compacting.
250 static const StringAndValue data[]={
251 { "+", 0 },
252 { "+august", 8 },
253 { "+december", 12 },
254 { "+july", 7 },
255 { "+june", 6 },
256 { "+november", 11 },
257 { "+october", 10 },
258 { "+september", 9 },
259 { "-", 0 },
260 { "-august", 8 },
261 { "-december", 12 },
262 { "-july", 7 },
263 { "-june", 6 },
264 { "-november", 11 },
265 { "-october", 10 },
266 { "-september", 9 },
267 // The l+n branch (with its sub-nodes) is a duplicate but will be written
268 // both times because each time it follows a different linear-match node.
269 { "xjuly", 7 },
270 { "xjune", 6 }
271 };
272 checkData(data, LENGTHOF(data));
273 }
274
TestFirstForCodePoint()275 void UCharsTrieTest::TestFirstForCodePoint() {
276 static const StringAndValue data[]={
277 { "a", 1 },
278 { "a\\ud800", 2 },
279 { "a\\U00010000", 3 },
280 { "\\ud840", 4 },
281 { "\\U00020000\\udbff", 5 },
282 { "\\U00020000\\U0010ffff", 6 },
283 { "\\U00020000\\U0010ffffz", 7 },
284 { "\\U00050000xy", 8 },
285 { "\\U00050000xyz", 9 }
286 };
287 checkData(data, LENGTHOF(data));
288 }
289
TestNextForCodePoint()290 void UCharsTrieTest::TestNextForCodePoint() {
291 static const StringAndValue data[]={
292 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
293 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
294 { "\\u4dff\\U000103ff", 99999 }
295 };
296 LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
297 if(trie.isNull()) {
298 return; // buildTrie() reported an error
299 }
300 UStringTrieResult result;
301 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
302 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
303 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
306 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
307 trie->getValue()!=2000000000
308 ) {
309 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
310 }
311 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
312 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
313 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
314 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
315 trie->getValue()!=44444
316 ) {
317 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
318 }
319 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
320 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
321 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
322 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate
323 ) {
324 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
325 }
326 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
327 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
328 trie->getValue()!=99999
329 ) {
330 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
331 }
332 }
333
334 // Definitions in the anonymous namespace are invisible outside this file.
335 namespace {
336
337 // Generate (string, value) pairs.
338 // The first string (before next()) will be empty.
339 class Generator {
340 public:
Generator()341 Generator() : value(4711), num(0) {}
next()342 void next() {
343 UChar c;
344 s.truncate(0);
345 s.append(c=(UChar)(value>>16));
346 s.append((UChar)(value>>4));
347 if(value&1) {
348 s.append((UChar)value);
349 }
350 set.add(c);
351 value+=((value>>5)&0x7ff)*3+1;
352 ++num;
353 }
getString() const354 const UnicodeString &getString() const { return s; }
getValue() const355 int32_t getValue() const { return value; }
countUniqueFirstChars() const356 int32_t countUniqueFirstChars() const { return set.size(); }
getIndex() const357 int32_t getIndex() const { return num; }
358
359 private:
360 UnicodeString s;
361 UnicodeSet set;
362 int32_t value;
363 int32_t num;
364 };
365
366 } // end namespace
367
buildLargeTrie(int32_t numUniqueFirst)368 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
369 IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
370 Generator gen;
371 builder_->clear();
372 while(gen.countUniqueFirstChars()<numUniqueFirst) {
373 builder_->add(gen.getString(), gen.getValue(), errorCode);
374 gen.next();
375 }
376 infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
377 UnicodeString trieUChars;
378 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
379 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
380 return new UCharsTrie(trieUChars.getBuffer());
381 }
382
383 // Exercise a large branch node.
TestLargeTrie()384 void UCharsTrieTest::TestLargeTrie() {
385 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
386 if(trie.isNull()) {
387 return; // buildTrie() reported an error
388 }
389 Generator gen;
390 while(gen.countUniqueFirstChars()<1111) {
391 UnicodeString x(gen.getString());
392 int32_t value=gen.getValue();
393 if(!x.isEmpty()) {
394 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
395 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
396 x[0], (long)gen.getIndex());
397 break;
398 }
399 x.remove(0, 1);
400 }
401 UStringTrieResult result=trie->next(x.getBuffer(), x.length());
402 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
403 errln("next(%d chars U+%04X U+%04X)!=hasValue or "
404 "next()!=current() or getValue() wrong "
405 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
406 break;
407 }
408 gen.next();
409 }
410 }
411
412 enum {
413 u_a=0x61,
414 u_b=0x62,
415 u_c=0x63,
416 u_j=0x6a,
417 u_n=0x6e,
418 u_r=0x72,
419 u_u=0x75,
420 u_y=0x79
421 };
422
buildMonthsTrie(UStringTrieBuildOption buildOption)423 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
424 // All types of nodes leading to the same value,
425 // for code coverage of recursive functions.
426 // In particular, we need a lot of branches on some single level
427 // to exercise a split-branch node.
428 static const StringAndValue data[]={
429 { "august", 8 },
430 { "jan", 1 },
431 { "jan.", 1 },
432 { "jana", 1 },
433 { "janbb", 1 },
434 { "janc", 1 },
435 { "janddd", 1 },
436 { "janee", 1 },
437 { "janef", 1 },
438 { "janf", 1 },
439 { "jangg", 1 },
440 { "janh", 1 },
441 { "janiiii", 1 },
442 { "janj", 1 },
443 { "jankk", 1 },
444 { "jankl", 1 },
445 { "jankmm", 1 },
446 { "janl", 1 },
447 { "janm", 1 },
448 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
449 { "jano", 1 },
450 { "janpp", 1 },
451 { "janqqq", 1 },
452 { "janr", 1 },
453 { "januar", 1 },
454 { "january", 1 },
455 { "july", 7 },
456 { "jun", 6 },
457 { "jun.", 6 },
458 { "june", 6 }
459 };
460 return buildTrie(data, LENGTHOF(data), buildOption);
461 }
462
TestHasUniqueValue()463 void UCharsTrieTest::TestHasUniqueValue() {
464 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
465 if(trie.isNull()) {
466 return; // buildTrie() reported an error
467 }
468 int32_t uniqueValue;
469 if(trie->hasUniqueValue(uniqueValue)) {
470 errln("unique value at root");
471 }
472 trie->next(u_j);
473 trie->next(u_a);
474 trie->next(u_n);
475 // hasUniqueValue() directly after next()
476 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
477 errln("not unique value 1 after \"jan\"");
478 }
479 trie->first(u_j);
480 trie->next(u_u);
481 if(trie->hasUniqueValue(uniqueValue)) {
482 errln("unique value after \"ju\"");
483 }
484 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
485 errln("not normal value 6 after \"jun\"");
486 }
487 // hasUniqueValue() after getValue()
488 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
489 errln("not unique value 6 after \"jun\"");
490 }
491 // hasUniqueValue() from within a linear-match node
492 trie->first(u_a);
493 trie->next(u_u);
494 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
495 errln("not unique value 8 after \"au\"");
496 }
497 }
498
TestGetNextUChars()499 void UCharsTrieTest::TestGetNextUChars() {
500 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
501 if(trie.isNull()) {
502 return; // buildTrie() reported an error
503 }
504 UnicodeString buffer;
505 UnicodeStringAppendable app(buffer);
506 int32_t count=trie->getNextUChars(app);
507 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
508 errln("months getNextUChars()!=[aj] at root");
509 }
510 trie->next(u_j);
511 trie->next(u_a);
512 trie->next(u_n);
513 // getNextUChars() directly after next()
514 buffer.remove();
515 count=trie->getNextUChars(app);
516 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
517 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
518 }
519 // getNextUChars() after getValue()
520 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
521 buffer.remove();
522 count=trie->getNextUChars(app);
523 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
524 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
525 }
526 // getNextUChars() from a linear-match node
527 trie->next(u_u);
528 buffer.remove();
529 count=trie->getNextUChars(app);
530 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
531 errln("months getNextUChars()!=[a] after \"janu\"");
532 }
533 trie->next(u_a);
534 buffer.remove();
535 count=trie->getNextUChars(app);
536 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
537 errln("months getNextUChars()!=[r] after \"janua\"");
538 }
539 trie->next(u_r);
540 trie->next(u_y);
541 // getNextUChars() after a final match
542 buffer.remove();
543 count=trie->getNextUChars(app);
544 if(count!=0 || buffer.length()!=0) {
545 errln("months getNextUChars()!=[] after \"january\"");
546 }
547 }
548
TestIteratorFromBranch()549 void UCharsTrieTest::TestIteratorFromBranch() {
550 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
551 if(trie.isNull()) {
552 return; // buildTrie() reported an error
553 }
554 // Go to a branch node.
555 trie->next(u_j);
556 trie->next(u_a);
557 trie->next(u_n);
558 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
559 UCharsTrie::Iterator iter(*trie, 0, errorCode);
560 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
561 return;
562 }
563 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
564 // following "jan".
565 static const StringAndValue data[]={
566 { "", 1 },
567 { ".", 1 },
568 { "a", 1 },
569 { "bb", 1 },
570 { "c", 1 },
571 { "ddd", 1 },
572 { "ee", 1 },
573 { "ef", 1 },
574 { "f", 1 },
575 { "gg", 1 },
576 { "h", 1 },
577 { "iiii", 1 },
578 { "j", 1 },
579 { "kk", 1 },
580 { "kl", 1 },
581 { "kmm", 1 },
582 { "l", 1 },
583 { "m", 1 },
584 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
585 { "o", 1 },
586 { "pp", 1 },
587 { "qqq", 1 },
588 { "r", 1 },
589 { "uar", 1 },
590 { "uary", 1 }
591 };
592 checkIterator(iter, data, LENGTHOF(data));
593 // Reset, and we should get the same result.
594 logln("after iter.reset()");
595 checkIterator(iter.reset(), data, LENGTHOF(data));
596 }
597
TestIteratorFromLinearMatch()598 void UCharsTrieTest::TestIteratorFromLinearMatch() {
599 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
600 if(trie.isNull()) {
601 return; // buildTrie() reported an error
602 }
603 // Go into a linear-match node.
604 trie->next(u_j);
605 trie->next(u_a);
606 trie->next(u_n);
607 trie->next(u_u);
608 trie->next(u_a);
609 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
610 UCharsTrie::Iterator iter(*trie, 0, errorCode);
611 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
612 return;
613 }
614 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
615 // following "janua".
616 static const StringAndValue data[]={
617 { "r", 1 },
618 { "ry", 1 }
619 };
620 checkIterator(iter, data, LENGTHOF(data));
621 // Reset, and we should get the same result.
622 logln("after iter.reset()");
623 checkIterator(iter.reset(), data, LENGTHOF(data));
624 }
625
TestTruncatingIteratorFromRoot()626 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
627 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
628 if(trie.isNull()) {
629 return; // buildTrie() reported an error
630 }
631 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
632 UCharsTrie::Iterator iter(*trie, 4, errorCode);
633 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
634 return;
635 }
636 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
637 // of each string, and no string duplicates from the truncation.
638 static const StringAndValue data[]={
639 { "augu", -1 },
640 { "jan", 1 },
641 { "jan.", 1 },
642 { "jana", 1 },
643 { "janb", -1 },
644 { "janc", 1 },
645 { "jand", -1 },
646 { "jane", -1 },
647 { "janf", 1 },
648 { "jang", -1 },
649 { "janh", 1 },
650 { "jani", -1 },
651 { "janj", 1 },
652 { "jank", -1 },
653 { "janl", 1 },
654 { "janm", 1 },
655 { "jann", -1 },
656 { "jano", 1 },
657 { "janp", -1 },
658 { "janq", -1 },
659 { "janr", 1 },
660 { "janu", -1 },
661 { "july", 7 },
662 { "jun", 6 },
663 { "jun.", 6 },
664 { "june", 6 }
665 };
666 checkIterator(iter, data, LENGTHOF(data));
667 // Reset, and we should get the same result.
668 logln("after iter.reset()");
669 checkIterator(iter.reset(), data, LENGTHOF(data));
670 }
671
TestTruncatingIteratorFromLinearMatchShort()672 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
673 static const StringAndValue data[]={
674 { "abcdef", 10 },
675 { "abcdepq", 200 },
676 { "abcdeyz", 3000 }
677 };
678 LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
679 if(trie.isNull()) {
680 return; // buildTrie() reported an error
681 }
682 // Go into a linear-match node.
683 trie->next(u_a);
684 trie->next(u_b);
685 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
686 // Truncate within the linear-match node.
687 UCharsTrie::Iterator iter(*trie, 2, errorCode);
688 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
689 return;
690 }
691 static const StringAndValue expected[]={
692 { "cd", -1 }
693 };
694 checkIterator(iter, expected, LENGTHOF(expected));
695 // Reset, and we should get the same result.
696 logln("after iter.reset()");
697 checkIterator(iter.reset(), expected, LENGTHOF(expected));
698 }
699
TestTruncatingIteratorFromLinearMatchLong()700 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
701 static const StringAndValue data[]={
702 { "abcdef", 10 },
703 { "abcdepq", 200 },
704 { "abcdeyz", 3000 }
705 };
706 LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
707 if(trie.isNull()) {
708 return; // buildTrie() reported an error
709 }
710 // Go into a linear-match node.
711 trie->next(u_a);
712 trie->next(u_b);
713 trie->next(u_c);
714 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
715 // Truncate after the linear-match node.
716 UCharsTrie::Iterator iter(*trie, 3, errorCode);
717 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
718 return;
719 }
720 static const StringAndValue expected[]={
721 { "def", 10 },
722 { "dep", -1 },
723 { "dey", -1 }
724 };
725 checkIterator(iter, expected, LENGTHOF(expected));
726 // Reset, and we should get the same result.
727 logln("after iter.reset()");
728 checkIterator(iter.reset(), expected, LENGTHOF(expected));
729 }
730
TestIteratorFromUChars()731 void UCharsTrieTest::TestIteratorFromUChars() {
732 static const StringAndValue data[]={
733 { "mm", 3 },
734 { "mmm", 33 },
735 { "mmnop", 333 }
736 };
737 builder_->clear();
738 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
739 for(int32_t i=0; i<LENGTHOF(data); ++i) {
740 builder_->add(data[i].s, data[i].value, errorCode);
741 }
742 UnicodeString trieUChars;
743 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
744 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
745 checkIterator(iter, data, LENGTHOF(data));
746 }
747
checkData(const StringAndValue data[],int32_t dataLength)748 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
749 logln("checkData(dataLength=%d, fast)", (int)dataLength);
750 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
751 logln("checkData(dataLength=%d, small)", (int)dataLength);
752 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
753 }
754
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)755 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
756 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
757 if(trie.isNull()) {
758 return; // buildTrie() reported an error
759 }
760 checkFirst(*trie, data, dataLength);
761 checkNext(*trie, data, dataLength);
762 checkNextWithState(*trie, data, dataLength);
763 checkNextString(*trie, data, dataLength);
764 checkIterator(*trie, data, dataLength);
765 }
766
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)767 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
768 UStringTrieBuildOption buildOption) {
769 IcuTestErrorCode errorCode(*this, "buildTrie()");
770 // Add the items to the trie builder in an interesting (not trivial, not random) order.
771 int32_t index, step;
772 if(dataLength&1) {
773 // Odd number of items.
774 index=dataLength/2;
775 step=2;
776 } else if((dataLength%3)!=0) {
777 // Not a multiple of 3.
778 index=dataLength/5;
779 step=3;
780 } else {
781 index=dataLength-1;
782 step=-1;
783 }
784 builder_->clear();
785 for(int32_t i=0; i<dataLength; ++i) {
786 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
787 data[index].value, errorCode);
788 index=(index+step)%dataLength;
789 }
790 UnicodeString trieUChars;
791 builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
792 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
793 if(!errorCode.logIfFailureAndReset("add()/build()")) {
794 builder_->add("zzz", 999, errorCode);
795 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
796 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
797 }
798 }
799 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
800 UnicodeString trieUChars2;
801 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
802 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
803 errln("builder.buildUnicodeString() before & after build() returned same array");
804 }
805 if(errorCode.isFailure()) {
806 return NULL;
807 }
808 // Tries from either build() method should be identical but
809 // UCharsTrie does not implement equals().
810 // We just return either one.
811 if((dataLength&1)!=0) {
812 return trie.orphan();
813 } else {
814 return new UCharsTrie(trieUChars2.getBuffer());
815 }
816 }
817
checkFirst(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)818 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
819 const StringAndValue data[], int32_t dataLength) {
820 for(int32_t i=0; i<dataLength; ++i) {
821 if(*data[i].s==0) {
822 continue; // skip empty string
823 }
824 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
825 UChar32 c=expectedString[0];
826 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
827 UStringTrieResult firstResult=trie.first(c);
828 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
829 UStringTrieResult nextResult=trie.next(nextCp);
830 if(firstResult!=trie.reset().next(c) ||
831 firstResult!=trie.current() ||
832 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
833 nextResult!=trie.next(nextCp)
834 ) {
835 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
836 c, data[i].s);
837 }
838 c=expectedString.char32At(0);
839 int32_t cLength=U16_LENGTH(c);
840 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
841 firstResult=trie.firstForCodePoint(c);
842 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
843 nextResult=trie.nextForCodePoint(nextCp);
844 if(firstResult!=trie.reset().nextForCodePoint(c) ||
845 firstResult!=trie.current() ||
846 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
847 nextResult!=trie.nextForCodePoint(nextCp)
848 ) {
849 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
850 c, data[i].s);
851 }
852 }
853 trie.reset();
854 }
855
checkNext(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)856 void UCharsTrieTest::checkNext(UCharsTrie &trie,
857 const StringAndValue data[], int32_t dataLength) {
858 UCharsTrie::State state;
859 for(int32_t i=0; i<dataLength; ++i) {
860 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
861 int32_t stringLength= (i&1) ? -1 : expectedString.length();
862 UStringTrieResult result;
863 if( !USTRINGTRIE_HAS_VALUE(
864 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
865 result!=trie.current()
866 ) {
867 errln("trie does not seem to contain %s", data[i].s);
868 } else if(trie.getValue()!=data[i].value) {
869 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
870 data[i].s,
871 (long)trie.getValue(), (long)trie.getValue(),
872 (long)data[i].value, (long)data[i].value);
873 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
874 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
875 }
876 trie.reset();
877 stringLength=expectedString.length();
878 result=trie.current();
879 for(int32_t j=0; j<stringLength; ++j) {
880 if(!USTRINGTRIE_HAS_NEXT(result)) {
881 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
882 break;
883 }
884 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
885 trie.getValue();
886 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
887 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
888 break;
889 }
890 }
891 result=trie.next(expectedString[j]);
892 if(!USTRINGTRIE_MATCHES(result)) {
893 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
894 break;
895 }
896 if(result!=trie.current()) {
897 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
898 break;
899 }
900 }
901 if(!USTRINGTRIE_HAS_VALUE(result)) {
902 errln("trie.next()!=hasValue at the end of %s", data[i].s);
903 continue;
904 }
905 trie.getValue();
906 if(result!=trie.current()) {
907 errln("trie.current() != current()+getValue()+current() after end of %s",
908 data[i].s);
909 }
910 // Compare the final current() with whether next() can actually continue.
911 trie.saveState(state);
912 UBool nextContinues=FALSE;
913 for(int32_t c=0x20; c<0xe000; ++c) {
914 if(c==0x80) {
915 c=0xd800; // Check for ASCII and surrogates but not all of the BMP.
916 }
917 if(trie.resetToState(state).next(c)) {
918 nextContinues=TRUE;
919 break;
920 }
921 }
922 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
923 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
924 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
925 }
926 trie.reset();
927 }
928 }
929
checkNextWithState(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)930 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
931 const StringAndValue data[], int32_t dataLength) {
932 UCharsTrie::State noState, state;
933 for(int32_t i=0; i<dataLength; ++i) {
934 if((i&1)==0) {
935 // This should have no effect.
936 trie.resetToState(noState);
937 }
938 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
939 int32_t stringLength=expectedString.length();
940 int32_t partialLength=stringLength/3;
941 for(int32_t j=0; j<partialLength; ++j) {
942 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
943 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
944 return;
945 }
946 }
947 trie.saveState(state);
948 UStringTrieResult resultAtState=trie.current();
949 UStringTrieResult result;
950 int32_t valueAtState=-99;
951 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
952 valueAtState=trie.getValue();
953 }
954 result=trie.next(0); // mismatch
955 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
956 errln("trie.next(0) matched after part of %s", data[i].s);
957 }
958 if( resultAtState!=trie.resetToState(state).current() ||
959 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
960 ) {
961 errln("trie.next(part of %s) changes current()/getValue() after "
962 "saveState/next(0)/resetToState",
963 data[i].s);
964 } else if(!USTRINGTRIE_HAS_VALUE(
965 result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
966 stringLength-partialLength)) ||
967 result!=trie.current()) {
968 errln("trie.next(rest of %s) does not seem to contain %s after "
969 "saveState/next(0)/resetToState",
970 data[i].s, data[i].s);
971 } else if(!USTRINGTRIE_HAS_VALUE(
972 result=trie.resetToState(state).
973 next(expectedString.getTerminatedBuffer()+partialLength,
974 stringLength-partialLength)) ||
975 result!=trie.current()) {
976 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
977 data[i].s);
978 } else if(trie.getValue()!=data[i].value) {
979 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
980 data[i].s,
981 (long)trie.getValue(), (long)trie.getValue(),
982 (long)data[i].value, (long)data[i].value);
983 }
984 trie.reset();
985 }
986 }
987
988 // next(string) is also tested in other functions,
989 // but here we try to go partway through the string, and then beyond it.
checkNextString(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)990 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
991 const StringAndValue data[], int32_t dataLength) {
992 for(int32_t i=0; i<dataLength; ++i) {
993 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
994 int32_t stringLength=expectedString.length();
995 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
996 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
997 continue;
998 }
999 // Test that we stop properly at the end of the string.
1000 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1001 stringLength+1-stringLength/2)) {
1002 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1003 }
1004 trie.reset();
1005 }
1006 }
1007
checkIterator(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)1008 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1009 const StringAndValue data[], int32_t dataLength) {
1010 IcuTestErrorCode errorCode(*this, "checkIterator()");
1011 UCharsTrie::Iterator iter(trie, 0, errorCode);
1012 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1013 return;
1014 }
1015 checkIterator(iter, data, dataLength);
1016 }
1017
checkIterator(UCharsTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)1018 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1019 const StringAndValue data[], int32_t dataLength) {
1020 IcuTestErrorCode errorCode(*this, "checkIterator()");
1021 for(int32_t i=0; i<dataLength; ++i) {
1022 if(!iter.hasNext()) {
1023 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1024 break;
1025 }
1026 UBool hasNext=iter.next(errorCode);
1027 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1028 break;
1029 }
1030 if(!hasNext) {
1031 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1032 break;
1033 }
1034 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1035 if(iter.getString()!=expectedString) {
1036 char buffer[1000];
1037 UnicodeString invString(prettify(iter.getString()));
1038 invString.extract(0, invString.length(), buffer, LENGTHOF(buffer), US_INV);
1039 errln("trie iterator next().getString()=%s but expected %s for item %d",
1040 buffer, data[i].s, (int)i);
1041 }
1042 if(iter.getValue()!=data[i].value) {
1043 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1044 (long)iter.getValue(), (long)iter.getValue(),
1045 (long)data[i].value, (long)data[i].value,
1046 (int)i, data[i].s);
1047 }
1048 }
1049 if(iter.hasNext()) {
1050 errln("trie iterator hasNext()=TRUE after all items");
1051 }
1052 UBool hasNext=iter.next(errorCode);
1053 errorCode.logIfFailureAndReset("trie iterator next() after all items");
1054 if(hasNext) {
1055 errln("trie iterator next()=TRUE after all items");
1056 }
1057 }
1058