• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2012, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  ucharstrietest.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010nov16
12 *   created by: Markus W. Scherer
13 */
14 
15 #include <string.h>
16 
17 #include "unicode/utypes.h"
18 #include "unicode/appendable.h"
19 #include "unicode/localpointer.h"
20 #include "unicode/ucharstrie.h"
21 #include "unicode/ucharstriebuilder.h"
22 #include "unicode/uniset.h"
23 #include "unicode/unistr.h"
24 #include "intltest.h"
25 
26 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
27 
28 struct StringAndValue {
29     const char *s;
30     int32_t value;
31 };
32 
33 class UCharsTrieTest : public IntlTest {
34 public:
35     UCharsTrieTest();
36     virtual ~UCharsTrieTest();
37 
38     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
39     void TestBuilder();
40     void TestEmpty();
41     void Test_a();
42     void Test_a_ab();
43     void TestShortestBranch();
44     void TestBranches();
45     void TestLongSequence();
46     void TestLongBranch();
47     void TestValuesForState();
48     void TestCompact();
49     void TestFirstForCodePoint();
50     void TestNextForCodePoint();
51 
52     UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
53     void TestLargeTrie();
54 
55     UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
56     void TestHasUniqueValue();
57     void TestGetNextUChars();
58     void TestIteratorFromBranch();
59     void TestIteratorFromLinearMatch();
60     void TestTruncatingIteratorFromRoot();
61     void TestTruncatingIteratorFromLinearMatchShort();
62     void TestTruncatingIteratorFromLinearMatchLong();
63     void TestIteratorFromUChars();
64 
65     void checkData(const StringAndValue data[], int32_t dataLength);
66     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
67     UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
68                           UStringTrieBuildOption buildOption);
69     void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
70     void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
71     void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72     void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73     void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
74     void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
75 
76 private:
77     UCharsTrieBuilder *builder_;
78 };
79 
createUCharsTrieTest()80 extern IntlTest *createUCharsTrieTest() {
81     return new UCharsTrieTest();
82 }
83 
UCharsTrieTest()84 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
85     IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
86     builder_=new UCharsTrieBuilder(errorCode);
87 }
88 
~UCharsTrieTest()89 UCharsTrieTest::~UCharsTrieTest() {
90     delete builder_;
91 }
92 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)93 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
94     if(exec) {
95         logln("TestSuite UCharsTrieTest: ");
96     }
97     TESTCASE_AUTO_BEGIN;
98     TESTCASE_AUTO(TestBuilder);
99     TESTCASE_AUTO(TestEmpty);
100     TESTCASE_AUTO(Test_a);
101     TESTCASE_AUTO(Test_a_ab);
102     TESTCASE_AUTO(TestShortestBranch);
103     TESTCASE_AUTO(TestBranches);
104     TESTCASE_AUTO(TestLongSequence);
105     TESTCASE_AUTO(TestLongBranch);
106     TESTCASE_AUTO(TestValuesForState);
107     TESTCASE_AUTO(TestCompact);
108     TESTCASE_AUTO(TestFirstForCodePoint);
109     TESTCASE_AUTO(TestNextForCodePoint);
110     TESTCASE_AUTO(TestLargeTrie);
111     TESTCASE_AUTO(TestHasUniqueValue);
112     TESTCASE_AUTO(TestGetNextUChars);
113     TESTCASE_AUTO(TestIteratorFromBranch);
114     TESTCASE_AUTO(TestIteratorFromLinearMatch);
115     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
116     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
117     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
118     TESTCASE_AUTO(TestIteratorFromUChars);
119     TESTCASE_AUTO_END;
120 }
121 
TestBuilder()122 void UCharsTrieTest::TestBuilder() {
123     IcuTestErrorCode errorCode(*this, "TestBuilder()");
124     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
125     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
126         errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
127         return;
128     }
129     // TODO: remove .build(...) once add() checks for duplicates.
130     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
131     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
132         errln("UCharsTrieBuilder.add() did not detect duplicates");
133         return;
134     }
135 }
136 
TestEmpty()137 void UCharsTrieTest::TestEmpty() {
138     static const StringAndValue data[]={
139         { "", 0 }
140     };
141     checkData(data, LENGTHOF(data));
142 }
143 
Test_a()144 void UCharsTrieTest::Test_a() {
145     static const StringAndValue data[]={
146         { "a", 1 }
147     };
148     checkData(data, LENGTHOF(data));
149 }
150 
Test_a_ab()151 void UCharsTrieTest::Test_a_ab() {
152     static const StringAndValue data[]={
153         { "a", 1 },
154         { "ab", 100 }
155     };
156     checkData(data, LENGTHOF(data));
157 }
158 
TestShortestBranch()159 void UCharsTrieTest::TestShortestBranch() {
160     static const StringAndValue data[]={
161         { "a", 1000 },
162         { "b", 2000 }
163     };
164     checkData(data, LENGTHOF(data));
165 }
166 
TestBranches()167 void UCharsTrieTest::TestBranches() {
168     static const StringAndValue data[]={
169         { "a", 0x10 },
170         { "cc", 0x40 },
171         { "e", 0x100 },
172         { "ggg", 0x400 },
173         { "i", 0x1000 },
174         { "kkkk", 0x4000 },
175         { "n", 0x10000 },
176         { "ppppp", 0x40000 },
177         { "r", 0x100000 },
178         { "sss", 0x200000 },
179         { "t", 0x400000 },
180         { "uu", 0x800000 },
181         { "vv", 0x7fffffff },
182         { "zz", (int32_t)0x80000000 }
183     };
184     for(int32_t length=2; length<=LENGTHOF(data); ++length) {
185         infoln("TestBranches length=%d", (int)length);
186         checkData(data, length);
187     }
188 }
189 
TestLongSequence()190 void UCharsTrieTest::TestLongSequence() {
191     static const StringAndValue data[]={
192         { "a", -1 },
193         // sequence of linear-match nodes
194         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
195         // more than 256 units
196         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
202     };
203     checkData(data, LENGTHOF(data));
204 }
205 
TestLongBranch()206 void UCharsTrieTest::TestLongBranch() {
207     // Split-branch and interesting compact-integer values.
208     static const StringAndValue data[]={
209         { "a", -2 },
210         { "b", -1 },
211         { "c", 0 },
212         { "d2", 1 },
213         { "f", 0x3f },
214         { "g", 0x40 },
215         { "h", 0x41 },
216         { "j23", 0x1900 },
217         { "j24", 0x19ff },
218         { "j25", 0x1a00 },
219         { "k2", 0x1a80 },
220         { "k3", 0x1aff },
221         { "l234567890", 0x1b00 },
222         { "l234567890123", 0x1b01 },
223         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
224         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
225         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
226         { "r", 0x333333 },
227         { "s2345", 0x4444444 },
228         { "t234567890", 0x77777777 },
229         { "z", (int32_t)0x80000001 }
230     };
231     checkData(data, LENGTHOF(data));
232 }
233 
TestValuesForState()234 void UCharsTrieTest::TestValuesForState() {
235     // Check that saveState() and resetToState() interact properly
236     // with next() and current().
237     static const StringAndValue data[]={
238         { "a", -1 },
239         { "ab", -2 },
240         { "abc", -3 },
241         { "abcd", -4 },
242         { "abcde", -5 },
243         { "abcdef", -6 }
244     };
245     checkData(data, LENGTHOF(data));
246 }
247 
TestCompact()248 void UCharsTrieTest::TestCompact() {
249     // Duplicate trailing strings and values provide opportunities for compacting.
250     static const StringAndValue data[]={
251         { "+", 0 },
252         { "+august", 8 },
253         { "+december", 12 },
254         { "+july", 7 },
255         { "+june", 6 },
256         { "+november", 11 },
257         { "+october", 10 },
258         { "+september", 9 },
259         { "-", 0 },
260         { "-august", 8 },
261         { "-december", 12 },
262         { "-july", 7 },
263         { "-june", 6 },
264         { "-november", 11 },
265         { "-october", 10 },
266         { "-september", 9 },
267         // The l+n branch (with its sub-nodes) is a duplicate but will be written
268         // both times because each time it follows a different linear-match node.
269         { "xjuly", 7 },
270         { "xjune", 6 }
271     };
272     checkData(data, LENGTHOF(data));
273 }
274 
TestFirstForCodePoint()275 void UCharsTrieTest::TestFirstForCodePoint() {
276     static const StringAndValue data[]={
277         { "a", 1 },
278         { "a\\ud800", 2 },
279         { "a\\U00010000", 3 },
280         { "\\ud840", 4 },
281         { "\\U00020000\\udbff", 5 },
282         { "\\U00020000\\U0010ffff", 6 },
283         { "\\U00020000\\U0010ffffz", 7 },
284         { "\\U00050000xy", 8 },
285         { "\\U00050000xyz", 9 }
286     };
287     checkData(data, LENGTHOF(data));
288 }
289 
TestNextForCodePoint()290 void UCharsTrieTest::TestNextForCodePoint() {
291     static const StringAndValue data[]={
292         { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
293         { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
294         { "\\u4dff\\U000103ff", 99999 }
295     };
296     LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
297     if(trie.isNull()) {
298         return;  // buildTrie() reported an error
299     }
300     UStringTrieResult result;
301     if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
302         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
303         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304         (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305         (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
306         (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
307         trie->getValue()!=2000000000
308     ) {
309         errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
310     }
311     if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
312         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
313         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
314         (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
315         trie->getValue()!=44444
316     ) {
317         errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
318     }
319     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
320         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
321         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
322         (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
323     ) {
324         errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
325     }
326     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
327         (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
328         trie->getValue()!=99999
329     ) {
330         errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
331     }
332 }
333 
334 // Definitions in the anonymous namespace are invisible outside this file.
335 namespace {
336 
337 // Generate (string, value) pairs.
338 // The first string (before next()) will be empty.
339 class Generator {
340 public:
Generator()341     Generator() : value(4711), num(0) {}
next()342     void next() {
343         UChar c;
344         s.truncate(0);
345         s.append(c=(UChar)(value>>16));
346         s.append((UChar)(value>>4));
347         if(value&1) {
348             s.append((UChar)value);
349         }
350         set.add(c);
351         value+=((value>>5)&0x7ff)*3+1;
352         ++num;
353     }
getString() const354     const UnicodeString &getString() const { return s; }
getValue() const355     int32_t getValue() const { return value; }
countUniqueFirstChars() const356     int32_t countUniqueFirstChars() const { return set.size(); }
getIndex() const357     int32_t getIndex() const { return num; }
358 
359 private:
360     UnicodeString s;
361     UnicodeSet set;
362     int32_t value;
363     int32_t num;
364 };
365 
366 }  // end namespace
367 
buildLargeTrie(int32_t numUniqueFirst)368 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
369     IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
370     Generator gen;
371     builder_->clear();
372     while(gen.countUniqueFirstChars()<numUniqueFirst) {
373         builder_->add(gen.getString(), gen.getValue(), errorCode);
374         gen.next();
375     }
376     infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
377     UnicodeString trieUChars;
378     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
379     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
380     return new UCharsTrie(trieUChars.getBuffer());
381 }
382 
383 // Exercise a large branch node.
TestLargeTrie()384 void UCharsTrieTest::TestLargeTrie() {
385     LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
386     if(trie.isNull()) {
387         return;  // buildTrie() reported an error
388     }
389     Generator gen;
390     while(gen.countUniqueFirstChars()<1111) {
391         UnicodeString x(gen.getString());
392         int32_t value=gen.getValue();
393         if(!x.isEmpty()) {
394             if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
395                 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
396                       x[0], (long)gen.getIndex());
397                 break;
398             }
399             x.remove(0, 1);
400         }
401         UStringTrieResult result=trie->next(x.getBuffer(), x.length());
402         if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
403             errln("next(%d chars U+%04X U+%04X)!=hasValue or "
404                   "next()!=current() or getValue() wrong "
405                   "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
406             break;
407         }
408         gen.next();
409     }
410 }
411 
412 enum {
413     u_a=0x61,
414     u_b=0x62,
415     u_c=0x63,
416     u_j=0x6a,
417     u_n=0x6e,
418     u_r=0x72,
419     u_u=0x75,
420     u_y=0x79
421 };
422 
buildMonthsTrie(UStringTrieBuildOption buildOption)423 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
424     // All types of nodes leading to the same value,
425     // for code coverage of recursive functions.
426     // In particular, we need a lot of branches on some single level
427     // to exercise a split-branch node.
428     static const StringAndValue data[]={
429         { "august", 8 },
430         { "jan", 1 },
431         { "jan.", 1 },
432         { "jana", 1 },
433         { "janbb", 1 },
434         { "janc", 1 },
435         { "janddd", 1 },
436         { "janee", 1 },
437         { "janef", 1 },
438         { "janf", 1 },
439         { "jangg", 1 },
440         { "janh", 1 },
441         { "janiiii", 1 },
442         { "janj", 1 },
443         { "jankk", 1 },
444         { "jankl", 1 },
445         { "jankmm", 1 },
446         { "janl", 1 },
447         { "janm", 1 },
448         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
449         { "jano", 1 },
450         { "janpp", 1 },
451         { "janqqq", 1 },
452         { "janr", 1 },
453         { "januar", 1 },
454         { "january", 1 },
455         { "july", 7 },
456         { "jun", 6 },
457         { "jun.", 6 },
458         { "june", 6 }
459     };
460     return buildTrie(data, LENGTHOF(data), buildOption);
461 }
462 
TestHasUniqueValue()463 void UCharsTrieTest::TestHasUniqueValue() {
464     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
465     if(trie.isNull()) {
466         return;  // buildTrie() reported an error
467     }
468     int32_t uniqueValue;
469     if(trie->hasUniqueValue(uniqueValue)) {
470         errln("unique value at root");
471     }
472     trie->next(u_j);
473     trie->next(u_a);
474     trie->next(u_n);
475     // hasUniqueValue() directly after next()
476     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
477         errln("not unique value 1 after \"jan\"");
478     }
479     trie->first(u_j);
480     trie->next(u_u);
481     if(trie->hasUniqueValue(uniqueValue)) {
482         errln("unique value after \"ju\"");
483     }
484     if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
485         errln("not normal value 6 after \"jun\"");
486     }
487     // hasUniqueValue() after getValue()
488     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
489         errln("not unique value 6 after \"jun\"");
490     }
491     // hasUniqueValue() from within a linear-match node
492     trie->first(u_a);
493     trie->next(u_u);
494     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
495         errln("not unique value 8 after \"au\"");
496     }
497 }
498 
TestGetNextUChars()499 void UCharsTrieTest::TestGetNextUChars() {
500     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
501     if(trie.isNull()) {
502         return;  // buildTrie() reported an error
503     }
504     UnicodeString buffer;
505     UnicodeStringAppendable app(buffer);
506     int32_t count=trie->getNextUChars(app);
507     if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
508         errln("months getNextUChars()!=[aj] at root");
509     }
510     trie->next(u_j);
511     trie->next(u_a);
512     trie->next(u_n);
513     // getNextUChars() directly after next()
514     buffer.remove();
515     count=trie->getNextUChars(app);
516     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
517         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
518     }
519     // getNextUChars() after getValue()
520     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
521     buffer.remove();
522     count=trie->getNextUChars(app);
523     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
524         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
525     }
526     // getNextUChars() from a linear-match node
527     trie->next(u_u);
528     buffer.remove();
529     count=trie->getNextUChars(app);
530     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
531         errln("months getNextUChars()!=[a] after \"janu\"");
532     }
533     trie->next(u_a);
534     buffer.remove();
535     count=trie->getNextUChars(app);
536     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
537         errln("months getNextUChars()!=[r] after \"janua\"");
538     }
539     trie->next(u_r);
540     trie->next(u_y);
541     // getNextUChars() after a final match
542     buffer.remove();
543     count=trie->getNextUChars(app);
544     if(count!=0 || buffer.length()!=0) {
545         errln("months getNextUChars()!=[] after \"january\"");
546     }
547 }
548 
TestIteratorFromBranch()549 void UCharsTrieTest::TestIteratorFromBranch() {
550     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
551     if(trie.isNull()) {
552         return;  // buildTrie() reported an error
553     }
554     // Go to a branch node.
555     trie->next(u_j);
556     trie->next(u_a);
557     trie->next(u_n);
558     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
559     UCharsTrie::Iterator iter(*trie, 0, errorCode);
560     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
561         return;
562     }
563     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
564     // following "jan".
565     static const StringAndValue data[]={
566         { "", 1 },
567         { ".", 1 },
568         { "a", 1 },
569         { "bb", 1 },
570         { "c", 1 },
571         { "ddd", 1 },
572         { "ee", 1 },
573         { "ef", 1 },
574         { "f", 1 },
575         { "gg", 1 },
576         { "h", 1 },
577         { "iiii", 1 },
578         { "j", 1 },
579         { "kk", 1 },
580         { "kl", 1 },
581         { "kmm", 1 },
582         { "l", 1 },
583         { "m", 1 },
584         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
585         { "o", 1 },
586         { "pp", 1 },
587         { "qqq", 1 },
588         { "r", 1 },
589         { "uar", 1 },
590         { "uary", 1 }
591     };
592     checkIterator(iter, data, LENGTHOF(data));
593     // Reset, and we should get the same result.
594     logln("after iter.reset()");
595     checkIterator(iter.reset(), data, LENGTHOF(data));
596 }
597 
TestIteratorFromLinearMatch()598 void UCharsTrieTest::TestIteratorFromLinearMatch() {
599     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
600     if(trie.isNull()) {
601         return;  // buildTrie() reported an error
602     }
603     // Go into a linear-match node.
604     trie->next(u_j);
605     trie->next(u_a);
606     trie->next(u_n);
607     trie->next(u_u);
608     trie->next(u_a);
609     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
610     UCharsTrie::Iterator iter(*trie, 0, errorCode);
611     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
612         return;
613     }
614     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
615     // following "janua".
616     static const StringAndValue data[]={
617         { "r", 1 },
618         { "ry", 1 }
619     };
620     checkIterator(iter, data, LENGTHOF(data));
621     // Reset, and we should get the same result.
622     logln("after iter.reset()");
623     checkIterator(iter.reset(), data, LENGTHOF(data));
624 }
625 
TestTruncatingIteratorFromRoot()626 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
627     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
628     if(trie.isNull()) {
629         return;  // buildTrie() reported an error
630     }
631     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
632     UCharsTrie::Iterator iter(*trie, 4, errorCode);
633     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
634         return;
635     }
636     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
637     // of each string, and no string duplicates from the truncation.
638     static const StringAndValue data[]={
639         { "augu", -1 },
640         { "jan", 1 },
641         { "jan.", 1 },
642         { "jana", 1 },
643         { "janb", -1 },
644         { "janc", 1 },
645         { "jand", -1 },
646         { "jane", -1 },
647         { "janf", 1 },
648         { "jang", -1 },
649         { "janh", 1 },
650         { "jani", -1 },
651         { "janj", 1 },
652         { "jank", -1 },
653         { "janl", 1 },
654         { "janm", 1 },
655         { "jann", -1 },
656         { "jano", 1 },
657         { "janp", -1 },
658         { "janq", -1 },
659         { "janr", 1 },
660         { "janu", -1 },
661         { "july", 7 },
662         { "jun", 6 },
663         { "jun.", 6 },
664         { "june", 6 }
665     };
666     checkIterator(iter, data, LENGTHOF(data));
667     // Reset, and we should get the same result.
668     logln("after iter.reset()");
669     checkIterator(iter.reset(), data, LENGTHOF(data));
670 }
671 
TestTruncatingIteratorFromLinearMatchShort()672 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
673     static const StringAndValue data[]={
674         { "abcdef", 10 },
675         { "abcdepq", 200 },
676         { "abcdeyz", 3000 }
677     };
678     LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
679     if(trie.isNull()) {
680         return;  // buildTrie() reported an error
681     }
682     // Go into a linear-match node.
683     trie->next(u_a);
684     trie->next(u_b);
685     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
686     // Truncate within the linear-match node.
687     UCharsTrie::Iterator iter(*trie, 2, errorCode);
688     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
689         return;
690     }
691     static const StringAndValue expected[]={
692         { "cd", -1 }
693     };
694     checkIterator(iter, expected, LENGTHOF(expected));
695     // Reset, and we should get the same result.
696     logln("after iter.reset()");
697     checkIterator(iter.reset(), expected, LENGTHOF(expected));
698 }
699 
TestTruncatingIteratorFromLinearMatchLong()700 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
701     static const StringAndValue data[]={
702         { "abcdef", 10 },
703         { "abcdepq", 200 },
704         { "abcdeyz", 3000 }
705     };
706     LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
707     if(trie.isNull()) {
708         return;  // buildTrie() reported an error
709     }
710     // Go into a linear-match node.
711     trie->next(u_a);
712     trie->next(u_b);
713     trie->next(u_c);
714     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
715     // Truncate after the linear-match node.
716     UCharsTrie::Iterator iter(*trie, 3, errorCode);
717     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
718         return;
719     }
720     static const StringAndValue expected[]={
721         { "def", 10 },
722         { "dep", -1 },
723         { "dey", -1 }
724     };
725     checkIterator(iter, expected, LENGTHOF(expected));
726     // Reset, and we should get the same result.
727     logln("after iter.reset()");
728     checkIterator(iter.reset(), expected, LENGTHOF(expected));
729 }
730 
TestIteratorFromUChars()731 void UCharsTrieTest::TestIteratorFromUChars() {
732     static const StringAndValue data[]={
733         { "mm", 3 },
734         { "mmm", 33 },
735         { "mmnop", 333 }
736     };
737     builder_->clear();
738     IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
739     for(int32_t i=0; i<LENGTHOF(data); ++i) {
740         builder_->add(data[i].s, data[i].value, errorCode);
741     }
742     UnicodeString trieUChars;
743     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
744     UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
745     checkIterator(iter, data, LENGTHOF(data));
746 }
747 
checkData(const StringAndValue data[],int32_t dataLength)748 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
749     logln("checkData(dataLength=%d, fast)", (int)dataLength);
750     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
751     logln("checkData(dataLength=%d, small)", (int)dataLength);
752     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
753 }
754 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)755 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
756     LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
757     if(trie.isNull()) {
758         return;  // buildTrie() reported an error
759     }
760     checkFirst(*trie, data, dataLength);
761     checkNext(*trie, data, dataLength);
762     checkNextWithState(*trie, data, dataLength);
763     checkNextString(*trie, data, dataLength);
764     checkIterator(*trie, data, dataLength);
765 }
766 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)767 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
768                                       UStringTrieBuildOption buildOption) {
769     IcuTestErrorCode errorCode(*this, "buildTrie()");
770     // Add the items to the trie builder in an interesting (not trivial, not random) order.
771     int32_t index, step;
772     if(dataLength&1) {
773         // Odd number of items.
774         index=dataLength/2;
775         step=2;
776     } else if((dataLength%3)!=0) {
777         // Not a multiple of 3.
778         index=dataLength/5;
779         step=3;
780     } else {
781         index=dataLength-1;
782         step=-1;
783     }
784     builder_->clear();
785     for(int32_t i=0; i<dataLength; ++i) {
786         builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
787                       data[index].value, errorCode);
788         index=(index+step)%dataLength;
789     }
790     UnicodeString trieUChars;
791     builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
792     LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
793     if(!errorCode.logIfFailureAndReset("add()/build()")) {
794         builder_->add("zzz", 999, errorCode);
795         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
796             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
797         }
798     }
799     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
800     UnicodeString trieUChars2;
801     builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
802     if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
803         errln("builder.buildUnicodeString() before & after build() returned same array");
804     }
805     if(errorCode.isFailure()) {
806         return NULL;
807     }
808     // Tries from either build() method should be identical but
809     // UCharsTrie does not implement equals().
810     // We just return either one.
811     if((dataLength&1)!=0) {
812         return trie.orphan();
813     } else {
814         return new UCharsTrie(trieUChars2.getBuffer());
815     }
816 }
817 
checkFirst(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)818 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
819                                 const StringAndValue data[], int32_t dataLength) {
820     for(int32_t i=0; i<dataLength; ++i) {
821         if(*data[i].s==0) {
822             continue;  // skip empty string
823         }
824         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
825         UChar32 c=expectedString[0];
826         UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
827         UStringTrieResult firstResult=trie.first(c);
828         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
829         UStringTrieResult nextResult=trie.next(nextCp);
830         if(firstResult!=trie.reset().next(c) ||
831            firstResult!=trie.current() ||
832            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
833            nextResult!=trie.next(nextCp)
834         ) {
835             errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
836                   c, data[i].s);
837         }
838         c=expectedString.char32At(0);
839         int32_t cLength=U16_LENGTH(c);
840         nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
841         firstResult=trie.firstForCodePoint(c);
842         firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
843         nextResult=trie.nextForCodePoint(nextCp);
844         if(firstResult!=trie.reset().nextForCodePoint(c) ||
845            firstResult!=trie.current() ||
846            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
847            nextResult!=trie.nextForCodePoint(nextCp)
848         ) {
849             errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
850                   c, data[i].s);
851         }
852     }
853     trie.reset();
854 }
855 
checkNext(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)856 void UCharsTrieTest::checkNext(UCharsTrie &trie,
857                                const StringAndValue data[], int32_t dataLength) {
858     UCharsTrie::State state;
859     for(int32_t i=0; i<dataLength; ++i) {
860         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
861         int32_t stringLength= (i&1) ? -1 : expectedString.length();
862         UStringTrieResult result;
863         if( !USTRINGTRIE_HAS_VALUE(
864                 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
865             result!=trie.current()
866         ) {
867             errln("trie does not seem to contain %s", data[i].s);
868         } else if(trie.getValue()!=data[i].value) {
869             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
870                   data[i].s,
871                   (long)trie.getValue(), (long)trie.getValue(),
872                   (long)data[i].value, (long)data[i].value);
873         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
874             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
875         }
876         trie.reset();
877         stringLength=expectedString.length();
878         result=trie.current();
879         for(int32_t j=0; j<stringLength; ++j) {
880             if(!USTRINGTRIE_HAS_NEXT(result)) {
881                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
882                 break;
883             }
884             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
885                 trie.getValue();
886                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
887                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
888                     break;
889                 }
890             }
891             result=trie.next(expectedString[j]);
892             if(!USTRINGTRIE_MATCHES(result)) {
893                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
894                 break;
895             }
896             if(result!=trie.current()) {
897                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
898                 break;
899             }
900         }
901         if(!USTRINGTRIE_HAS_VALUE(result)) {
902             errln("trie.next()!=hasValue at the end of %s", data[i].s);
903             continue;
904         }
905         trie.getValue();
906         if(result!=trie.current()) {
907             errln("trie.current() != current()+getValue()+current() after end of %s",
908                   data[i].s);
909         }
910         // Compare the final current() with whether next() can actually continue.
911         trie.saveState(state);
912         UBool nextContinues=FALSE;
913         for(int32_t c=0x20; c<0xe000; ++c) {
914             if(c==0x80) {
915                 c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
916             }
917             if(trie.resetToState(state).next(c)) {
918                 nextContinues=TRUE;
919                 break;
920             }
921         }
922         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
923             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
924                   "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
925         }
926         trie.reset();
927     }
928 }
929 
checkNextWithState(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)930 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
931                                         const StringAndValue data[], int32_t dataLength) {
932     UCharsTrie::State noState, state;
933     for(int32_t i=0; i<dataLength; ++i) {
934         if((i&1)==0) {
935             // This should have no effect.
936             trie.resetToState(noState);
937         }
938         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
939         int32_t stringLength=expectedString.length();
940         int32_t partialLength=stringLength/3;
941         for(int32_t j=0; j<partialLength; ++j) {
942             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
943                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
944                 return;
945             }
946         }
947         trie.saveState(state);
948         UStringTrieResult resultAtState=trie.current();
949         UStringTrieResult result;
950         int32_t valueAtState=-99;
951         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
952             valueAtState=trie.getValue();
953         }
954         result=trie.next(0);  // mismatch
955         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
956             errln("trie.next(0) matched after part of %s", data[i].s);
957         }
958         if( resultAtState!=trie.resetToState(state).current() ||
959             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
960         ) {
961             errln("trie.next(part of %s) changes current()/getValue() after "
962                   "saveState/next(0)/resetToState",
963                   data[i].s);
964         } else if(!USTRINGTRIE_HAS_VALUE(
965                       result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
966                                        stringLength-partialLength)) ||
967                   result!=trie.current()) {
968             errln("trie.next(rest of %s) does not seem to contain %s after "
969                   "saveState/next(0)/resetToState",
970                   data[i].s, data[i].s);
971         } else if(!USTRINGTRIE_HAS_VALUE(
972                       result=trie.resetToState(state).
973                                   next(expectedString.getTerminatedBuffer()+partialLength,
974                                        stringLength-partialLength)) ||
975                   result!=trie.current()) {
976             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
977                   data[i].s);
978         } else if(trie.getValue()!=data[i].value) {
979             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
980                   data[i].s,
981                   (long)trie.getValue(), (long)trie.getValue(),
982                   (long)data[i].value, (long)data[i].value);
983         }
984         trie.reset();
985     }
986 }
987 
988 // next(string) is also tested in other functions,
989 // but here we try to go partway through the string, and then beyond it.
checkNextString(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)990 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
991                                      const StringAndValue data[], int32_t dataLength) {
992     for(int32_t i=0; i<dataLength; ++i) {
993         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
994         int32_t stringLength=expectedString.length();
995         if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
996             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
997             continue;
998         }
999         // Test that we stop properly at the end of the string.
1000         if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1001                      stringLength+1-stringLength/2)) {
1002             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1003         }
1004         trie.reset();
1005     }
1006 }
1007 
checkIterator(UCharsTrie & trie,const StringAndValue data[],int32_t dataLength)1008 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1009                                    const StringAndValue data[], int32_t dataLength) {
1010     IcuTestErrorCode errorCode(*this, "checkIterator()");
1011     UCharsTrie::Iterator iter(trie, 0, errorCode);
1012     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1013         return;
1014     }
1015     checkIterator(iter, data, dataLength);
1016 }
1017 
checkIterator(UCharsTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)1018 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1019                                    const StringAndValue data[], int32_t dataLength) {
1020     IcuTestErrorCode errorCode(*this, "checkIterator()");
1021     for(int32_t i=0; i<dataLength; ++i) {
1022         if(!iter.hasNext()) {
1023             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1024             break;
1025         }
1026         UBool hasNext=iter.next(errorCode);
1027         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1028             break;
1029         }
1030         if(!hasNext) {
1031             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1032             break;
1033         }
1034         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1035         if(iter.getString()!=expectedString) {
1036             char buffer[1000];
1037             UnicodeString invString(prettify(iter.getString()));
1038             invString.extract(0, invString.length(), buffer, LENGTHOF(buffer), US_INV);
1039             errln("trie iterator next().getString()=%s but expected %s for item %d",
1040                   buffer, data[i].s, (int)i);
1041         }
1042         if(iter.getValue()!=data[i].value) {
1043             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1044                   (long)iter.getValue(), (long)iter.getValue(),
1045                   (long)data[i].value, (long)data[i].value,
1046                   (int)i, data[i].s);
1047         }
1048     }
1049     if(iter.hasNext()) {
1050         errln("trie iterator hasNext()=TRUE after all items");
1051     }
1052     UBool hasNext=iter.next(errorCode);
1053     errorCode.logIfFailureAndReset("trie iterator next() after all items");
1054     if(hasNext) {
1055         errln("trie iterator next()=TRUE after all items");
1056     }
1057 }
1058