• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  bytetrietest.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010nov16
14 *   created by: Markus W. Scherer
15 */
16 
17 #include <string.h>
18 
19 #include "unicode/utypes.h"
20 #include "unicode/bytestrie.h"
21 #include "unicode/bytestriebuilder.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/stringpiece.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26 
27 struct StringAndValue {
28     const char *s;
29     int32_t value;
30 };
31 
32 class BytesTrieTest : public IntlTest {
33 public:
34     BytesTrieTest();
35     virtual ~BytesTrieTest();
36 
37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38     void TestBuilder();
39     void TestEmpty();
40     void Test_a();
41     void Test_a_ab();
42     void TestShortestBranch();
43     void TestBranches();
44     void TestLongSequence();
45     void TestLongBranch();
46     void TestValuesForState();
47     void TestCompact();
48 
49     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
50     void TestHasUniqueValue();
51     void TestGetNextBytes();
52     void TestIteratorFromBranch();
53     void TestIteratorFromLinearMatch();
54     void TestTruncatingIteratorFromRoot();
55     void TestTruncatingIteratorFromLinearMatchShort();
56     void TestTruncatingIteratorFromLinearMatchLong();
57     void TestIteratorFromBytes();
58     void TestFailedIterator();
59 
60     void checkData(const StringAndValue data[], int32_t dataLength);
61     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
62     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
63                          UStringTrieBuildOption buildOption);
64     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67     void checkNextWithState64(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
68     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
69     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
70     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
71 
72 private:
73     BytesTrieBuilder *builder_;
74 };
75 
createBytesTrieTest()76 extern IntlTest *createBytesTrieTest() {
77     return new BytesTrieTest();
78 }
79 
BytesTrieTest()80 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
81     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
82     builder_=new BytesTrieBuilder(errorCode);
83 }
84 
~BytesTrieTest()85 BytesTrieTest::~BytesTrieTest() {
86     delete builder_;
87 }
88 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)89 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
90     if(exec) {
91         logln("TestSuite BytesTrieTest: ");
92     }
93     TESTCASE_AUTO_BEGIN;
94     TESTCASE_AUTO(TestBuilder);
95     TESTCASE_AUTO(TestEmpty);
96     TESTCASE_AUTO(Test_a);
97     TESTCASE_AUTO(Test_a_ab);
98     TESTCASE_AUTO(TestShortestBranch);
99     TESTCASE_AUTO(TestBranches);
100     TESTCASE_AUTO(TestLongSequence);
101     TESTCASE_AUTO(TestLongBranch);
102     TESTCASE_AUTO(TestValuesForState);
103     TESTCASE_AUTO(TestCompact);
104     TESTCASE_AUTO(TestHasUniqueValue);
105     TESTCASE_AUTO(TestGetNextBytes);
106     TESTCASE_AUTO(TestIteratorFromBranch);
107     TESTCASE_AUTO(TestIteratorFromLinearMatch);
108     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
109     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
110     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
111     TESTCASE_AUTO(TestIteratorFromBytes);
112     TESTCASE_AUTO(TestFailedIterator);
113     TESTCASE_AUTO_END;
114 }
115 
TestBuilder()116 void BytesTrieTest::TestBuilder() {
117     IcuTestErrorCode errorCode(*this, "TestBuilder()");
118     builder_->clear();
119     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
120     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
121         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
122         return;
123     }
124     // TODO: remove .build(...) once add() checks for duplicates.
125     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
126     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
127         errln("BytesTrieBuilder.add() did not detect duplicates");
128         return;
129     }
130 }
131 
TestEmpty()132 void BytesTrieTest::TestEmpty() {
133     static const StringAndValue data[]={
134         { "", 0 }
135     };
136     checkData(data, UPRV_LENGTHOF(data));
137 }
138 
Test_a()139 void BytesTrieTest::Test_a() {
140     static const StringAndValue data[]={
141         { "a", 1 }
142     };
143     checkData(data, UPRV_LENGTHOF(data));
144 }
145 
Test_a_ab()146 void BytesTrieTest::Test_a_ab() {
147     static const StringAndValue data[]={
148         { "a", 1 },
149         { "ab", 100 }
150     };
151     checkData(data, UPRV_LENGTHOF(data));
152 }
153 
TestShortestBranch()154 void BytesTrieTest::TestShortestBranch() {
155     static const StringAndValue data[]={
156         { "a", 1000 },
157         { "b", 2000 }
158     };
159     checkData(data, UPRV_LENGTHOF(data));
160 }
161 
TestBranches()162 void BytesTrieTest::TestBranches() {
163     static const StringAndValue data[]={
164         { "a", 0x10 },
165         { "cc", 0x40 },
166         { "e", 0x100 },
167         { "ggg", 0x400 },
168         { "i", 0x1000 },
169         { "kkkk", 0x4000 },
170         { "n", 0x10000 },
171         { "ppppp", 0x40000 },
172         { "r", 0x100000 },
173         { "sss", 0x200000 },
174         { "t", 0x400000 },
175         { "uu", 0x800000 },
176         { "vv", 0x7fffffff },
177         { "zz", (int32_t)0x80000000 }
178     };
179     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
180         logln("TestBranches length=%d", (int)length);
181         checkData(data, length);
182     }
183 }
184 
TestLongSequence()185 void BytesTrieTest::TestLongSequence() {
186     static const StringAndValue data[]={
187         { "a", -1 },
188         // sequence of linear-match nodes
189         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
190         // more than 256 bytes
191         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
193           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
194           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
195           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
196           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
197     };
198     checkData(data, UPRV_LENGTHOF(data));
199 }
200 
TestLongBranch()201 void BytesTrieTest::TestLongBranch() {
202     // Split-branch and interesting compact-integer values.
203     static const StringAndValue data[]={
204         { "a", -2 },
205         { "b", -1 },
206         { "c", 0 },
207         { "d2", 1 },
208         { "f", 0x3f },
209         { "g", 0x40 },
210         { "h", 0x41 },
211         { "j23", 0x1900 },
212         { "j24", 0x19ff },
213         { "j25", 0x1a00 },
214         { "k2", 0x1a80 },
215         { "k3", 0x1aff },
216         { "l234567890", 0x1b00 },
217         { "l234567890123", 0x1b01 },
218         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
219         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
220         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
221         { "r", 0x333333 },
222         { "s2345", 0x4444444 },
223         { "t234567890", 0x77777777 },
224         { "z", (int32_t)0x80000001 }
225     };
226     checkData(data, UPRV_LENGTHOF(data));
227 }
228 
TestValuesForState()229 void BytesTrieTest::TestValuesForState() {
230     // Check that saveState() and resetToState() interact properly
231     // with next() and current().
232     static const StringAndValue data[]={
233         { "a", -1 },
234         { "ab", -2 },
235         { "abc", -3 },
236         { "abcd", -4 },
237         { "abcde", -5 },
238         { "abcdef", -6 }
239     };
240     checkData(data, UPRV_LENGTHOF(data));
241 }
242 
TestCompact()243 void BytesTrieTest::TestCompact() {
244     // Duplicate trailing strings and values provide opportunities for compacting.
245     static const StringAndValue data[]={
246         { "+", 0 },
247         { "+august", 8 },
248         { "+december", 12 },
249         { "+july", 7 },
250         { "+june", 6 },
251         { "+november", 11 },
252         { "+october", 10 },
253         { "+september", 9 },
254         { "-", 0 },
255         { "-august", 8 },
256         { "-december", 12 },
257         { "-july", 7 },
258         { "-june", 6 },
259         { "-november", 11 },
260         { "-october", 10 },
261         { "-september", 9 },
262         // The l+n branch (with its sub-nodes) is a duplicate but will be written
263         // both times because each time it follows a different linear-match node.
264         { "xjuly", 7 },
265         { "xjune", 6 }
266     };
267     checkData(data, UPRV_LENGTHOF(data));
268 }
269 
buildMonthsTrie(UStringTrieBuildOption buildOption)270 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
271     // All types of nodes leading to the same value,
272     // for code coverage of recursive functions.
273     // In particular, we need a lot of branches on some single level
274     // to exercise a split-branch node.
275     static const StringAndValue data[]={
276         { "august", 8 },
277         { "jan", 1 },
278         { "jan.", 1 },
279         { "jana", 1 },
280         { "janbb", 1 },
281         { "janc", 1 },
282         { "janddd", 1 },
283         { "janee", 1 },
284         { "janef", 1 },
285         { "janf", 1 },
286         { "jangg", 1 },
287         { "janh", 1 },
288         { "janiiii", 1 },
289         { "janj", 1 },
290         { "jankk", 1 },
291         { "jankl", 1 },
292         { "jankmm", 1 },
293         { "janl", 1 },
294         { "janm", 1 },
295         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
296         { "jano", 1 },
297         { "janpp", 1 },
298         { "janqqq", 1 },
299         { "janr", 1 },
300         { "januar", 1 },
301         { "january", 1 },
302         { "july", 7 },
303         { "jun", 6 },
304         { "jun.", 6 },
305         { "june", 6 }
306     };
307     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
308 }
309 
TestHasUniqueValue()310 void BytesTrieTest::TestHasUniqueValue() {
311     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
312     if(trie.isNull()) {
313         return;  // buildTrie() reported an error
314     }
315     int32_t uniqueValue;
316     if(trie->hasUniqueValue(uniqueValue)) {
317         errln("unique value at root");
318     }
319     trie->next('j');
320     trie->next('a');
321     trie->next('n');
322     // hasUniqueValue() directly after next()
323     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
324         errln("not unique value 1 after \"jan\"");
325     }
326     trie->first('j');
327     trie->next('u');
328     if(trie->hasUniqueValue(uniqueValue)) {
329         errln("unique value after \"ju\"");
330     }
331     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
332         errln("not normal value 6 after \"jun\"");
333     }
334     // hasUniqueValue() after getValue()
335     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
336         errln("not unique value 6 after \"jun\"");
337     }
338     // hasUniqueValue() from within a linear-match node
339     trie->first('a');
340     trie->next('u');
341     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
342         errln("not unique value 8 after \"au\"");
343     }
344 }
345 
TestGetNextBytes()346 void BytesTrieTest::TestGetNextBytes() {
347     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
348     if(trie.isNull()) {
349         return;  // buildTrie() reported an error
350     }
351     char buffer[40];
352     CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
353     int32_t count=trie->getNextBytes(sink);
354     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
355         errln("months getNextBytes()!=[aj] at root");
356     }
357     trie->next('j');
358     trie->next('a');
359     trie->next('n');
360     // getNextBytes() directly after next()
361     count=trie->getNextBytes(sink.Reset());
362     buffer[count]=0;
363     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
364         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
365     }
366     // getNextBytes() after getValue()
367     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
368     memset(buffer, 0, sizeof(buffer));
369     count=trie->getNextBytes(sink.Reset());
370     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
371         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
372     }
373     // getNextBytes() from a linear-match node
374     trie->next('u');
375     memset(buffer, 0, sizeof(buffer));
376     count=trie->getNextBytes(sink.Reset());
377     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
378         errln("months getNextBytes()!=[a] after \"janu\"");
379     }
380     trie->next('a');
381     memset(buffer, 0, sizeof(buffer));
382     count=trie->getNextBytes(sink.Reset());
383     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
384         errln("months getNextBytes()!=[r] after \"janua\"");
385     }
386     trie->next('r');
387     trie->next('y');
388     // getNextBytes() after a final match
389     count=trie->getNextBytes(sink.Reset());
390     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
391         errln("months getNextBytes()!=[] after \"january\"");
392     }
393 }
394 
TestIteratorFromBranch()395 void BytesTrieTest::TestIteratorFromBranch() {
396     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
397     if(trie.isNull()) {
398         return;  // buildTrie() reported an error
399     }
400     // Go to a branch node.
401     trie->next('j');
402     trie->next('a');
403     trie->next('n');
404     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
405     BytesTrie::Iterator iter(*trie, 0, errorCode);
406     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
407         return;
408     }
409     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
410     // following "jan".
411     static const StringAndValue data[]={
412         { "", 1 },
413         { ".", 1 },
414         { "a", 1 },
415         { "bb", 1 },
416         { "c", 1 },
417         { "ddd", 1 },
418         { "ee", 1 },
419         { "ef", 1 },
420         { "f", 1 },
421         { "gg", 1 },
422         { "h", 1 },
423         { "iiii", 1 },
424         { "j", 1 },
425         { "kk", 1 },
426         { "kl", 1 },
427         { "kmm", 1 },
428         { "l", 1 },
429         { "m", 1 },
430         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
431         { "o", 1 },
432         { "pp", 1 },
433         { "qqq", 1 },
434         { "r", 1 },
435         { "uar", 1 },
436         { "uary", 1 }
437     };
438     checkIterator(iter, data, UPRV_LENGTHOF(data));
439     // Reset, and we should get the same result.
440     logln("after iter.reset()");
441     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
442 }
443 
TestIteratorFromLinearMatch()444 void BytesTrieTest::TestIteratorFromLinearMatch() {
445     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
446     if(trie.isNull()) {
447         return;  // buildTrie() reported an error
448     }
449     // Go into a linear-match node.
450     trie->next('j');
451     trie->next('a');
452     trie->next('n');
453     trie->next('u');
454     trie->next('a');
455     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
456     BytesTrie::Iterator iter(*trie, 0, errorCode);
457     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
458         return;
459     }
460     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
461     // following "janua".
462     static const StringAndValue data[]={
463         { "r", 1 },
464         { "ry", 1 }
465     };
466     checkIterator(iter, data, UPRV_LENGTHOF(data));
467     // Reset, and we should get the same result.
468     logln("after iter.reset()");
469     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
470 }
471 
TestTruncatingIteratorFromRoot()472 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
473     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
474     if(trie.isNull()) {
475         return;  // buildTrie() reported an error
476     }
477     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
478     BytesTrie::Iterator iter(*trie, 4, errorCode);
479     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
480         return;
481     }
482     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
483     // of each string, and no string duplicates from the truncation.
484     static const StringAndValue data[]={
485         { "augu", -1 },
486         { "jan", 1 },
487         { "jan.", 1 },
488         { "jana", 1 },
489         { "janb", -1 },
490         { "janc", 1 },
491         { "jand", -1 },
492         { "jane", -1 },
493         { "janf", 1 },
494         { "jang", -1 },
495         { "janh", 1 },
496         { "jani", -1 },
497         { "janj", 1 },
498         { "jank", -1 },
499         { "janl", 1 },
500         { "janm", 1 },
501         { "jann", -1 },
502         { "jano", 1 },
503         { "janp", -1 },
504         { "janq", -1 },
505         { "janr", 1 },
506         { "janu", -1 },
507         { "july", 7 },
508         { "jun", 6 },
509         { "jun.", 6 },
510         { "june", 6 }
511     };
512     checkIterator(iter, data, UPRV_LENGTHOF(data));
513     // Reset, and we should get the same result.
514     logln("after iter.reset()");
515     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
516 }
517 
TestTruncatingIteratorFromLinearMatchShort()518 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
519     static const StringAndValue data[]={
520         { "abcdef", 10 },
521         { "abcdepq", 200 },
522         { "abcdeyz", 3000 }
523     };
524     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
525     if(trie.isNull()) {
526         return;  // buildTrie() reported an error
527     }
528     // Go into a linear-match node.
529     trie->next('a');
530     trie->next('b');
531     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
532     // Truncate within the linear-match node.
533     BytesTrie::Iterator iter(*trie, 2, errorCode);
534     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
535         return;
536     }
537     static const StringAndValue expected[]={
538         { "cd", -1 }
539     };
540     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
541     // Reset, and we should get the same result.
542     logln("after iter.reset()");
543     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
544 }
545 
TestTruncatingIteratorFromLinearMatchLong()546 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
547     static const StringAndValue data[]={
548         { "abcdef", 10 },
549         { "abcdepq", 200 },
550         { "abcdeyz", 3000 }
551     };
552     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
553     if(trie.isNull()) {
554         return;  // buildTrie() reported an error
555     }
556     // Go into a linear-match node.
557     trie->next('a');
558     trie->next('b');
559     trie->next('c');
560     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
561     // Truncate after the linear-match node.
562     BytesTrie::Iterator iter(*trie, 3, errorCode);
563     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
564         return;
565     }
566     static const StringAndValue expected[]={
567         { "def", 10 },
568         { "dep", -1 },
569         { "dey", -1 }
570     };
571     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
572     // Reset, and we should get the same result.
573     logln("after iter.reset()");
574     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
575 }
576 
TestIteratorFromBytes()577 void BytesTrieTest::TestIteratorFromBytes() {
578     static const StringAndValue data[]={
579         { "mm", 3 },
580         { "mmm", 33 },
581         { "mmnop", 333 }
582     };
583     builder_->clear();
584     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
585     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
586         builder_->add(data[i].s, data[i].value, errorCode);
587     }
588     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
589     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
590     checkIterator(iter, data, UPRV_LENGTHOF(data));
591 }
592 
TestFailedIterator()593 void BytesTrieTest::TestFailedIterator() {
594     UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
595     BytesTrie::Iterator iter(NULL, 0, failure);
596     StringPiece sp = iter.getString();
597     if (!sp.empty()) {
598         errln("failed iterator returned garbage data");
599     }
600 }
601 
checkData(const StringAndValue data[],int32_t dataLength)602 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
603     logln("checkData(dataLength=%d, fast)", (int)dataLength);
604     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
605     logln("checkData(dataLength=%d, small)", (int)dataLength);
606     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
607 }
608 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)609 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
610     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
611     if(trie.isNull()) {
612         return;  // buildTrie() reported an error
613     }
614     checkFirst(*trie, data, dataLength);
615     checkNext(*trie, data, dataLength);
616     checkNextWithState(*trie, data, dataLength);
617     checkNextWithState64(*trie, data, dataLength);
618     checkNextString(*trie, data, dataLength);
619     checkIterator(*trie, data, dataLength);
620 }
621 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)622 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
623                                     UStringTrieBuildOption buildOption) {
624     IcuTestErrorCode errorCode(*this, "buildTrie()");
625     // Add the items to the trie builder in an interesting (not trivial, not random) order.
626     int32_t index, step;
627     if(dataLength&1) {
628         // Odd number of items.
629         index=dataLength/2;
630         step=2;
631     } else if((dataLength%3)!=0) {
632         // Not a multiple of 3.
633         index=dataLength/5;
634         step=3;
635     } else {
636         index=dataLength-1;
637         step=-1;
638     }
639     builder_->clear();
640     for(int32_t i=0; i<dataLength; ++i) {
641         builder_->add(data[index].s, data[index].value, errorCode);
642         index=(index+step)%dataLength;
643     }
644     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
645     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
646     if(!errorCode.errIfFailureAndReset("add()/build()")) {
647         builder_->add("zzz", 999, errorCode);
648         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
649             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
650         }
651     }
652     logln("serialized trie size: %ld bytes\n", (long)sp.length());
653     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
654     if(sp.data()==sp2.data()) {
655         errln("builder.buildStringPiece() before & after build() returned same array");
656     }
657     if(errorCode.isFailure()) {
658         return NULL;
659     }
660     // Tries from either build() method should be identical but
661     // BytesTrie does not implement equals().
662     // We just return either one.
663     if((dataLength&1)!=0) {
664         return trie.orphan();
665     } else {
666         return new BytesTrie(sp2.data());
667     }
668 }
669 
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)670 void BytesTrieTest::checkFirst(BytesTrie &trie,
671                                const StringAndValue data[], int32_t dataLength) {
672     for(int32_t i=0; i<dataLength; ++i) {
673         int c=*data[i].s;
674         if(c==0) {
675             continue;  // skip empty string
676         }
677         UStringTrieResult firstResult=trie.first(c);
678         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
679         UStringTrieResult nextResult=trie.next(data[i].s[1]);
680         if(firstResult!=trie.reset().next(c) ||
681            firstResult!=trie.current() ||
682            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
683            nextResult!=trie.next(data[i].s[1])
684         ) {
685             errln("trie.first(%c)!=trie.reset().next(same) for %s",
686                   c, data[i].s);
687         }
688     }
689     trie.reset();
690 }
691 
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)692 void BytesTrieTest::checkNext(BytesTrie &trie,
693                               const StringAndValue data[], int32_t dataLength) {
694     BytesTrie::State state;
695     for(int32_t i=0; i<dataLength; ++i) {
696         int32_t stringLength= (i&1) ? -1 : static_cast<int32_t>(strlen(data[i].s));
697         UStringTrieResult result;
698         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
699             result!=trie.current()
700         ) {
701             errln("trie does not seem to contain %s", data[i].s);
702         } else if(trie.getValue()!=data[i].value) {
703             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
704                   data[i].s,
705                   (long)trie.getValue(), (long)trie.getValue(),
706                   (long)data[i].value, (long)data[i].value);
707         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
708             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
709         }
710         trie.reset();
711         stringLength = static_cast<int32_t>(strlen(data[i].s));
712         result=trie.current();
713         for(int32_t j=0; j<stringLength; ++j) {
714             if(!USTRINGTRIE_HAS_NEXT(result)) {
715                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
716                 break;
717             }
718             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
719                 trie.getValue();
720                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
721                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
722                     break;
723                 }
724             }
725             result=trie.next(data[i].s[j]);
726             if(!USTRINGTRIE_MATCHES(result)) {
727                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
728                 break;
729             }
730             if(result!=trie.current()) {
731                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
732                 break;
733             }
734         }
735         if(!USTRINGTRIE_HAS_VALUE(result)) {
736             errln("trie.next()!=hasValue at the end of %s", data[i].s);
737             continue;
738         }
739         trie.getValue();
740         if(result!=trie.current()) {
741             errln("trie.current() != current()+getValue()+current() after end of %s",
742                   data[i].s);
743         }
744         // Compare the final current() with whether next() can actually continue.
745         trie.saveState(state);
746         UBool nextContinues=FALSE;
747         // Try all graphic characters; we only use those in test strings in this file.
748 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
749         const int32_t minChar=0x20;
750         const int32_t maxChar=0x7e;
751 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
752         const int32_t minChar=0x40;
753         const int32_t maxChar=0xfe;
754 #else
755         const int32_t minChar=0;
756         const int32_t maxChar=0xff;
757 #endif
758         for(int32_t c=minChar; c<=maxChar; ++c) {
759             if(trie.resetToState(state).next(c)) {
760                 nextContinues=TRUE;
761                 break;
762             }
763         }
764         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
765             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
766                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
767         }
768         trie.reset();
769     }
770 }
771 
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)772 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
773                                        const StringAndValue data[], int32_t dataLength) {
774     BytesTrie::State noState, state;
775     for(int32_t i=0; i<dataLength; ++i) {
776         if((i&1)==0) {
777             // This should have no effect.
778             trie.resetToState(noState);
779         }
780         const char *expectedString=data[i].s;
781         int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
782         int32_t partialLength = stringLength / 3;
783         for(int32_t j=0; j<partialLength; ++j) {
784             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
785                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
786                 return;
787             }
788         }
789         trie.saveState(state);
790         UStringTrieResult resultAtState=trie.current();
791         UStringTrieResult result;
792         int32_t valueAtState=-99;
793         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
794             valueAtState=trie.getValue();
795         }
796         result=trie.next(0);  // mismatch
797         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
798             errln("trie.next(0) matched after part of %s", data[i].s);
799         }
800         if( resultAtState!=trie.resetToState(state).current() ||
801             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
802         ) {
803             errln("trie.next(part of %s) changes current()/getValue() after "
804                   "saveState/next(0)/resetToState",
805                   data[i].s);
806         } else if(!USTRINGTRIE_HAS_VALUE(
807                       result=trie.next(expectedString+partialLength,
808                                        stringLength-partialLength)) ||
809                   result!=trie.current()) {
810             errln("trie.next(rest of %s) does not seem to contain %s after "
811                   "saveState/next(0)/resetToState",
812                   data[i].s, data[i].s);
813         } else if(!USTRINGTRIE_HAS_VALUE(
814                       result=trie.resetToState(state).
815                                   next(expectedString+partialLength,
816                                        stringLength-partialLength)) ||
817                   result!=trie.current()) {
818             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
819                   data[i].s);
820         } else if(trie.getValue()!=data[i].value) {
821             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
822                   data[i].s,
823                   (long)trie.getValue(), (long)trie.getValue(),
824                   (long)data[i].value, (long)data[i].value);
825         }
826         trie.reset();
827     }
828 }
829 
checkNextWithState64(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)830 void BytesTrieTest::checkNextWithState64(BytesTrie &trie,
831                                          const StringAndValue data[], int32_t dataLength) {
832     assertTrue("trie(initial state).getState64()!=0", trie.getState64() != 0);
833     for(int32_t i=0; i<dataLength; ++i) {
834         const char *expectedString=data[i].s;
835         int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
836         int32_t partialLength = stringLength / 3;
837         for(int32_t j=0; j<partialLength; ++j) {
838             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
839                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
840                 return;
841             }
842         }
843         uint64_t state = trie.getState64();
844         assertTrue("trie.getState64()!=0", state != 0);
845         UStringTrieResult resultAtState=trie.current();
846         UStringTrieResult result;
847         int32_t valueAtState=-99;
848         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
849             valueAtState=trie.getValue();
850         }
851         result=trie.next(0);  // mismatch
852         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
853             errln("trie.next(0) matched after part of %s", data[i].s);
854         }
855         if( resultAtState!=trie.resetToState64(state).current() ||
856             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
857         ) {
858             errln("trie.next(part of %s) changes current()/getValue() after "
859                   "getState64/next(0)/resetToState64",
860                   data[i].s);
861         } else if(!USTRINGTRIE_HAS_VALUE(
862                       result=trie.next(expectedString+partialLength,
863                                        stringLength-partialLength)) ||
864                   result!=trie.current()) {
865             errln("trie.next(rest of %s) does not seem to contain %s after "
866                   "getState64/next(0)/resetToState64",
867                   data[i].s, data[i].s);
868         } else if(!USTRINGTRIE_HAS_VALUE(
869                       result=trie.resetToState64(state).
870                                   next(expectedString+partialLength,
871                                        stringLength-partialLength)) ||
872                   result!=trie.current()) {
873             errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64",
874                   data[i].s);
875         } else if(trie.getValue()!=data[i].value) {
876             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
877                   data[i].s,
878                   (long)trie.getValue(), (long)trie.getValue(),
879                   (long)data[i].value, (long)data[i].value);
880         }
881         trie.reset();
882     }
883 }
884 
885 // next(string) is also tested in other functions,
886 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)887 void BytesTrieTest::checkNextString(BytesTrie &trie,
888                                     const StringAndValue data[], int32_t dataLength) {
889     for(int32_t i=0; i<dataLength; ++i) {
890         const char *expectedString=data[i].s;
891         int32_t stringLength = static_cast<int32_t>(strlen(expectedString));
892         if(!trie.next(expectedString, stringLength/2)) {
893             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
894             continue;
895         }
896         // Test that we stop properly at the end of the string.
897         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
898             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
899         }
900         trie.reset();
901     }
902 }
903 
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)904 void BytesTrieTest::checkIterator(const BytesTrie &trie,
905                                   const StringAndValue data[], int32_t dataLength) {
906     IcuTestErrorCode errorCode(*this, "checkIterator()");
907     BytesTrie::Iterator iter(trie, 0, errorCode);
908     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
909         return;
910     }
911     checkIterator(iter, data, dataLength);
912 }
913 
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)914 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
915                                   const StringAndValue data[], int32_t dataLength) {
916     IcuTestErrorCode errorCode(*this, "checkIterator()");
917     for(int32_t i=0; i<dataLength; ++i) {
918         if(!iter.hasNext()) {
919             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
920             break;
921         }
922         UBool hasNext=iter.next(errorCode);
923         if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
924             break;
925         }
926         if(!hasNext) {
927             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
928             break;
929         }
930         if(iter.getString()!=StringPiece(data[i].s)) {
931             errln("trie iterator next().getString()=%s but expected %s for item %d",
932                   iter.getString().data(), data[i].s, (int)i);
933         }
934         if(iter.getValue()!=data[i].value) {
935             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
936                   (long)iter.getValue(), (long)iter.getValue(),
937                   (long)data[i].value, (long)data[i].value,
938                   (int)i, data[i].s);
939         }
940     }
941     if(iter.hasNext()) {
942         errln("trie iterator hasNext()=TRUE after all items");
943     }
944     UBool hasNext=iter.next(errorCode);
945     errorCode.errIfFailureAndReset("trie iterator next() after all items");
946     if(hasNext) {
947         errln("trie iterator next()=TRUE after all items");
948     }
949 }
950