• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2013, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  bytetrietest.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010nov16
12 *   created by: Markus W. Scherer
13 */
14 
15 #include <string.h>
16 
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/bytestriebuilder.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/stringpiece.h"
22 #include "intltest.h"
23 
24 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
25 
26 struct StringAndValue {
27     const char *s;
28     int32_t value;
29 };
30 
31 class BytesTrieTest : public IntlTest {
32 public:
33     BytesTrieTest();
34     virtual ~BytesTrieTest();
35 
36     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
37     void TestBuilder();
38     void TestEmpty();
39     void Test_a();
40     void Test_a_ab();
41     void TestShortestBranch();
42     void TestBranches();
43     void TestLongSequence();
44     void TestLongBranch();
45     void TestValuesForState();
46     void TestCompact();
47 
48     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
49     void TestHasUniqueValue();
50     void TestGetNextBytes();
51     void TestIteratorFromBranch();
52     void TestIteratorFromLinearMatch();
53     void TestTruncatingIteratorFromRoot();
54     void TestTruncatingIteratorFromLinearMatchShort();
55     void TestTruncatingIteratorFromLinearMatchLong();
56     void TestIteratorFromBytes();
57 
58     void checkData(const StringAndValue data[], int32_t dataLength);
59     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
60     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
61                          UStringTrieBuildOption buildOption);
62     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
63     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
64     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
68 
69 private:
70     BytesTrieBuilder *builder_;
71 };
72 
createBytesTrieTest()73 extern IntlTest *createBytesTrieTest() {
74     return new BytesTrieTest();
75 }
76 
BytesTrieTest()77 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
78     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
79     builder_=new BytesTrieBuilder(errorCode);
80 }
81 
~BytesTrieTest()82 BytesTrieTest::~BytesTrieTest() {
83     delete builder_;
84 }
85 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)86 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
87     if(exec) {
88         logln("TestSuite BytesTrieTest: ");
89     }
90     TESTCASE_AUTO_BEGIN;
91     TESTCASE_AUTO(TestBuilder);
92     TESTCASE_AUTO(TestEmpty);
93     TESTCASE_AUTO(Test_a);
94     TESTCASE_AUTO(Test_a_ab);
95     TESTCASE_AUTO(TestShortestBranch);
96     TESTCASE_AUTO(TestBranches);
97     TESTCASE_AUTO(TestLongSequence);
98     TESTCASE_AUTO(TestLongBranch);
99     TESTCASE_AUTO(TestValuesForState);
100     TESTCASE_AUTO(TestCompact);
101     TESTCASE_AUTO(TestHasUniqueValue);
102     TESTCASE_AUTO(TestGetNextBytes);
103     TESTCASE_AUTO(TestIteratorFromBranch);
104     TESTCASE_AUTO(TestIteratorFromLinearMatch);
105     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
106     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
107     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
108     TESTCASE_AUTO(TestIteratorFromBytes);
109     TESTCASE_AUTO_END;
110 }
111 
TestBuilder()112 void BytesTrieTest::TestBuilder() {
113     IcuTestErrorCode errorCode(*this, "TestBuilder()");
114     builder_->clear();
115     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
116     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
117         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
118         return;
119     }
120     // TODO: remove .build(...) once add() checks for duplicates.
121     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
122     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
123         errln("BytesTrieBuilder.add() did not detect duplicates");
124         return;
125     }
126 }
127 
TestEmpty()128 void BytesTrieTest::TestEmpty() {
129     static const StringAndValue data[]={
130         { "", 0 }
131     };
132     checkData(data, LENGTHOF(data));
133 }
134 
Test_a()135 void BytesTrieTest::Test_a() {
136     static const StringAndValue data[]={
137         { "a", 1 }
138     };
139     checkData(data, LENGTHOF(data));
140 }
141 
Test_a_ab()142 void BytesTrieTest::Test_a_ab() {
143     static const StringAndValue data[]={
144         { "a", 1 },
145         { "ab", 100 }
146     };
147     checkData(data, LENGTHOF(data));
148 }
149 
TestShortestBranch()150 void BytesTrieTest::TestShortestBranch() {
151     static const StringAndValue data[]={
152         { "a", 1000 },
153         { "b", 2000 }
154     };
155     checkData(data, LENGTHOF(data));
156 }
157 
TestBranches()158 void BytesTrieTest::TestBranches() {
159     static const StringAndValue data[]={
160         { "a", 0x10 },
161         { "cc", 0x40 },
162         { "e", 0x100 },
163         { "ggg", 0x400 },
164         { "i", 0x1000 },
165         { "kkkk", 0x4000 },
166         { "n", 0x10000 },
167         { "ppppp", 0x40000 },
168         { "r", 0x100000 },
169         { "sss", 0x200000 },
170         { "t", 0x400000 },
171         { "uu", 0x800000 },
172         { "vv", 0x7fffffff },
173         { "zz", (int32_t)0x80000000 }
174     };
175     for(int32_t length=2; length<=LENGTHOF(data); ++length) {
176         logln("TestBranches length=%d", (int)length);
177         checkData(data, length);
178     }
179 }
180 
TestLongSequence()181 void BytesTrieTest::TestLongSequence() {
182     static const StringAndValue data[]={
183         { "a", -1 },
184         // sequence of linear-match nodes
185         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
186         // more than 256 bytes
187         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
188           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
189           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
190           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
193     };
194     checkData(data, LENGTHOF(data));
195 }
196 
TestLongBranch()197 void BytesTrieTest::TestLongBranch() {
198     // Split-branch and interesting compact-integer values.
199     static const StringAndValue data[]={
200         { "a", -2 },
201         { "b", -1 },
202         { "c", 0 },
203         { "d2", 1 },
204         { "f", 0x3f },
205         { "g", 0x40 },
206         { "h", 0x41 },
207         { "j23", 0x1900 },
208         { "j24", 0x19ff },
209         { "j25", 0x1a00 },
210         { "k2", 0x1a80 },
211         { "k3", 0x1aff },
212         { "l234567890", 0x1b00 },
213         { "l234567890123", 0x1b01 },
214         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
215         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
216         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
217         { "r", 0x333333 },
218         { "s2345", 0x4444444 },
219         { "t234567890", 0x77777777 },
220         { "z", (int32_t)0x80000001 }
221     };
222     checkData(data, LENGTHOF(data));
223 }
224 
TestValuesForState()225 void BytesTrieTest::TestValuesForState() {
226     // Check that saveState() and resetToState() interact properly
227     // with next() and current().
228     static const StringAndValue data[]={
229         { "a", -1 },
230         { "ab", -2 },
231         { "abc", -3 },
232         { "abcd", -4 },
233         { "abcde", -5 },
234         { "abcdef", -6 }
235     };
236     checkData(data, LENGTHOF(data));
237 }
238 
TestCompact()239 void BytesTrieTest::TestCompact() {
240     // Duplicate trailing strings and values provide opportunities for compacting.
241     static const StringAndValue data[]={
242         { "+", 0 },
243         { "+august", 8 },
244         { "+december", 12 },
245         { "+july", 7 },
246         { "+june", 6 },
247         { "+november", 11 },
248         { "+october", 10 },
249         { "+september", 9 },
250         { "-", 0 },
251         { "-august", 8 },
252         { "-december", 12 },
253         { "-july", 7 },
254         { "-june", 6 },
255         { "-november", 11 },
256         { "-october", 10 },
257         { "-september", 9 },
258         // The l+n branch (with its sub-nodes) is a duplicate but will be written
259         // both times because each time it follows a different linear-match node.
260         { "xjuly", 7 },
261         { "xjune", 6 }
262     };
263     checkData(data, LENGTHOF(data));
264 }
265 
buildMonthsTrie(UStringTrieBuildOption buildOption)266 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
267     // All types of nodes leading to the same value,
268     // for code coverage of recursive functions.
269     // In particular, we need a lot of branches on some single level
270     // to exercise a split-branch node.
271     static const StringAndValue data[]={
272         { "august", 8 },
273         { "jan", 1 },
274         { "jan.", 1 },
275         { "jana", 1 },
276         { "janbb", 1 },
277         { "janc", 1 },
278         { "janddd", 1 },
279         { "janee", 1 },
280         { "janef", 1 },
281         { "janf", 1 },
282         { "jangg", 1 },
283         { "janh", 1 },
284         { "janiiii", 1 },
285         { "janj", 1 },
286         { "jankk", 1 },
287         { "jankl", 1 },
288         { "jankmm", 1 },
289         { "janl", 1 },
290         { "janm", 1 },
291         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
292         { "jano", 1 },
293         { "janpp", 1 },
294         { "janqqq", 1 },
295         { "janr", 1 },
296         { "januar", 1 },
297         { "january", 1 },
298         { "july", 7 },
299         { "jun", 6 },
300         { "jun.", 6 },
301         { "june", 6 }
302     };
303     return buildTrie(data, LENGTHOF(data), buildOption);
304 }
305 
TestHasUniqueValue()306 void BytesTrieTest::TestHasUniqueValue() {
307     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
308     if(trie.isNull()) {
309         return;  // buildTrie() reported an error
310     }
311     int32_t uniqueValue;
312     if(trie->hasUniqueValue(uniqueValue)) {
313         errln("unique value at root");
314     }
315     trie->next('j');
316     trie->next('a');
317     trie->next('n');
318     // hasUniqueValue() directly after next()
319     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
320         errln("not unique value 1 after \"jan\"");
321     }
322     trie->first('j');
323     trie->next('u');
324     if(trie->hasUniqueValue(uniqueValue)) {
325         errln("unique value after \"ju\"");
326     }
327     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
328         errln("not normal value 6 after \"jun\"");
329     }
330     // hasUniqueValue() after getValue()
331     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
332         errln("not unique value 6 after \"jun\"");
333     }
334     // hasUniqueValue() from within a linear-match node
335     trie->first('a');
336     trie->next('u');
337     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
338         errln("not unique value 8 after \"au\"");
339     }
340 }
341 
TestGetNextBytes()342 void BytesTrieTest::TestGetNextBytes() {
343     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
344     if(trie.isNull()) {
345         return;  // buildTrie() reported an error
346     }
347     char buffer[40];
348     CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
349     int32_t count=trie->getNextBytes(sink);
350     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
351         errln("months getNextBytes()!=[aj] at root");
352     }
353     trie->next('j');
354     trie->next('a');
355     trie->next('n');
356     // getNextBytes() directly after next()
357     count=trie->getNextBytes(sink.Reset());
358     buffer[count]=0;
359     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
360         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
361     }
362     // getNextBytes() after getValue()
363     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
364     memset(buffer, 0, sizeof(buffer));
365     count=trie->getNextBytes(sink.Reset());
366     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
367         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
368     }
369     // getNextBytes() from a linear-match node
370     trie->next('u');
371     memset(buffer, 0, sizeof(buffer));
372     count=trie->getNextBytes(sink.Reset());
373     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
374         errln("months getNextBytes()!=[a] after \"janu\"");
375     }
376     trie->next('a');
377     memset(buffer, 0, sizeof(buffer));
378     count=trie->getNextBytes(sink.Reset());
379     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
380         errln("months getNextBytes()!=[r] after \"janua\"");
381     }
382     trie->next('r');
383     trie->next('y');
384     // getNextBytes() after a final match
385     count=trie->getNextBytes(sink.Reset());
386     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
387         errln("months getNextBytes()!=[] after \"january\"");
388     }
389 }
390 
TestIteratorFromBranch()391 void BytesTrieTest::TestIteratorFromBranch() {
392     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
393     if(trie.isNull()) {
394         return;  // buildTrie() reported an error
395     }
396     // Go to a branch node.
397     trie->next('j');
398     trie->next('a');
399     trie->next('n');
400     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
401     BytesTrie::Iterator iter(*trie, 0, errorCode);
402     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
403         return;
404     }
405     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
406     // following "jan".
407     static const StringAndValue data[]={
408         { "", 1 },
409         { ".", 1 },
410         { "a", 1 },
411         { "bb", 1 },
412         { "c", 1 },
413         { "ddd", 1 },
414         { "ee", 1 },
415         { "ef", 1 },
416         { "f", 1 },
417         { "gg", 1 },
418         { "h", 1 },
419         { "iiii", 1 },
420         { "j", 1 },
421         { "kk", 1 },
422         { "kl", 1 },
423         { "kmm", 1 },
424         { "l", 1 },
425         { "m", 1 },
426         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
427         { "o", 1 },
428         { "pp", 1 },
429         { "qqq", 1 },
430         { "r", 1 },
431         { "uar", 1 },
432         { "uary", 1 }
433     };
434     checkIterator(iter, data, LENGTHOF(data));
435     // Reset, and we should get the same result.
436     logln("after iter.reset()");
437     checkIterator(iter.reset(), data, LENGTHOF(data));
438 }
439 
TestIteratorFromLinearMatch()440 void BytesTrieTest::TestIteratorFromLinearMatch() {
441     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
442     if(trie.isNull()) {
443         return;  // buildTrie() reported an error
444     }
445     // Go into a linear-match node.
446     trie->next('j');
447     trie->next('a');
448     trie->next('n');
449     trie->next('u');
450     trie->next('a');
451     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
452     BytesTrie::Iterator iter(*trie, 0, errorCode);
453     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
454         return;
455     }
456     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
457     // following "janua".
458     static const StringAndValue data[]={
459         { "r", 1 },
460         { "ry", 1 }
461     };
462     checkIterator(iter, data, LENGTHOF(data));
463     // Reset, and we should get the same result.
464     logln("after iter.reset()");
465     checkIterator(iter.reset(), data, LENGTHOF(data));
466 }
467 
TestTruncatingIteratorFromRoot()468 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
469     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
470     if(trie.isNull()) {
471         return;  // buildTrie() reported an error
472     }
473     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
474     BytesTrie::Iterator iter(*trie, 4, errorCode);
475     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
476         return;
477     }
478     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
479     // of each string, and no string duplicates from the truncation.
480     static const StringAndValue data[]={
481         { "augu", -1 },
482         { "jan", 1 },
483         { "jan.", 1 },
484         { "jana", 1 },
485         { "janb", -1 },
486         { "janc", 1 },
487         { "jand", -1 },
488         { "jane", -1 },
489         { "janf", 1 },
490         { "jang", -1 },
491         { "janh", 1 },
492         { "jani", -1 },
493         { "janj", 1 },
494         { "jank", -1 },
495         { "janl", 1 },
496         { "janm", 1 },
497         { "jann", -1 },
498         { "jano", 1 },
499         { "janp", -1 },
500         { "janq", -1 },
501         { "janr", 1 },
502         { "janu", -1 },
503         { "july", 7 },
504         { "jun", 6 },
505         { "jun.", 6 },
506         { "june", 6 }
507     };
508     checkIterator(iter, data, LENGTHOF(data));
509     // Reset, and we should get the same result.
510     logln("after iter.reset()");
511     checkIterator(iter.reset(), data, LENGTHOF(data));
512 }
513 
TestTruncatingIteratorFromLinearMatchShort()514 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
515     static const StringAndValue data[]={
516         { "abcdef", 10 },
517         { "abcdepq", 200 },
518         { "abcdeyz", 3000 }
519     };
520     LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
521     if(trie.isNull()) {
522         return;  // buildTrie() reported an error
523     }
524     // Go into a linear-match node.
525     trie->next('a');
526     trie->next('b');
527     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
528     // Truncate within the linear-match node.
529     BytesTrie::Iterator iter(*trie, 2, errorCode);
530     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
531         return;
532     }
533     static const StringAndValue expected[]={
534         { "cd", -1 }
535     };
536     checkIterator(iter, expected, LENGTHOF(expected));
537     // Reset, and we should get the same result.
538     logln("after iter.reset()");
539     checkIterator(iter.reset(), expected, LENGTHOF(expected));
540 }
541 
TestTruncatingIteratorFromLinearMatchLong()542 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
543     static const StringAndValue data[]={
544         { "abcdef", 10 },
545         { "abcdepq", 200 },
546         { "abcdeyz", 3000 }
547     };
548     LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
549     if(trie.isNull()) {
550         return;  // buildTrie() reported an error
551     }
552     // Go into a linear-match node.
553     trie->next('a');
554     trie->next('b');
555     trie->next('c');
556     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
557     // Truncate after the linear-match node.
558     BytesTrie::Iterator iter(*trie, 3, errorCode);
559     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
560         return;
561     }
562     static const StringAndValue expected[]={
563         { "def", 10 },
564         { "dep", -1 },
565         { "dey", -1 }
566     };
567     checkIterator(iter, expected, LENGTHOF(expected));
568     // Reset, and we should get the same result.
569     logln("after iter.reset()");
570     checkIterator(iter.reset(), expected, LENGTHOF(expected));
571 }
572 
TestIteratorFromBytes()573 void BytesTrieTest::TestIteratorFromBytes() {
574     static const StringAndValue data[]={
575         { "mm", 3 },
576         { "mmm", 33 },
577         { "mmnop", 333 }
578     };
579     builder_->clear();
580     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
581     for(int32_t i=0; i<LENGTHOF(data); ++i) {
582         builder_->add(data[i].s, data[i].value, errorCode);
583     }
584     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
585     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
586     checkIterator(iter, data, LENGTHOF(data));
587 }
588 
checkData(const StringAndValue data[],int32_t dataLength)589 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
590     logln("checkData(dataLength=%d, fast)", (int)dataLength);
591     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
592     logln("checkData(dataLength=%d, small)", (int)dataLength);
593     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
594 }
595 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)596 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
597     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
598     if(trie.isNull()) {
599         return;  // buildTrie() reported an error
600     }
601     checkFirst(*trie, data, dataLength);
602     checkNext(*trie, data, dataLength);
603     checkNextWithState(*trie, data, dataLength);
604     checkNextString(*trie, data, dataLength);
605     checkIterator(*trie, data, dataLength);
606 }
607 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)608 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
609                                     UStringTrieBuildOption buildOption) {
610     IcuTestErrorCode errorCode(*this, "buildTrie()");
611     // Add the items to the trie builder in an interesting (not trivial, not random) order.
612     int32_t index, step;
613     if(dataLength&1) {
614         // Odd number of items.
615         index=dataLength/2;
616         step=2;
617     } else if((dataLength%3)!=0) {
618         // Not a multiple of 3.
619         index=dataLength/5;
620         step=3;
621     } else {
622         index=dataLength-1;
623         step=-1;
624     }
625     builder_->clear();
626     for(int32_t i=0; i<dataLength; ++i) {
627         builder_->add(data[index].s, data[index].value, errorCode);
628         index=(index+step)%dataLength;
629     }
630     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
631     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
632     if(!errorCode.logIfFailureAndReset("add()/build()")) {
633         builder_->add("zzz", 999, errorCode);
634         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
635             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
636         }
637     }
638     logln("serialized trie size: %ld bytes\n", (long)sp.length());
639     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
640     if(sp.data()==sp2.data()) {
641         errln("builder.buildStringPiece() before & after build() returned same array");
642     }
643     if(errorCode.isFailure()) {
644         return NULL;
645     }
646     // Tries from either build() method should be identical but
647     // BytesTrie does not implement equals().
648     // We just return either one.
649     if((dataLength&1)!=0) {
650         return trie.orphan();
651     } else {
652         return new BytesTrie(sp2.data());
653     }
654 }
655 
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)656 void BytesTrieTest::checkFirst(BytesTrie &trie,
657                                const StringAndValue data[], int32_t dataLength) {
658     for(int32_t i=0; i<dataLength; ++i) {
659         int c=*data[i].s;
660         if(c==0) {
661             continue;  // skip empty string
662         }
663         UStringTrieResult firstResult=trie.first(c);
664         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
665         UStringTrieResult nextResult=trie.next(data[i].s[1]);
666         if(firstResult!=trie.reset().next(c) ||
667            firstResult!=trie.current() ||
668            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
669            nextResult!=trie.next(data[i].s[1])
670         ) {
671             errln("trie.first(%c)!=trie.reset().next(same) for %s",
672                   c, data[i].s);
673         }
674     }
675     trie.reset();
676 }
677 
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)678 void BytesTrieTest::checkNext(BytesTrie &trie,
679                               const StringAndValue data[], int32_t dataLength) {
680     BytesTrie::State state;
681     for(int32_t i=0; i<dataLength; ++i) {
682         int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
683         UStringTrieResult result;
684         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
685             result!=trie.current()
686         ) {
687             errln("trie does not seem to contain %s", data[i].s);
688         } else if(trie.getValue()!=data[i].value) {
689             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
690                   data[i].s,
691                   (long)trie.getValue(), (long)trie.getValue(),
692                   (long)data[i].value, (long)data[i].value);
693         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
694             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
695         }
696         trie.reset();
697         stringLength=strlen(data[i].s);
698         result=trie.current();
699         for(int32_t j=0; j<stringLength; ++j) {
700             if(!USTRINGTRIE_HAS_NEXT(result)) {
701                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
702                 break;
703             }
704             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
705                 trie.getValue();
706                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
707                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
708                     break;
709                 }
710             }
711             result=trie.next(data[i].s[j]);
712             if(!USTRINGTRIE_MATCHES(result)) {
713                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
714                 break;
715             }
716             if(result!=trie.current()) {
717                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
718                 break;
719             }
720         }
721         if(!USTRINGTRIE_HAS_VALUE(result)) {
722             errln("trie.next()!=hasValue at the end of %s", data[i].s);
723             continue;
724         }
725         trie.getValue();
726         if(result!=trie.current()) {
727             errln("trie.current() != current()+getValue()+current() after end of %s",
728                   data[i].s);
729         }
730         // Compare the final current() with whether next() can actually continue.
731         trie.saveState(state);
732         UBool nextContinues=FALSE;
733         // Try all graphic characters; we only use those in test strings in this file.
734 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
735         const int32_t minChar=0x20;
736         const int32_t maxChar=0x7e;
737 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
738         const int32_t minChar=0x40;
739         const int32_t maxChar=0xfe;
740 #else
741         const int32_t minChar=0;
742         const int32_t maxChar=0xff;
743 #endif
744         for(int32_t c=minChar; c<=maxChar; ++c) {
745             if(trie.resetToState(state).next(c)) {
746                 nextContinues=TRUE;
747                 break;
748             }
749         }
750         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
751             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
752                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
753         }
754         trie.reset();
755     }
756 }
757 
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)758 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
759                                        const StringAndValue data[], int32_t dataLength) {
760     BytesTrie::State noState, state;
761     for(int32_t i=0; i<dataLength; ++i) {
762         if((i&1)==0) {
763             // This should have no effect.
764             trie.resetToState(noState);
765         }
766         const char *expectedString=data[i].s;
767         int32_t stringLength=strlen(expectedString);
768         int32_t partialLength=stringLength/3;
769         for(int32_t j=0; j<partialLength; ++j) {
770             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
771                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
772                 return;
773             }
774         }
775         trie.saveState(state);
776         UStringTrieResult resultAtState=trie.current();
777         UStringTrieResult result;
778         int32_t valueAtState=-99;
779         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
780             valueAtState=trie.getValue();
781         }
782         result=trie.next(0);  // mismatch
783         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
784             errln("trie.next(0) matched after part of %s", data[i].s);
785         }
786         if( resultAtState!=trie.resetToState(state).current() ||
787             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
788         ) {
789             errln("trie.next(part of %s) changes current()/getValue() after "
790                   "saveState/next(0)/resetToState",
791                   data[i].s);
792         } else if(!USTRINGTRIE_HAS_VALUE(
793                       result=trie.next(expectedString+partialLength,
794                                        stringLength-partialLength)) ||
795                   result!=trie.current()) {
796             errln("trie.next(rest of %s) does not seem to contain %s after "
797                   "saveState/next(0)/resetToState",
798                   data[i].s, data[i].s);
799         } else if(!USTRINGTRIE_HAS_VALUE(
800                       result=trie.resetToState(state).
801                                   next(expectedString+partialLength,
802                                        stringLength-partialLength)) ||
803                   result!=trie.current()) {
804             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
805                   data[i].s);
806         } else if(trie.getValue()!=data[i].value) {
807             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
808                   data[i].s,
809                   (long)trie.getValue(), (long)trie.getValue(),
810                   (long)data[i].value, (long)data[i].value);
811         }
812         trie.reset();
813     }
814 }
815 
816 // next(string) is also tested in other functions,
817 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)818 void BytesTrieTest::checkNextString(BytesTrie &trie,
819                                     const StringAndValue data[], int32_t dataLength) {
820     for(int32_t i=0; i<dataLength; ++i) {
821         const char *expectedString=data[i].s;
822         int32_t stringLength=strlen(expectedString);
823         if(!trie.next(expectedString, stringLength/2)) {
824             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
825             continue;
826         }
827         // Test that we stop properly at the end of the string.
828         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
829             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
830         }
831         trie.reset();
832     }
833 }
834 
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)835 void BytesTrieTest::checkIterator(const BytesTrie &trie,
836                                   const StringAndValue data[], int32_t dataLength) {
837     IcuTestErrorCode errorCode(*this, "checkIterator()");
838     BytesTrie::Iterator iter(trie, 0, errorCode);
839     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
840         return;
841     }
842     checkIterator(iter, data, dataLength);
843 }
844 
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)845 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
846                                   const StringAndValue data[], int32_t dataLength) {
847     IcuTestErrorCode errorCode(*this, "checkIterator()");
848     for(int32_t i=0; i<dataLength; ++i) {
849         if(!iter.hasNext()) {
850             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
851             break;
852         }
853         UBool hasNext=iter.next(errorCode);
854         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
855             break;
856         }
857         if(!hasNext) {
858             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
859             break;
860         }
861         if(iter.getString()!=StringPiece(data[i].s)) {
862             errln("trie iterator next().getString()=%s but expected %s for item %d",
863                   iter.getString().data(), data[i].s, (int)i);
864         }
865         if(iter.getValue()!=data[i].value) {
866             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
867                   (long)iter.getValue(), (long)iter.getValue(),
868                   (long)data[i].value, (long)data[i].value,
869                   (int)i, data[i].s);
870         }
871     }
872     if(iter.hasNext()) {
873         errln("trie iterator hasNext()=TRUE after all items");
874     }
875     UBool hasNext=iter.next(errorCode);
876     errorCode.logIfFailureAndReset("trie iterator next() after all items");
877     if(hasNext) {
878         errln("trie iterator next()=TRUE after all items");
879     }
880 }
881