• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  bytetrietest.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010nov16
14 *   created by: Markus W. Scherer
15 */
16 
17 #include <string.h>
18 
19 #include "unicode/utypes.h"
20 #include "unicode/bytestrie.h"
21 #include "unicode/bytestriebuilder.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/stringpiece.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26 
27 struct StringAndValue {
28     const char *s;
29     int32_t value;
30 };
31 
32 class BytesTrieTest : public IntlTest {
33 public:
34     BytesTrieTest();
35     virtual ~BytesTrieTest();
36 
37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL) override;
38     void TestBuilder();
39     void TestEmpty();
40     void Test_a();
41     void Test_a_ab();
42     void TestShortestBranch();
43     void TestBranches();
44     void TestLongSequence();
45     void TestLongBranch();
46     void TestValuesForState();
47     void TestCompact();
48 
49     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
50     void TestHasUniqueValue();
51     void TestGetNextBytes();
52     void TestIteratorFromBranch();
53     void TestIteratorFromLinearMatch();
54     void TestTruncatingIteratorFromRoot();
55     void TestTruncatingIteratorFromLinearMatchShort();
56     void TestTruncatingIteratorFromLinearMatchLong();
57     void TestIteratorFromBytes();
58     void TestFailedIterator();
59     void TestDelta();
60 
61     void checkData(const StringAndValue data[], int32_t dataLength);
62     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
63     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
64                          UStringTrieBuildOption buildOption);
65     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
68     void checkNextWithState64(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
69     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
70     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
71     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
72 
73 private:
74     BytesTrieBuilder *builder_;
75 };
76 
createBytesTrieTest()77 extern IntlTest *createBytesTrieTest() {
78     return new BytesTrieTest();
79 }
80 
BytesTrieTest()81 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
82     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
83     builder_=new BytesTrieBuilder(errorCode);
84 }
85 
~BytesTrieTest()86 BytesTrieTest::~BytesTrieTest() {
87     delete builder_;
88 }
89 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)90 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
91     if(exec) {
92         logln("TestSuite BytesTrieTest: ");
93     }
94     TESTCASE_AUTO_BEGIN;
95     TESTCASE_AUTO(TestBuilder);
96     TESTCASE_AUTO(TestEmpty);
97     TESTCASE_AUTO(Test_a);
98     TESTCASE_AUTO(Test_a_ab);
99     TESTCASE_AUTO(TestShortestBranch);
100     TESTCASE_AUTO(TestBranches);
101     TESTCASE_AUTO(TestLongSequence);
102     TESTCASE_AUTO(TestLongBranch);
103     TESTCASE_AUTO(TestValuesForState);
104     TESTCASE_AUTO(TestCompact);
105     TESTCASE_AUTO(TestHasUniqueValue);
106     TESTCASE_AUTO(TestGetNextBytes);
107     TESTCASE_AUTO(TestIteratorFromBranch);
108     TESTCASE_AUTO(TestIteratorFromLinearMatch);
109     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
110     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
111     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
112     TESTCASE_AUTO(TestIteratorFromBytes);
113     TESTCASE_AUTO(TestFailedIterator);
114     TESTCASE_AUTO(TestDelta);
115     TESTCASE_AUTO_END;
116 }
117 
TestBuilder()118 void BytesTrieTest::TestBuilder() {
119     IcuTestErrorCode errorCode(*this, "TestBuilder()");
120     builder_->clear();
121     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
122     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
123         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
124         return;
125     }
126     // TODO: remove .build(...) once add() checks for duplicates.
127     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
128     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
129         errln("BytesTrieBuilder.add() did not detect duplicates");
130         return;
131     }
132 }
133 
TestEmpty()134 void BytesTrieTest::TestEmpty() {
135     static const StringAndValue data[]={
136         { "", 0 }
137     };
138     checkData(data, UPRV_LENGTHOF(data));
139 }
140 
Test_a()141 void BytesTrieTest::Test_a() {
142     static const StringAndValue data[]={
143         { "a", 1 }
144     };
145     checkData(data, UPRV_LENGTHOF(data));
146 }
147 
Test_a_ab()148 void BytesTrieTest::Test_a_ab() {
149     static const StringAndValue data[]={
150         { "a", 1 },
151         { "ab", 100 }
152     };
153     checkData(data, UPRV_LENGTHOF(data));
154 }
155 
TestShortestBranch()156 void BytesTrieTest::TestShortestBranch() {
157     static const StringAndValue data[]={
158         { "a", 1000 },
159         { "b", 2000 }
160     };
161     checkData(data, UPRV_LENGTHOF(data));
162 }
163 
TestBranches()164 void BytesTrieTest::TestBranches() {
165     static const StringAndValue data[]={
166         { "a", 0x10 },
167         { "cc", 0x40 },
168         { "e", 0x100 },
169         { "ggg", 0x400 },
170         { "i", 0x1000 },
171         { "kkkk", 0x4000 },
172         { "n", 0x10000 },
173         { "ppppp", 0x40000 },
174         { "r", 0x100000 },
175         { "sss", 0x200000 },
176         { "t", 0x400000 },
177         { "uu", 0x800000 },
178         { "vv", 0x7fffffff },
179         { "zz", (int32_t)0x80000000 }
180     };
181     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
182         logln("TestBranches length=%d", (int)length);
183         checkData(data, length);
184     }
185 }
186 
TestLongSequence()187 void BytesTrieTest::TestLongSequence() {
188     static const StringAndValue data[]={
189         { "a", -1 },
190         // sequence of linear-match nodes
191         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
192         // more than 256 bytes
193         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
194           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
195           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
196           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
199     };
200     checkData(data, UPRV_LENGTHOF(data));
201 }
202 
TestLongBranch()203 void BytesTrieTest::TestLongBranch() {
204     // Split-branch and interesting compact-integer values.
205     static const StringAndValue data[]={
206         { "a", -2 },
207         { "b", -1 },
208         { "c", 0 },
209         { "d2", 1 },
210         { "f", 0x3f },
211         { "g", 0x40 },
212         { "h", 0x41 },
213         { "j23", 0x1900 },
214         { "j24", 0x19ff },
215         { "j25", 0x1a00 },
216         { "k2", 0x1a80 },
217         { "k3", 0x1aff },
218         { "l234567890", 0x1b00 },
219         { "l234567890123", 0x1b01 },
220         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
221         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
222         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
223         { "r", 0x333333 },
224         { "s2345", 0x4444444 },
225         { "t234567890", 0x77777777 },
226         { "z", (int32_t)0x80000001 }
227     };
228     checkData(data, UPRV_LENGTHOF(data));
229 }
230 
TestValuesForState()231 void BytesTrieTest::TestValuesForState() {
232     // Check that saveState() and resetToState() interact properly
233     // with next() and current().
234     static const StringAndValue data[]={
235         { "a", -1 },
236         { "ab", -2 },
237         { "abc", -3 },
238         { "abcd", -4 },
239         { "abcde", -5 },
240         { "abcdef", -6 }
241     };
242     checkData(data, UPRV_LENGTHOF(data));
243 }
244 
TestCompact()245 void BytesTrieTest::TestCompact() {
246     // Duplicate trailing strings and values provide opportunities for compacting.
247     static const StringAndValue data[]={
248         { "+", 0 },
249         { "+august", 8 },
250         { "+december", 12 },
251         { "+july", 7 },
252         { "+june", 6 },
253         { "+november", 11 },
254         { "+october", 10 },
255         { "+september", 9 },
256         { "-", 0 },
257         { "-august", 8 },
258         { "-december", 12 },
259         { "-july", 7 },
260         { "-june", 6 },
261         { "-november", 11 },
262         { "-october", 10 },
263         { "-september", 9 },
264         // The l+n branch (with its sub-nodes) is a duplicate but will be written
265         // both times because each time it follows a different linear-match node.
266         { "xjuly", 7 },
267         { "xjune", 6 }
268     };
269     checkData(data, UPRV_LENGTHOF(data));
270 }
271 
buildMonthsTrie(UStringTrieBuildOption buildOption)272 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
273     // All types of nodes leading to the same value,
274     // for code coverage of recursive functions.
275     // In particular, we need a lot of branches on some single level
276     // to exercise a split-branch node.
277     static const StringAndValue data[]={
278         { "august", 8 },
279         { "jan", 1 },
280         { "jan.", 1 },
281         { "jana", 1 },
282         { "janbb", 1 },
283         { "janc", 1 },
284         { "janddd", 1 },
285         { "janee", 1 },
286         { "janef", 1 },
287         { "janf", 1 },
288         { "jangg", 1 },
289         { "janh", 1 },
290         { "janiiii", 1 },
291         { "janj", 1 },
292         { "jankk", 1 },
293         { "jankl", 1 },
294         { "jankmm", 1 },
295         { "janl", 1 },
296         { "janm", 1 },
297         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
298         { "jano", 1 },
299         { "janpp", 1 },
300         { "janqqq", 1 },
301         { "janr", 1 },
302         { "januar", 1 },
303         { "january", 1 },
304         { "july", 7 },
305         { "jun", 6 },
306         { "jun.", 6 },
307         { "june", 6 }
308     };
309     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
310 }
311 
TestHasUniqueValue()312 void BytesTrieTest::TestHasUniqueValue() {
313     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
314     if(trie.isNull()) {
315         return;  // buildTrie() reported an error
316     }
317     int32_t uniqueValue;
318     if(trie->hasUniqueValue(uniqueValue)) {
319         errln("unique value at root");
320     }
321     trie->next('j');
322     trie->next('a');
323     trie->next('n');
324     // hasUniqueValue() directly after next()
325     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
326         errln("not unique value 1 after \"jan\"");
327     }
328     trie->first('j');
329     trie->next('u');
330     if(trie->hasUniqueValue(uniqueValue)) {
331         errln("unique value after \"ju\"");
332     }
333     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
334         errln("not normal value 6 after \"jun\"");
335     }
336     // hasUniqueValue() after getValue()
337     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
338         errln("not unique value 6 after \"jun\"");
339     }
340     // hasUniqueValue() from within a linear-match node
341     trie->first('a');
342     trie->next('u');
343     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
344         errln("not unique value 8 after \"au\"");
345     }
346 }
347 
TestGetNextBytes()348 void BytesTrieTest::TestGetNextBytes() {
349     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
350     if(trie.isNull()) {
351         return;  // buildTrie() reported an error
352     }
353     char buffer[40];
354     CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
355     int32_t count=trie->getNextBytes(sink);
356     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
357         errln("months getNextBytes()!=[aj] at root");
358     }
359     trie->next('j');
360     trie->next('a');
361     trie->next('n');
362     // getNextBytes() directly after next()
363     count=trie->getNextBytes(sink.Reset());
364     buffer[count]=0;
365     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
366         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
367     }
368     // getNextBytes() after getValue()
369     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
370     memset(buffer, 0, sizeof(buffer));
371     count=trie->getNextBytes(sink.Reset());
372     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
373         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
374     }
375     // getNextBytes() from a linear-match node
376     trie->next('u');
377     memset(buffer, 0, sizeof(buffer));
378     count=trie->getNextBytes(sink.Reset());
379     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
380         errln("months getNextBytes()!=[a] after \"janu\"");
381     }
382     trie->next('a');
383     memset(buffer, 0, sizeof(buffer));
384     count=trie->getNextBytes(sink.Reset());
385     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
386         errln("months getNextBytes()!=[r] after \"janua\"");
387     }
388     trie->next('r');
389     trie->next('y');
390     // getNextBytes() after a final match
391     count=trie->getNextBytes(sink.Reset());
392     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
393         errln("months getNextBytes()!=[] after \"january\"");
394     }
395 }
396 
TestIteratorFromBranch()397 void BytesTrieTest::TestIteratorFromBranch() {
398     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
399     if(trie.isNull()) {
400         return;  // buildTrie() reported an error
401     }
402     // Go to a branch node.
403     trie->next('j');
404     trie->next('a');
405     trie->next('n');
406     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
407     BytesTrie::Iterator iter(*trie, 0, errorCode);
408     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
409         return;
410     }
411     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
412     // following "jan".
413     static const StringAndValue data[]={
414         { "", 1 },
415         { ".", 1 },
416         { "a", 1 },
417         { "bb", 1 },
418         { "c", 1 },
419         { "ddd", 1 },
420         { "ee", 1 },
421         { "ef", 1 },
422         { "f", 1 },
423         { "gg", 1 },
424         { "h", 1 },
425         { "iiii", 1 },
426         { "j", 1 },
427         { "kk", 1 },
428         { "kl", 1 },
429         { "kmm", 1 },
430         { "l", 1 },
431         { "m", 1 },
432         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
433         { "o", 1 },
434         { "pp", 1 },
435         { "qqq", 1 },
436         { "r", 1 },
437         { "uar", 1 },
438         { "uary", 1 }
439     };
440     checkIterator(iter, data, UPRV_LENGTHOF(data));
441     // Reset, and we should get the same result.
442     logln("after iter.reset()");
443     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
444 }
445 
TestIteratorFromLinearMatch()446 void BytesTrieTest::TestIteratorFromLinearMatch() {
447     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
448     if(trie.isNull()) {
449         return;  // buildTrie() reported an error
450     }
451     // Go into a linear-match node.
452     trie->next('j');
453     trie->next('a');
454     trie->next('n');
455     trie->next('u');
456     trie->next('a');
457     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
458     BytesTrie::Iterator iter(*trie, 0, errorCode);
459     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
460         return;
461     }
462     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
463     // following "janua".
464     static const StringAndValue data[]={
465         { "r", 1 },
466         { "ry", 1 }
467     };
468     checkIterator(iter, data, UPRV_LENGTHOF(data));
469     // Reset, and we should get the same result.
470     logln("after iter.reset()");
471     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
472 }
473 
TestTruncatingIteratorFromRoot()474 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
475     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
476     if(trie.isNull()) {
477         return;  // buildTrie() reported an error
478     }
479     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
480     BytesTrie::Iterator iter(*trie, 4, errorCode);
481     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
482         return;
483     }
484     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
485     // of each string, and no string duplicates from the truncation.
486     static const StringAndValue data[]={
487         { "augu", -1 },
488         { "jan", 1 },
489         { "jan.", 1 },
490         { "jana", 1 },
491         { "janb", -1 },
492         { "janc", 1 },
493         { "jand", -1 },
494         { "jane", -1 },
495         { "janf", 1 },
496         { "jang", -1 },
497         { "janh", 1 },
498         { "jani", -1 },
499         { "janj", 1 },
500         { "jank", -1 },
501         { "janl", 1 },
502         { "janm", 1 },
503         { "jann", -1 },
504         { "jano", 1 },
505         { "janp", -1 },
506         { "janq", -1 },
507         { "janr", 1 },
508         { "janu", -1 },
509         { "july", 7 },
510         { "jun", 6 },
511         { "jun.", 6 },
512         { "june", 6 }
513     };
514     checkIterator(iter, data, UPRV_LENGTHOF(data));
515     // Reset, and we should get the same result.
516     logln("after iter.reset()");
517     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
518 }
519 
TestTruncatingIteratorFromLinearMatchShort()520 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
521     static const StringAndValue data[]={
522         { "abcdef", 10 },
523         { "abcdepq", 200 },
524         { "abcdeyz", 3000 }
525     };
526     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
527     if(trie.isNull()) {
528         return;  // buildTrie() reported an error
529     }
530     // Go into a linear-match node.
531     trie->next('a');
532     trie->next('b');
533     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
534     // Truncate within the linear-match node.
535     BytesTrie::Iterator iter(*trie, 2, errorCode);
536     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
537         return;
538     }
539     static const StringAndValue expected[]={
540         { "cd", -1 }
541     };
542     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
543     // Reset, and we should get the same result.
544     logln("after iter.reset()");
545     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
546 }
547 
TestTruncatingIteratorFromLinearMatchLong()548 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
549     static const StringAndValue data[]={
550         { "abcdef", 10 },
551         { "abcdepq", 200 },
552         { "abcdeyz", 3000 }
553     };
554     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
555     if(trie.isNull()) {
556         return;  // buildTrie() reported an error
557     }
558     // Go into a linear-match node.
559     trie->next('a');
560     trie->next('b');
561     trie->next('c');
562     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
563     // Truncate after the linear-match node.
564     BytesTrie::Iterator iter(*trie, 3, errorCode);
565     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
566         return;
567     }
568     static const StringAndValue expected[]={
569         { "def", 10 },
570         { "dep", -1 },
571         { "dey", -1 }
572     };
573     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
574     // Reset, and we should get the same result.
575     logln("after iter.reset()");
576     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
577 }
578 
TestIteratorFromBytes()579 void BytesTrieTest::TestIteratorFromBytes() {
580     static const StringAndValue data[]={
581         { "mm", 3 },
582         { "mmm", 33 },
583         { "mmnop", 333 }
584     };
585     builder_->clear();
586     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
587     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
588         builder_->add(data[i].s, data[i].value, errorCode);
589     }
590     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
591     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
592     checkIterator(iter, data, UPRV_LENGTHOF(data));
593 }
594 
TestFailedIterator()595 void BytesTrieTest::TestFailedIterator() {
596     UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
597     BytesTrie::Iterator iter(NULL, 0, failure);
598     StringPiece sp = iter.getString();
599     if (!sp.empty()) {
600         errln("failed iterator returned garbage data");
601     }
602 }
603 
TestDelta()604 void BytesTrieTest::TestDelta() {
605     char intBytes0[5];
606     char intBytes1[5];
607     static constexpr int32_t sampleDeltas[] = {
608         -1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
609         -2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
610         -3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
611         -4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
612         -5, 0x1000000, 0x1000001, 0x7fffffff
613     };
614     int32_t expectedLength = 0;
615     for (int32_t delta : sampleDeltas) {
616         if (delta < 0) {
617             expectedLength = -delta;
618             continue;
619         }
620         // Encoding twice into differently-initialized arrays
621         // catches bytes that are not written to.
622         memset(intBytes0, 0, sizeof(intBytes0));
623         memset(intBytes1, 1, sizeof(intBytes1));
624         int32_t length0 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes0);
625         int32_t length1 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes1);
626         assertTrue(UnicodeString(u"non-zero length to encode delta ") + delta, length0 > 0);
627         assertEquals(UnicodeString(u"consistent length to encode delta ") + delta, length0, length1);
628         assertEquals(UnicodeString(u"expected length to encode delta ") + delta,
629                      expectedLength, length0);
630         for (int32_t i = 0; i < length0; ++i) {
631             uint8_t b0 = intBytes0[i];
632             uint8_t b1 = intBytes1[i];
633             assertEquals(UnicodeString(u"differently encoded delta ") + delta +
634                             u" at byte index " + i, b0, b1);
635         }
636         const uint8_t *start = (const uint8_t *)intBytes0;
637         const uint8_t *pos = BytesTrie::jumpByDelta(start);
638         assertEquals(UnicodeString(u"roundtrip for delta ") + delta,
639                      delta, (int32_t)(pos - start) - length0);
640     }
641 }
642 
checkData(const StringAndValue data[],int32_t dataLength)643 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
644     logln("checkData(dataLength=%d, fast)", (int)dataLength);
645     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
646     logln("checkData(dataLength=%d, small)", (int)dataLength);
647     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
648 }
649 
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)650 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
651     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
652     if(trie.isNull()) {
653         return;  // buildTrie() reported an error
654     }
655     checkFirst(*trie, data, dataLength);
656     checkNext(*trie, data, dataLength);
657     checkNextWithState(*trie, data, dataLength);
658     checkNextWithState64(*trie, data, dataLength);
659     checkNextString(*trie, data, dataLength);
660     checkIterator(*trie, data, dataLength);
661 }
662 
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)663 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
664                                     UStringTrieBuildOption buildOption) {
665     IcuTestErrorCode errorCode(*this, "buildTrie()");
666     // Add the items to the trie builder in an interesting (not trivial, not random) order.
667     int32_t index, step;
668     if(dataLength&1) {
669         // Odd number of items.
670         index=dataLength/2;
671         step=2;
672     } else if((dataLength%3)!=0) {
673         // Not a multiple of 3.
674         index=dataLength/5;
675         step=3;
676     } else {
677         index=dataLength-1;
678         step=-1;
679     }
680     builder_->clear();
681     for(int32_t i=0; i<dataLength; ++i) {
682         builder_->add(data[index].s, data[index].value, errorCode);
683         index=(index+step)%dataLength;
684     }
685     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
686     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
687     if(!errorCode.errIfFailureAndReset("add()/build()")) {
688         builder_->add("zzz", 999, errorCode);
689         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
690             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
691         }
692     }
693     logln("serialized trie size: %ld bytes\n", (long)sp.length());
694     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
695     if(sp.data()==sp2.data()) {
696         errln("builder.buildStringPiece() before & after build() returned same array");
697     }
698     if(errorCode.isFailure()) {
699         return NULL;
700     }
701     // Tries from either build() method should be identical but
702     // BytesTrie does not implement equals().
703     // We just return either one.
704     if((dataLength&1)!=0) {
705         return trie.orphan();
706     } else {
707         return new BytesTrie(sp2.data());
708     }
709 }
710 
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)711 void BytesTrieTest::checkFirst(BytesTrie &trie,
712                                const StringAndValue data[], int32_t dataLength) {
713     for(int32_t i=0; i<dataLength; ++i) {
714         int c=*data[i].s;
715         if(c==0) {
716             continue;  // skip empty string
717         }
718         UStringTrieResult firstResult=trie.first(c);
719         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
720         UStringTrieResult nextResult=trie.next(data[i].s[1]);
721         if(firstResult!=trie.reset().next(c) ||
722            firstResult!=trie.current() ||
723            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
724            nextResult!=trie.next(data[i].s[1])
725         ) {
726             errln("trie.first(%c)!=trie.reset().next(same) for %s",
727                   c, data[i].s);
728         }
729     }
730     trie.reset();
731 }
732 
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)733 void BytesTrieTest::checkNext(BytesTrie &trie,
734                               const StringAndValue data[], int32_t dataLength) {
735     BytesTrie::State state;
736     for(int32_t i=0; i<dataLength; ++i) {
737         int32_t stringLength= (i&1) ? -1 : static_cast<int32_t>(strlen(data[i].s));
738         UStringTrieResult result;
739         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
740             result!=trie.current()
741         ) {
742             errln("trie does not seem to contain %s", data[i].s);
743         } else if(trie.getValue()!=data[i].value) {
744             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
745                   data[i].s,
746                   (long)trie.getValue(), (long)trie.getValue(),
747                   (long)data[i].value, (long)data[i].value);
748         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
749             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
750         }
751         trie.reset();
752         stringLength = static_cast<int32_t>(strlen(data[i].s));
753         result=trie.current();
754         for(int32_t j=0; j<stringLength; ++j) {
755             if(!USTRINGTRIE_HAS_NEXT(result)) {
756                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
757                 break;
758             }
759             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
760                 trie.getValue();
761                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
762                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
763                     break;
764                 }
765             }
766             result=trie.next(data[i].s[j]);
767             if(!USTRINGTRIE_MATCHES(result)) {
768                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
769                 break;
770             }
771             if(result!=trie.current()) {
772                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
773                 break;
774             }
775         }
776         if(!USTRINGTRIE_HAS_VALUE(result)) {
777             errln("trie.next()!=hasValue at the end of %s", data[i].s);
778             continue;
779         }
780         trie.getValue();
781         if(result!=trie.current()) {
782             errln("trie.current() != current()+getValue()+current() after end of %s",
783                   data[i].s);
784         }
785         // Compare the final current() with whether next() can actually continue.
786         trie.saveState(state);
787         UBool nextContinues=false;
788         // Try all graphic characters; we only use those in test strings in this file.
789 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
790         const int32_t minChar=0x20;
791         const int32_t maxChar=0x7e;
792 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
793         const int32_t minChar=0x40;
794         const int32_t maxChar=0xfe;
795 #else
796         const int32_t minChar=0;
797         const int32_t maxChar=0xff;
798 #endif
799         for(int32_t c=minChar; c<=maxChar; ++c) {
800             if(trie.resetToState(state).next(c)) {
801                 nextContinues=true;
802                 break;
803             }
804         }
805         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
806             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
807                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
808         }
809         trie.reset();
810     }
811 }
812 
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)813 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
814                                        const StringAndValue data[], int32_t dataLength) {
815     BytesTrie::State noState, state;
816     for(int32_t i=0; i<dataLength; ++i) {
817         if((i&1)==0) {
818             // This should have no effect.
819             trie.resetToState(noState);
820         }
821         const char *expectedString=data[i].s;
822         int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
823         int32_t partialLength = stringLength / 3;
824         for(int32_t j=0; j<partialLength; ++j) {
825             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
826                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
827                 return;
828             }
829         }
830         trie.saveState(state);
831         UStringTrieResult resultAtState=trie.current();
832         UStringTrieResult result;
833         int32_t valueAtState=-99;
834         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
835             valueAtState=trie.getValue();
836         }
837         result=trie.next(0);  // mismatch
838         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
839             errln("trie.next(0) matched after part of %s", data[i].s);
840         }
841         if( resultAtState!=trie.resetToState(state).current() ||
842             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
843         ) {
844             errln("trie.next(part of %s) changes current()/getValue() after "
845                   "saveState/next(0)/resetToState",
846                   data[i].s);
847         } else if(!USTRINGTRIE_HAS_VALUE(
848                       result=trie.next(expectedString+partialLength,
849                                        stringLength-partialLength)) ||
850                   result!=trie.current()) {
851             errln("trie.next(rest of %s) does not seem to contain %s after "
852                   "saveState/next(0)/resetToState",
853                   data[i].s, data[i].s);
854         } else if(!USTRINGTRIE_HAS_VALUE(
855                       result=trie.resetToState(state).
856                                   next(expectedString+partialLength,
857                                        stringLength-partialLength)) ||
858                   result!=trie.current()) {
859             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
860                   data[i].s);
861         } else if(trie.getValue()!=data[i].value) {
862             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
863                   data[i].s,
864                   (long)trie.getValue(), (long)trie.getValue(),
865                   (long)data[i].value, (long)data[i].value);
866         }
867         trie.reset();
868     }
869 }
870 
checkNextWithState64(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)871 void BytesTrieTest::checkNextWithState64(BytesTrie &trie,
872                                          const StringAndValue data[], int32_t dataLength) {
873     assertTrue("trie(initial state).getState64()!=0", trie.getState64() != 0);
874     for(int32_t i=0; i<dataLength; ++i) {
875         const char *expectedString=data[i].s;
876         int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
877         int32_t partialLength = stringLength / 3;
878         for(int32_t j=0; j<partialLength; ++j) {
879             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
880                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
881                 return;
882             }
883         }
884         uint64_t state = trie.getState64();
885         assertTrue("trie.getState64()!=0", state != 0);
886         UStringTrieResult resultAtState=trie.current();
887         UStringTrieResult result;
888         int32_t valueAtState=-99;
889         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
890             valueAtState=trie.getValue();
891         }
892         result=trie.next(0);  // mismatch
893         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
894             errln("trie.next(0) matched after part of %s", data[i].s);
895         }
896         if( resultAtState!=trie.resetToState64(state).current() ||
897             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
898         ) {
899             errln("trie.next(part of %s) changes current()/getValue() after "
900                   "getState64/next(0)/resetToState64",
901                   data[i].s);
902         } else if(!USTRINGTRIE_HAS_VALUE(
903                       result=trie.next(expectedString+partialLength,
904                                        stringLength-partialLength)) ||
905                   result!=trie.current()) {
906             errln("trie.next(rest of %s) does not seem to contain %s after "
907                   "getState64/next(0)/resetToState64",
908                   data[i].s, data[i].s);
909         } else if(!USTRINGTRIE_HAS_VALUE(
910                       result=trie.resetToState64(state).
911                                   next(expectedString+partialLength,
912                                        stringLength-partialLength)) ||
913                   result!=trie.current()) {
914             errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64",
915                   data[i].s);
916         } else if(trie.getValue()!=data[i].value) {
917             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
918                   data[i].s,
919                   (long)trie.getValue(), (long)trie.getValue(),
920                   (long)data[i].value, (long)data[i].value);
921         }
922         trie.reset();
923     }
924 }
925 
926 // next(string) is also tested in other functions,
927 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)928 void BytesTrieTest::checkNextString(BytesTrie &trie,
929                                     const StringAndValue data[], int32_t dataLength) {
930     for(int32_t i=0; i<dataLength; ++i) {
931         const char *expectedString=data[i].s;
932         int32_t stringLength = static_cast<int32_t>(strlen(expectedString));
933         if(!trie.next(expectedString, stringLength/2)) {
934             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
935             continue;
936         }
937         // Test that we stop properly at the end of the string.
938         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
939             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
940         }
941         trie.reset();
942     }
943 }
944 
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)945 void BytesTrieTest::checkIterator(const BytesTrie &trie,
946                                   const StringAndValue data[], int32_t dataLength) {
947     IcuTestErrorCode errorCode(*this, "checkIterator()");
948     BytesTrie::Iterator iter(trie, 0, errorCode);
949     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
950         return;
951     }
952     checkIterator(iter, data, dataLength);
953 }
954 
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)955 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
956                                   const StringAndValue data[], int32_t dataLength) {
957     IcuTestErrorCode errorCode(*this, "checkIterator()");
958     for(int32_t i=0; i<dataLength; ++i) {
959         if(!iter.hasNext()) {
960             errln("trie iterator hasNext()=false for item %d: %s", (int)i, data[i].s);
961             break;
962         }
963         UBool hasNext=iter.next(errorCode);
964         if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
965             break;
966         }
967         if(!hasNext) {
968             errln("trie iterator next()=false for item %d: %s", (int)i, data[i].s);
969             break;
970         }
971         if(iter.getString()!=StringPiece(data[i].s)) {
972             errln("trie iterator next().getString()=%s but expected %s for item %d",
973                   iter.getString().data(), data[i].s, (int)i);
974         }
975         if(iter.getValue()!=data[i].value) {
976             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
977                   (long)iter.getValue(), (long)iter.getValue(),
978                   (long)data[i].value, (long)data[i].value,
979                   (int)i, data[i].s);
980         }
981     }
982     if(iter.hasNext()) {
983         errln("trie iterator hasNext()=true after all items");
984     }
985     UBool hasNext=iter.next(errorCode);
986     errorCode.errIfFailureAndReset("trie iterator next() after all items");
987     if(hasNext) {
988         errln("trie iterator next()=true after all items");
989     }
990 }
991