1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2010-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytetrietest.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010nov16
14 * created by: Markus W. Scherer
15 */
16
17 #include <string.h>
18
19 #include "unicode/utypes.h"
20 #include "unicode/bytestrie.h"
21 #include "unicode/bytestriebuilder.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/stringpiece.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26
27 struct StringAndValue {
28 const char *s;
29 int32_t value;
30 };
31
32 class BytesTrieTest : public IntlTest {
33 public:
34 BytesTrieTest();
35 virtual ~BytesTrieTest();
36
37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38 void TestBuilder();
39 void TestEmpty();
40 void Test_a();
41 void Test_a_ab();
42 void TestShortestBranch();
43 void TestBranches();
44 void TestLongSequence();
45 void TestLongBranch();
46 void TestValuesForState();
47 void TestCompact();
48
49 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
50 void TestHasUniqueValue();
51 void TestGetNextBytes();
52 void TestIteratorFromBranch();
53 void TestIteratorFromLinearMatch();
54 void TestTruncatingIteratorFromRoot();
55 void TestTruncatingIteratorFromLinearMatchShort();
56 void TestTruncatingIteratorFromLinearMatchLong();
57 void TestIteratorFromBytes();
58 void TestFailedIterator();
59
60 void checkData(const StringAndValue data[], int32_t dataLength);
61 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
62 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
63 UStringTrieBuildOption buildOption);
64 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67 void checkNextWithState64(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
68 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
69 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
70 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
71
72 private:
73 BytesTrieBuilder *builder_;
74 };
75
createBytesTrieTest()76 extern IntlTest *createBytesTrieTest() {
77 return new BytesTrieTest();
78 }
79
BytesTrieTest()80 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
81 IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
82 builder_=new BytesTrieBuilder(errorCode);
83 }
84
~BytesTrieTest()85 BytesTrieTest::~BytesTrieTest() {
86 delete builder_;
87 }
88
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)89 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
90 if(exec) {
91 logln("TestSuite BytesTrieTest: ");
92 }
93 TESTCASE_AUTO_BEGIN;
94 TESTCASE_AUTO(TestBuilder);
95 TESTCASE_AUTO(TestEmpty);
96 TESTCASE_AUTO(Test_a);
97 TESTCASE_AUTO(Test_a_ab);
98 TESTCASE_AUTO(TestShortestBranch);
99 TESTCASE_AUTO(TestBranches);
100 TESTCASE_AUTO(TestLongSequence);
101 TESTCASE_AUTO(TestLongBranch);
102 TESTCASE_AUTO(TestValuesForState);
103 TESTCASE_AUTO(TestCompact);
104 TESTCASE_AUTO(TestHasUniqueValue);
105 TESTCASE_AUTO(TestGetNextBytes);
106 TESTCASE_AUTO(TestIteratorFromBranch);
107 TESTCASE_AUTO(TestIteratorFromLinearMatch);
108 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
109 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
110 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
111 TESTCASE_AUTO(TestIteratorFromBytes);
112 TESTCASE_AUTO(TestFailedIterator);
113 TESTCASE_AUTO_END;
114 }
115
TestBuilder()116 void BytesTrieTest::TestBuilder() {
117 IcuTestErrorCode errorCode(*this, "TestBuilder()");
118 builder_->clear();
119 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
120 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
121 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
122 return;
123 }
124 // TODO: remove .build(...) once add() checks for duplicates.
125 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
126 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
127 errln("BytesTrieBuilder.add() did not detect duplicates");
128 return;
129 }
130 }
131
TestEmpty()132 void BytesTrieTest::TestEmpty() {
133 static const StringAndValue data[]={
134 { "", 0 }
135 };
136 checkData(data, UPRV_LENGTHOF(data));
137 }
138
Test_a()139 void BytesTrieTest::Test_a() {
140 static const StringAndValue data[]={
141 { "a", 1 }
142 };
143 checkData(data, UPRV_LENGTHOF(data));
144 }
145
Test_a_ab()146 void BytesTrieTest::Test_a_ab() {
147 static const StringAndValue data[]={
148 { "a", 1 },
149 { "ab", 100 }
150 };
151 checkData(data, UPRV_LENGTHOF(data));
152 }
153
TestShortestBranch()154 void BytesTrieTest::TestShortestBranch() {
155 static const StringAndValue data[]={
156 { "a", 1000 },
157 { "b", 2000 }
158 };
159 checkData(data, UPRV_LENGTHOF(data));
160 }
161
TestBranches()162 void BytesTrieTest::TestBranches() {
163 static const StringAndValue data[]={
164 { "a", 0x10 },
165 { "cc", 0x40 },
166 { "e", 0x100 },
167 { "ggg", 0x400 },
168 { "i", 0x1000 },
169 { "kkkk", 0x4000 },
170 { "n", 0x10000 },
171 { "ppppp", 0x40000 },
172 { "r", 0x100000 },
173 { "sss", 0x200000 },
174 { "t", 0x400000 },
175 { "uu", 0x800000 },
176 { "vv", 0x7fffffff },
177 { "zz", (int32_t)0x80000000 }
178 };
179 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
180 logln("TestBranches length=%d", (int)length);
181 checkData(data, length);
182 }
183 }
184
TestLongSequence()185 void BytesTrieTest::TestLongSequence() {
186 static const StringAndValue data[]={
187 { "a", -1 },
188 // sequence of linear-match nodes
189 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
190 // more than 256 bytes
191 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
193 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
194 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
195 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
196 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
197 };
198 checkData(data, UPRV_LENGTHOF(data));
199 }
200
TestLongBranch()201 void BytesTrieTest::TestLongBranch() {
202 // Split-branch and interesting compact-integer values.
203 static const StringAndValue data[]={
204 { "a", -2 },
205 { "b", -1 },
206 { "c", 0 },
207 { "d2", 1 },
208 { "f", 0x3f },
209 { "g", 0x40 },
210 { "h", 0x41 },
211 { "j23", 0x1900 },
212 { "j24", 0x19ff },
213 { "j25", 0x1a00 },
214 { "k2", 0x1a80 },
215 { "k3", 0x1aff },
216 { "l234567890", 0x1b00 },
217 { "l234567890123", 0x1b01 },
218 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
219 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
220 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
221 { "r", 0x333333 },
222 { "s2345", 0x4444444 },
223 { "t234567890", 0x77777777 },
224 { "z", (int32_t)0x80000001 }
225 };
226 checkData(data, UPRV_LENGTHOF(data));
227 }
228
TestValuesForState()229 void BytesTrieTest::TestValuesForState() {
230 // Check that saveState() and resetToState() interact properly
231 // with next() and current().
232 static const StringAndValue data[]={
233 { "a", -1 },
234 { "ab", -2 },
235 { "abc", -3 },
236 { "abcd", -4 },
237 { "abcde", -5 },
238 { "abcdef", -6 }
239 };
240 checkData(data, UPRV_LENGTHOF(data));
241 }
242
TestCompact()243 void BytesTrieTest::TestCompact() {
244 // Duplicate trailing strings and values provide opportunities for compacting.
245 static const StringAndValue data[]={
246 { "+", 0 },
247 { "+august", 8 },
248 { "+december", 12 },
249 { "+july", 7 },
250 { "+june", 6 },
251 { "+november", 11 },
252 { "+october", 10 },
253 { "+september", 9 },
254 { "-", 0 },
255 { "-august", 8 },
256 { "-december", 12 },
257 { "-july", 7 },
258 { "-june", 6 },
259 { "-november", 11 },
260 { "-october", 10 },
261 { "-september", 9 },
262 // The l+n branch (with its sub-nodes) is a duplicate but will be written
263 // both times because each time it follows a different linear-match node.
264 { "xjuly", 7 },
265 { "xjune", 6 }
266 };
267 checkData(data, UPRV_LENGTHOF(data));
268 }
269
buildMonthsTrie(UStringTrieBuildOption buildOption)270 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
271 // All types of nodes leading to the same value,
272 // for code coverage of recursive functions.
273 // In particular, we need a lot of branches on some single level
274 // to exercise a split-branch node.
275 static const StringAndValue data[]={
276 { "august", 8 },
277 { "jan", 1 },
278 { "jan.", 1 },
279 { "jana", 1 },
280 { "janbb", 1 },
281 { "janc", 1 },
282 { "janddd", 1 },
283 { "janee", 1 },
284 { "janef", 1 },
285 { "janf", 1 },
286 { "jangg", 1 },
287 { "janh", 1 },
288 { "janiiii", 1 },
289 { "janj", 1 },
290 { "jankk", 1 },
291 { "jankl", 1 },
292 { "jankmm", 1 },
293 { "janl", 1 },
294 { "janm", 1 },
295 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
296 { "jano", 1 },
297 { "janpp", 1 },
298 { "janqqq", 1 },
299 { "janr", 1 },
300 { "januar", 1 },
301 { "january", 1 },
302 { "july", 7 },
303 { "jun", 6 },
304 { "jun.", 6 },
305 { "june", 6 }
306 };
307 return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
308 }
309
TestHasUniqueValue()310 void BytesTrieTest::TestHasUniqueValue() {
311 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
312 if(trie.isNull()) {
313 return; // buildTrie() reported an error
314 }
315 int32_t uniqueValue;
316 if(trie->hasUniqueValue(uniqueValue)) {
317 errln("unique value at root");
318 }
319 trie->next('j');
320 trie->next('a');
321 trie->next('n');
322 // hasUniqueValue() directly after next()
323 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
324 errln("not unique value 1 after \"jan\"");
325 }
326 trie->first('j');
327 trie->next('u');
328 if(trie->hasUniqueValue(uniqueValue)) {
329 errln("unique value after \"ju\"");
330 }
331 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
332 errln("not normal value 6 after \"jun\"");
333 }
334 // hasUniqueValue() after getValue()
335 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
336 errln("not unique value 6 after \"jun\"");
337 }
338 // hasUniqueValue() from within a linear-match node
339 trie->first('a');
340 trie->next('u');
341 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
342 errln("not unique value 8 after \"au\"");
343 }
344 }
345
TestGetNextBytes()346 void BytesTrieTest::TestGetNextBytes() {
347 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
348 if(trie.isNull()) {
349 return; // buildTrie() reported an error
350 }
351 char buffer[40];
352 CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
353 int32_t count=trie->getNextBytes(sink);
354 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
355 errln("months getNextBytes()!=[aj] at root");
356 }
357 trie->next('j');
358 trie->next('a');
359 trie->next('n');
360 // getNextBytes() directly after next()
361 count=trie->getNextBytes(sink.Reset());
362 buffer[count]=0;
363 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
364 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
365 }
366 // getNextBytes() after getValue()
367 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
368 memset(buffer, 0, sizeof(buffer));
369 count=trie->getNextBytes(sink.Reset());
370 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
371 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
372 }
373 // getNextBytes() from a linear-match node
374 trie->next('u');
375 memset(buffer, 0, sizeof(buffer));
376 count=trie->getNextBytes(sink.Reset());
377 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
378 errln("months getNextBytes()!=[a] after \"janu\"");
379 }
380 trie->next('a');
381 memset(buffer, 0, sizeof(buffer));
382 count=trie->getNextBytes(sink.Reset());
383 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
384 errln("months getNextBytes()!=[r] after \"janua\"");
385 }
386 trie->next('r');
387 trie->next('y');
388 // getNextBytes() after a final match
389 count=trie->getNextBytes(sink.Reset());
390 if(count!=0 || sink.NumberOfBytesAppended()!=0) {
391 errln("months getNextBytes()!=[] after \"january\"");
392 }
393 }
394
TestIteratorFromBranch()395 void BytesTrieTest::TestIteratorFromBranch() {
396 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
397 if(trie.isNull()) {
398 return; // buildTrie() reported an error
399 }
400 // Go to a branch node.
401 trie->next('j');
402 trie->next('a');
403 trie->next('n');
404 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
405 BytesTrie::Iterator iter(*trie, 0, errorCode);
406 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
407 return;
408 }
409 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
410 // following "jan".
411 static const StringAndValue data[]={
412 { "", 1 },
413 { ".", 1 },
414 { "a", 1 },
415 { "bb", 1 },
416 { "c", 1 },
417 { "ddd", 1 },
418 { "ee", 1 },
419 { "ef", 1 },
420 { "f", 1 },
421 { "gg", 1 },
422 { "h", 1 },
423 { "iiii", 1 },
424 { "j", 1 },
425 { "kk", 1 },
426 { "kl", 1 },
427 { "kmm", 1 },
428 { "l", 1 },
429 { "m", 1 },
430 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
431 { "o", 1 },
432 { "pp", 1 },
433 { "qqq", 1 },
434 { "r", 1 },
435 { "uar", 1 },
436 { "uary", 1 }
437 };
438 checkIterator(iter, data, UPRV_LENGTHOF(data));
439 // Reset, and we should get the same result.
440 logln("after iter.reset()");
441 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
442 }
443
TestIteratorFromLinearMatch()444 void BytesTrieTest::TestIteratorFromLinearMatch() {
445 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
446 if(trie.isNull()) {
447 return; // buildTrie() reported an error
448 }
449 // Go into a linear-match node.
450 trie->next('j');
451 trie->next('a');
452 trie->next('n');
453 trie->next('u');
454 trie->next('a');
455 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
456 BytesTrie::Iterator iter(*trie, 0, errorCode);
457 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
458 return;
459 }
460 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
461 // following "janua".
462 static const StringAndValue data[]={
463 { "r", 1 },
464 { "ry", 1 }
465 };
466 checkIterator(iter, data, UPRV_LENGTHOF(data));
467 // Reset, and we should get the same result.
468 logln("after iter.reset()");
469 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
470 }
471
TestTruncatingIteratorFromRoot()472 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
473 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
474 if(trie.isNull()) {
475 return; // buildTrie() reported an error
476 }
477 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
478 BytesTrie::Iterator iter(*trie, 4, errorCode);
479 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
480 return;
481 }
482 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
483 // of each string, and no string duplicates from the truncation.
484 static const StringAndValue data[]={
485 { "augu", -1 },
486 { "jan", 1 },
487 { "jan.", 1 },
488 { "jana", 1 },
489 { "janb", -1 },
490 { "janc", 1 },
491 { "jand", -1 },
492 { "jane", -1 },
493 { "janf", 1 },
494 { "jang", -1 },
495 { "janh", 1 },
496 { "jani", -1 },
497 { "janj", 1 },
498 { "jank", -1 },
499 { "janl", 1 },
500 { "janm", 1 },
501 { "jann", -1 },
502 { "jano", 1 },
503 { "janp", -1 },
504 { "janq", -1 },
505 { "janr", 1 },
506 { "janu", -1 },
507 { "july", 7 },
508 { "jun", 6 },
509 { "jun.", 6 },
510 { "june", 6 }
511 };
512 checkIterator(iter, data, UPRV_LENGTHOF(data));
513 // Reset, and we should get the same result.
514 logln("after iter.reset()");
515 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
516 }
517
TestTruncatingIteratorFromLinearMatchShort()518 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
519 static const StringAndValue data[]={
520 { "abcdef", 10 },
521 { "abcdepq", 200 },
522 { "abcdeyz", 3000 }
523 };
524 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
525 if(trie.isNull()) {
526 return; // buildTrie() reported an error
527 }
528 // Go into a linear-match node.
529 trie->next('a');
530 trie->next('b');
531 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
532 // Truncate within the linear-match node.
533 BytesTrie::Iterator iter(*trie, 2, errorCode);
534 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
535 return;
536 }
537 static const StringAndValue expected[]={
538 { "cd", -1 }
539 };
540 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
541 // Reset, and we should get the same result.
542 logln("after iter.reset()");
543 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
544 }
545
TestTruncatingIteratorFromLinearMatchLong()546 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
547 static const StringAndValue data[]={
548 { "abcdef", 10 },
549 { "abcdepq", 200 },
550 { "abcdeyz", 3000 }
551 };
552 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
553 if(trie.isNull()) {
554 return; // buildTrie() reported an error
555 }
556 // Go into a linear-match node.
557 trie->next('a');
558 trie->next('b');
559 trie->next('c');
560 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
561 // Truncate after the linear-match node.
562 BytesTrie::Iterator iter(*trie, 3, errorCode);
563 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
564 return;
565 }
566 static const StringAndValue expected[]={
567 { "def", 10 },
568 { "dep", -1 },
569 { "dey", -1 }
570 };
571 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
572 // Reset, and we should get the same result.
573 logln("after iter.reset()");
574 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
575 }
576
TestIteratorFromBytes()577 void BytesTrieTest::TestIteratorFromBytes() {
578 static const StringAndValue data[]={
579 { "mm", 3 },
580 { "mmm", 33 },
581 { "mmnop", 333 }
582 };
583 builder_->clear();
584 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
585 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
586 builder_->add(data[i].s, data[i].value, errorCode);
587 }
588 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
589 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
590 checkIterator(iter, data, UPRV_LENGTHOF(data));
591 }
592
TestFailedIterator()593 void BytesTrieTest::TestFailedIterator() {
594 UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
595 BytesTrie::Iterator iter(NULL, 0, failure);
596 StringPiece sp = iter.getString();
597 if (!sp.empty()) {
598 errln("failed iterator returned garbage data");
599 }
600 }
601
checkData(const StringAndValue data[],int32_t dataLength)602 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
603 logln("checkData(dataLength=%d, fast)", (int)dataLength);
604 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
605 logln("checkData(dataLength=%d, small)", (int)dataLength);
606 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
607 }
608
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)609 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
610 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
611 if(trie.isNull()) {
612 return; // buildTrie() reported an error
613 }
614 checkFirst(*trie, data, dataLength);
615 checkNext(*trie, data, dataLength);
616 checkNextWithState(*trie, data, dataLength);
617 checkNextWithState64(*trie, data, dataLength);
618 checkNextString(*trie, data, dataLength);
619 checkIterator(*trie, data, dataLength);
620 }
621
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)622 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
623 UStringTrieBuildOption buildOption) {
624 IcuTestErrorCode errorCode(*this, "buildTrie()");
625 // Add the items to the trie builder in an interesting (not trivial, not random) order.
626 int32_t index, step;
627 if(dataLength&1) {
628 // Odd number of items.
629 index=dataLength/2;
630 step=2;
631 } else if((dataLength%3)!=0) {
632 // Not a multiple of 3.
633 index=dataLength/5;
634 step=3;
635 } else {
636 index=dataLength-1;
637 step=-1;
638 }
639 builder_->clear();
640 for(int32_t i=0; i<dataLength; ++i) {
641 builder_->add(data[index].s, data[index].value, errorCode);
642 index=(index+step)%dataLength;
643 }
644 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
645 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
646 if(!errorCode.errIfFailureAndReset("add()/build()")) {
647 builder_->add("zzz", 999, errorCode);
648 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
649 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
650 }
651 }
652 logln("serialized trie size: %ld bytes\n", (long)sp.length());
653 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
654 if(sp.data()==sp2.data()) {
655 errln("builder.buildStringPiece() before & after build() returned same array");
656 }
657 if(errorCode.isFailure()) {
658 return NULL;
659 }
660 // Tries from either build() method should be identical but
661 // BytesTrie does not implement equals().
662 // We just return either one.
663 if((dataLength&1)!=0) {
664 return trie.orphan();
665 } else {
666 return new BytesTrie(sp2.data());
667 }
668 }
669
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)670 void BytesTrieTest::checkFirst(BytesTrie &trie,
671 const StringAndValue data[], int32_t dataLength) {
672 for(int32_t i=0; i<dataLength; ++i) {
673 int c=*data[i].s;
674 if(c==0) {
675 continue; // skip empty string
676 }
677 UStringTrieResult firstResult=trie.first(c);
678 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
679 UStringTrieResult nextResult=trie.next(data[i].s[1]);
680 if(firstResult!=trie.reset().next(c) ||
681 firstResult!=trie.current() ||
682 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
683 nextResult!=trie.next(data[i].s[1])
684 ) {
685 errln("trie.first(%c)!=trie.reset().next(same) for %s",
686 c, data[i].s);
687 }
688 }
689 trie.reset();
690 }
691
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)692 void BytesTrieTest::checkNext(BytesTrie &trie,
693 const StringAndValue data[], int32_t dataLength) {
694 BytesTrie::State state;
695 for(int32_t i=0; i<dataLength; ++i) {
696 int32_t stringLength= (i&1) ? -1 : static_cast<int32_t>(strlen(data[i].s));
697 UStringTrieResult result;
698 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
699 result!=trie.current()
700 ) {
701 errln("trie does not seem to contain %s", data[i].s);
702 } else if(trie.getValue()!=data[i].value) {
703 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
704 data[i].s,
705 (long)trie.getValue(), (long)trie.getValue(),
706 (long)data[i].value, (long)data[i].value);
707 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
708 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
709 }
710 trie.reset();
711 stringLength = static_cast<int32_t>(strlen(data[i].s));
712 result=trie.current();
713 for(int32_t j=0; j<stringLength; ++j) {
714 if(!USTRINGTRIE_HAS_NEXT(result)) {
715 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
716 break;
717 }
718 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
719 trie.getValue();
720 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
721 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
722 break;
723 }
724 }
725 result=trie.next(data[i].s[j]);
726 if(!USTRINGTRIE_MATCHES(result)) {
727 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
728 break;
729 }
730 if(result!=trie.current()) {
731 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
732 break;
733 }
734 }
735 if(!USTRINGTRIE_HAS_VALUE(result)) {
736 errln("trie.next()!=hasValue at the end of %s", data[i].s);
737 continue;
738 }
739 trie.getValue();
740 if(result!=trie.current()) {
741 errln("trie.current() != current()+getValue()+current() after end of %s",
742 data[i].s);
743 }
744 // Compare the final current() with whether next() can actually continue.
745 trie.saveState(state);
746 UBool nextContinues=FALSE;
747 // Try all graphic characters; we only use those in test strings in this file.
748 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
749 const int32_t minChar=0x20;
750 const int32_t maxChar=0x7e;
751 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
752 const int32_t minChar=0x40;
753 const int32_t maxChar=0xfe;
754 #else
755 const int32_t minChar=0;
756 const int32_t maxChar=0xff;
757 #endif
758 for(int32_t c=minChar; c<=maxChar; ++c) {
759 if(trie.resetToState(state).next(c)) {
760 nextContinues=TRUE;
761 break;
762 }
763 }
764 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
765 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
766 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
767 }
768 trie.reset();
769 }
770 }
771
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)772 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
773 const StringAndValue data[], int32_t dataLength) {
774 BytesTrie::State noState, state;
775 for(int32_t i=0; i<dataLength; ++i) {
776 if((i&1)==0) {
777 // This should have no effect.
778 trie.resetToState(noState);
779 }
780 const char *expectedString=data[i].s;
781 int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
782 int32_t partialLength = stringLength / 3;
783 for(int32_t j=0; j<partialLength; ++j) {
784 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
785 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
786 return;
787 }
788 }
789 trie.saveState(state);
790 UStringTrieResult resultAtState=trie.current();
791 UStringTrieResult result;
792 int32_t valueAtState=-99;
793 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
794 valueAtState=trie.getValue();
795 }
796 result=trie.next(0); // mismatch
797 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
798 errln("trie.next(0) matched after part of %s", data[i].s);
799 }
800 if( resultAtState!=trie.resetToState(state).current() ||
801 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
802 ) {
803 errln("trie.next(part of %s) changes current()/getValue() after "
804 "saveState/next(0)/resetToState",
805 data[i].s);
806 } else if(!USTRINGTRIE_HAS_VALUE(
807 result=trie.next(expectedString+partialLength,
808 stringLength-partialLength)) ||
809 result!=trie.current()) {
810 errln("trie.next(rest of %s) does not seem to contain %s after "
811 "saveState/next(0)/resetToState",
812 data[i].s, data[i].s);
813 } else if(!USTRINGTRIE_HAS_VALUE(
814 result=trie.resetToState(state).
815 next(expectedString+partialLength,
816 stringLength-partialLength)) ||
817 result!=trie.current()) {
818 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
819 data[i].s);
820 } else if(trie.getValue()!=data[i].value) {
821 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
822 data[i].s,
823 (long)trie.getValue(), (long)trie.getValue(),
824 (long)data[i].value, (long)data[i].value);
825 }
826 trie.reset();
827 }
828 }
829
checkNextWithState64(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)830 void BytesTrieTest::checkNextWithState64(BytesTrie &trie,
831 const StringAndValue data[], int32_t dataLength) {
832 assertTrue("trie(initial state).getState64()!=0", trie.getState64() != 0);
833 for(int32_t i=0; i<dataLength; ++i) {
834 const char *expectedString=data[i].s;
835 int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
836 int32_t partialLength = stringLength / 3;
837 for(int32_t j=0; j<partialLength; ++j) {
838 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
839 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
840 return;
841 }
842 }
843 uint64_t state = trie.getState64();
844 assertTrue("trie.getState64()!=0", state != 0);
845 UStringTrieResult resultAtState=trie.current();
846 UStringTrieResult result;
847 int32_t valueAtState=-99;
848 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
849 valueAtState=trie.getValue();
850 }
851 result=trie.next(0); // mismatch
852 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
853 errln("trie.next(0) matched after part of %s", data[i].s);
854 }
855 if( resultAtState!=trie.resetToState64(state).current() ||
856 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
857 ) {
858 errln("trie.next(part of %s) changes current()/getValue() after "
859 "getState64/next(0)/resetToState64",
860 data[i].s);
861 } else if(!USTRINGTRIE_HAS_VALUE(
862 result=trie.next(expectedString+partialLength,
863 stringLength-partialLength)) ||
864 result!=trie.current()) {
865 errln("trie.next(rest of %s) does not seem to contain %s after "
866 "getState64/next(0)/resetToState64",
867 data[i].s, data[i].s);
868 } else if(!USTRINGTRIE_HAS_VALUE(
869 result=trie.resetToState64(state).
870 next(expectedString+partialLength,
871 stringLength-partialLength)) ||
872 result!=trie.current()) {
873 errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64",
874 data[i].s);
875 } else if(trie.getValue()!=data[i].value) {
876 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
877 data[i].s,
878 (long)trie.getValue(), (long)trie.getValue(),
879 (long)data[i].value, (long)data[i].value);
880 }
881 trie.reset();
882 }
883 }
884
885 // next(string) is also tested in other functions,
886 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)887 void BytesTrieTest::checkNextString(BytesTrie &trie,
888 const StringAndValue data[], int32_t dataLength) {
889 for(int32_t i=0; i<dataLength; ++i) {
890 const char *expectedString=data[i].s;
891 int32_t stringLength = static_cast<int32_t>(strlen(expectedString));
892 if(!trie.next(expectedString, stringLength/2)) {
893 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
894 continue;
895 }
896 // Test that we stop properly at the end of the string.
897 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
898 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
899 }
900 trie.reset();
901 }
902 }
903
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)904 void BytesTrieTest::checkIterator(const BytesTrie &trie,
905 const StringAndValue data[], int32_t dataLength) {
906 IcuTestErrorCode errorCode(*this, "checkIterator()");
907 BytesTrie::Iterator iter(trie, 0, errorCode);
908 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
909 return;
910 }
911 checkIterator(iter, data, dataLength);
912 }
913
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)914 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
915 const StringAndValue data[], int32_t dataLength) {
916 IcuTestErrorCode errorCode(*this, "checkIterator()");
917 for(int32_t i=0; i<dataLength; ++i) {
918 if(!iter.hasNext()) {
919 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
920 break;
921 }
922 UBool hasNext=iter.next(errorCode);
923 if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
924 break;
925 }
926 if(!hasNext) {
927 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
928 break;
929 }
930 if(iter.getString()!=StringPiece(data[i].s)) {
931 errln("trie iterator next().getString()=%s but expected %s for item %d",
932 iter.getString().data(), data[i].s, (int)i);
933 }
934 if(iter.getValue()!=data[i].value) {
935 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
936 (long)iter.getValue(), (long)iter.getValue(),
937 (long)data[i].value, (long)data[i].value,
938 (int)i, data[i].s);
939 }
940 }
941 if(iter.hasNext()) {
942 errln("trie iterator hasNext()=TRUE after all items");
943 }
944 UBool hasNext=iter.next(errorCode);
945 errorCode.errIfFailureAndReset("trie iterator next() after all items");
946 if(hasNext) {
947 errln("trie iterator next()=TRUE after all items");
948 }
949 }
950