1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2010-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytetrietest.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010nov16
14 * created by: Markus W. Scherer
15 */
16
17 #include <string.h>
18
19 #include "unicode/utypes.h"
20 #include "unicode/bytestrie.h"
21 #include "unicode/bytestriebuilder.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/stringpiece.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26
27 struct StringAndValue {
28 const char *s;
29 int32_t value;
30 };
31
32 class BytesTrieTest : public IntlTest {
33 public:
34 BytesTrieTest();
35 virtual ~BytesTrieTest();
36
37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL) override;
38 void TestBuilder();
39 void TestEmpty();
40 void Test_a();
41 void Test_a_ab();
42 void TestShortestBranch();
43 void TestBranches();
44 void TestLongSequence();
45 void TestLongBranch();
46 void TestValuesForState();
47 void TestCompact();
48
49 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
50 void TestHasUniqueValue();
51 void TestGetNextBytes();
52 void TestIteratorFromBranch();
53 void TestIteratorFromLinearMatch();
54 void TestTruncatingIteratorFromRoot();
55 void TestTruncatingIteratorFromLinearMatchShort();
56 void TestTruncatingIteratorFromLinearMatchLong();
57 void TestIteratorFromBytes();
58 void TestFailedIterator();
59 void TestDelta();
60
61 void checkData(const StringAndValue data[], int32_t dataLength);
62 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
63 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
64 UStringTrieBuildOption buildOption);
65 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
68 void checkNextWithState64(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
69 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
70 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
71 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
72
73 private:
74 BytesTrieBuilder *builder_;
75 };
76
createBytesTrieTest()77 extern IntlTest *createBytesTrieTest() {
78 return new BytesTrieTest();
79 }
80
BytesTrieTest()81 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
82 IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
83 builder_=new BytesTrieBuilder(errorCode);
84 }
85
~BytesTrieTest()86 BytesTrieTest::~BytesTrieTest() {
87 delete builder_;
88 }
89
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)90 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
91 if(exec) {
92 logln("TestSuite BytesTrieTest: ");
93 }
94 TESTCASE_AUTO_BEGIN;
95 TESTCASE_AUTO(TestBuilder);
96 TESTCASE_AUTO(TestEmpty);
97 TESTCASE_AUTO(Test_a);
98 TESTCASE_AUTO(Test_a_ab);
99 TESTCASE_AUTO(TestShortestBranch);
100 TESTCASE_AUTO(TestBranches);
101 TESTCASE_AUTO(TestLongSequence);
102 TESTCASE_AUTO(TestLongBranch);
103 TESTCASE_AUTO(TestValuesForState);
104 TESTCASE_AUTO(TestCompact);
105 TESTCASE_AUTO(TestHasUniqueValue);
106 TESTCASE_AUTO(TestGetNextBytes);
107 TESTCASE_AUTO(TestIteratorFromBranch);
108 TESTCASE_AUTO(TestIteratorFromLinearMatch);
109 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
110 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
111 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
112 TESTCASE_AUTO(TestIteratorFromBytes);
113 TESTCASE_AUTO(TestFailedIterator);
114 TESTCASE_AUTO(TestDelta);
115 TESTCASE_AUTO_END;
116 }
117
TestBuilder()118 void BytesTrieTest::TestBuilder() {
119 IcuTestErrorCode errorCode(*this, "TestBuilder()");
120 builder_->clear();
121 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
122 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
123 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
124 return;
125 }
126 // TODO: remove .build(...) once add() checks for duplicates.
127 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
128 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
129 errln("BytesTrieBuilder.add() did not detect duplicates");
130 return;
131 }
132 }
133
TestEmpty()134 void BytesTrieTest::TestEmpty() {
135 static const StringAndValue data[]={
136 { "", 0 }
137 };
138 checkData(data, UPRV_LENGTHOF(data));
139 }
140
Test_a()141 void BytesTrieTest::Test_a() {
142 static const StringAndValue data[]={
143 { "a", 1 }
144 };
145 checkData(data, UPRV_LENGTHOF(data));
146 }
147
Test_a_ab()148 void BytesTrieTest::Test_a_ab() {
149 static const StringAndValue data[]={
150 { "a", 1 },
151 { "ab", 100 }
152 };
153 checkData(data, UPRV_LENGTHOF(data));
154 }
155
TestShortestBranch()156 void BytesTrieTest::TestShortestBranch() {
157 static const StringAndValue data[]={
158 { "a", 1000 },
159 { "b", 2000 }
160 };
161 checkData(data, UPRV_LENGTHOF(data));
162 }
163
TestBranches()164 void BytesTrieTest::TestBranches() {
165 static const StringAndValue data[]={
166 { "a", 0x10 },
167 { "cc", 0x40 },
168 { "e", 0x100 },
169 { "ggg", 0x400 },
170 { "i", 0x1000 },
171 { "kkkk", 0x4000 },
172 { "n", 0x10000 },
173 { "ppppp", 0x40000 },
174 { "r", 0x100000 },
175 { "sss", 0x200000 },
176 { "t", 0x400000 },
177 { "uu", 0x800000 },
178 { "vv", 0x7fffffff },
179 { "zz", (int32_t)0x80000000 }
180 };
181 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
182 logln("TestBranches length=%d", (int)length);
183 checkData(data, length);
184 }
185 }
186
TestLongSequence()187 void BytesTrieTest::TestLongSequence() {
188 static const StringAndValue data[]={
189 { "a", -1 },
190 // sequence of linear-match nodes
191 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
192 // more than 256 bytes
193 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
194 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
195 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
196 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
199 };
200 checkData(data, UPRV_LENGTHOF(data));
201 }
202
TestLongBranch()203 void BytesTrieTest::TestLongBranch() {
204 // Split-branch and interesting compact-integer values.
205 static const StringAndValue data[]={
206 { "a", -2 },
207 { "b", -1 },
208 { "c", 0 },
209 { "d2", 1 },
210 { "f", 0x3f },
211 { "g", 0x40 },
212 { "h", 0x41 },
213 { "j23", 0x1900 },
214 { "j24", 0x19ff },
215 { "j25", 0x1a00 },
216 { "k2", 0x1a80 },
217 { "k3", 0x1aff },
218 { "l234567890", 0x1b00 },
219 { "l234567890123", 0x1b01 },
220 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
221 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
222 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
223 { "r", 0x333333 },
224 { "s2345", 0x4444444 },
225 { "t234567890", 0x77777777 },
226 { "z", (int32_t)0x80000001 }
227 };
228 checkData(data, UPRV_LENGTHOF(data));
229 }
230
TestValuesForState()231 void BytesTrieTest::TestValuesForState() {
232 // Check that saveState() and resetToState() interact properly
233 // with next() and current().
234 static const StringAndValue data[]={
235 { "a", -1 },
236 { "ab", -2 },
237 { "abc", -3 },
238 { "abcd", -4 },
239 { "abcde", -5 },
240 { "abcdef", -6 }
241 };
242 checkData(data, UPRV_LENGTHOF(data));
243 }
244
TestCompact()245 void BytesTrieTest::TestCompact() {
246 // Duplicate trailing strings and values provide opportunities for compacting.
247 static const StringAndValue data[]={
248 { "+", 0 },
249 { "+august", 8 },
250 { "+december", 12 },
251 { "+july", 7 },
252 { "+june", 6 },
253 { "+november", 11 },
254 { "+october", 10 },
255 { "+september", 9 },
256 { "-", 0 },
257 { "-august", 8 },
258 { "-december", 12 },
259 { "-july", 7 },
260 { "-june", 6 },
261 { "-november", 11 },
262 { "-october", 10 },
263 { "-september", 9 },
264 // The l+n branch (with its sub-nodes) is a duplicate but will be written
265 // both times because each time it follows a different linear-match node.
266 { "xjuly", 7 },
267 { "xjune", 6 }
268 };
269 checkData(data, UPRV_LENGTHOF(data));
270 }
271
buildMonthsTrie(UStringTrieBuildOption buildOption)272 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
273 // All types of nodes leading to the same value,
274 // for code coverage of recursive functions.
275 // In particular, we need a lot of branches on some single level
276 // to exercise a split-branch node.
277 static const StringAndValue data[]={
278 { "august", 8 },
279 { "jan", 1 },
280 { "jan.", 1 },
281 { "jana", 1 },
282 { "janbb", 1 },
283 { "janc", 1 },
284 { "janddd", 1 },
285 { "janee", 1 },
286 { "janef", 1 },
287 { "janf", 1 },
288 { "jangg", 1 },
289 { "janh", 1 },
290 { "janiiii", 1 },
291 { "janj", 1 },
292 { "jankk", 1 },
293 { "jankl", 1 },
294 { "jankmm", 1 },
295 { "janl", 1 },
296 { "janm", 1 },
297 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
298 { "jano", 1 },
299 { "janpp", 1 },
300 { "janqqq", 1 },
301 { "janr", 1 },
302 { "januar", 1 },
303 { "january", 1 },
304 { "july", 7 },
305 { "jun", 6 },
306 { "jun.", 6 },
307 { "june", 6 }
308 };
309 return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
310 }
311
TestHasUniqueValue()312 void BytesTrieTest::TestHasUniqueValue() {
313 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
314 if(trie.isNull()) {
315 return; // buildTrie() reported an error
316 }
317 int32_t uniqueValue;
318 if(trie->hasUniqueValue(uniqueValue)) {
319 errln("unique value at root");
320 }
321 trie->next('j');
322 trie->next('a');
323 trie->next('n');
324 // hasUniqueValue() directly after next()
325 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
326 errln("not unique value 1 after \"jan\"");
327 }
328 trie->first('j');
329 trie->next('u');
330 if(trie->hasUniqueValue(uniqueValue)) {
331 errln("unique value after \"ju\"");
332 }
333 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
334 errln("not normal value 6 after \"jun\"");
335 }
336 // hasUniqueValue() after getValue()
337 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
338 errln("not unique value 6 after \"jun\"");
339 }
340 // hasUniqueValue() from within a linear-match node
341 trie->first('a');
342 trie->next('u');
343 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
344 errln("not unique value 8 after \"au\"");
345 }
346 }
347
TestGetNextBytes()348 void BytesTrieTest::TestGetNextBytes() {
349 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
350 if(trie.isNull()) {
351 return; // buildTrie() reported an error
352 }
353 char buffer[40];
354 CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
355 int32_t count=trie->getNextBytes(sink);
356 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
357 errln("months getNextBytes()!=[aj] at root");
358 }
359 trie->next('j');
360 trie->next('a');
361 trie->next('n');
362 // getNextBytes() directly after next()
363 count=trie->getNextBytes(sink.Reset());
364 buffer[count]=0;
365 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
366 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
367 }
368 // getNextBytes() after getValue()
369 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
370 memset(buffer, 0, sizeof(buffer));
371 count=trie->getNextBytes(sink.Reset());
372 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
373 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
374 }
375 // getNextBytes() from a linear-match node
376 trie->next('u');
377 memset(buffer, 0, sizeof(buffer));
378 count=trie->getNextBytes(sink.Reset());
379 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
380 errln("months getNextBytes()!=[a] after \"janu\"");
381 }
382 trie->next('a');
383 memset(buffer, 0, sizeof(buffer));
384 count=trie->getNextBytes(sink.Reset());
385 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
386 errln("months getNextBytes()!=[r] after \"janua\"");
387 }
388 trie->next('r');
389 trie->next('y');
390 // getNextBytes() after a final match
391 count=trie->getNextBytes(sink.Reset());
392 if(count!=0 || sink.NumberOfBytesAppended()!=0) {
393 errln("months getNextBytes()!=[] after \"january\"");
394 }
395 }
396
TestIteratorFromBranch()397 void BytesTrieTest::TestIteratorFromBranch() {
398 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
399 if(trie.isNull()) {
400 return; // buildTrie() reported an error
401 }
402 // Go to a branch node.
403 trie->next('j');
404 trie->next('a');
405 trie->next('n');
406 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
407 BytesTrie::Iterator iter(*trie, 0, errorCode);
408 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
409 return;
410 }
411 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
412 // following "jan".
413 static const StringAndValue data[]={
414 { "", 1 },
415 { ".", 1 },
416 { "a", 1 },
417 { "bb", 1 },
418 { "c", 1 },
419 { "ddd", 1 },
420 { "ee", 1 },
421 { "ef", 1 },
422 { "f", 1 },
423 { "gg", 1 },
424 { "h", 1 },
425 { "iiii", 1 },
426 { "j", 1 },
427 { "kk", 1 },
428 { "kl", 1 },
429 { "kmm", 1 },
430 { "l", 1 },
431 { "m", 1 },
432 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
433 { "o", 1 },
434 { "pp", 1 },
435 { "qqq", 1 },
436 { "r", 1 },
437 { "uar", 1 },
438 { "uary", 1 }
439 };
440 checkIterator(iter, data, UPRV_LENGTHOF(data));
441 // Reset, and we should get the same result.
442 logln("after iter.reset()");
443 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
444 }
445
TestIteratorFromLinearMatch()446 void BytesTrieTest::TestIteratorFromLinearMatch() {
447 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
448 if(trie.isNull()) {
449 return; // buildTrie() reported an error
450 }
451 // Go into a linear-match node.
452 trie->next('j');
453 trie->next('a');
454 trie->next('n');
455 trie->next('u');
456 trie->next('a');
457 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
458 BytesTrie::Iterator iter(*trie, 0, errorCode);
459 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
460 return;
461 }
462 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
463 // following "janua".
464 static const StringAndValue data[]={
465 { "r", 1 },
466 { "ry", 1 }
467 };
468 checkIterator(iter, data, UPRV_LENGTHOF(data));
469 // Reset, and we should get the same result.
470 logln("after iter.reset()");
471 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
472 }
473
TestTruncatingIteratorFromRoot()474 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
475 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
476 if(trie.isNull()) {
477 return; // buildTrie() reported an error
478 }
479 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
480 BytesTrie::Iterator iter(*trie, 4, errorCode);
481 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
482 return;
483 }
484 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
485 // of each string, and no string duplicates from the truncation.
486 static const StringAndValue data[]={
487 { "augu", -1 },
488 { "jan", 1 },
489 { "jan.", 1 },
490 { "jana", 1 },
491 { "janb", -1 },
492 { "janc", 1 },
493 { "jand", -1 },
494 { "jane", -1 },
495 { "janf", 1 },
496 { "jang", -1 },
497 { "janh", 1 },
498 { "jani", -1 },
499 { "janj", 1 },
500 { "jank", -1 },
501 { "janl", 1 },
502 { "janm", 1 },
503 { "jann", -1 },
504 { "jano", 1 },
505 { "janp", -1 },
506 { "janq", -1 },
507 { "janr", 1 },
508 { "janu", -1 },
509 { "july", 7 },
510 { "jun", 6 },
511 { "jun.", 6 },
512 { "june", 6 }
513 };
514 checkIterator(iter, data, UPRV_LENGTHOF(data));
515 // Reset, and we should get the same result.
516 logln("after iter.reset()");
517 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
518 }
519
TestTruncatingIteratorFromLinearMatchShort()520 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
521 static const StringAndValue data[]={
522 { "abcdef", 10 },
523 { "abcdepq", 200 },
524 { "abcdeyz", 3000 }
525 };
526 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
527 if(trie.isNull()) {
528 return; // buildTrie() reported an error
529 }
530 // Go into a linear-match node.
531 trie->next('a');
532 trie->next('b');
533 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
534 // Truncate within the linear-match node.
535 BytesTrie::Iterator iter(*trie, 2, errorCode);
536 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
537 return;
538 }
539 static const StringAndValue expected[]={
540 { "cd", -1 }
541 };
542 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
543 // Reset, and we should get the same result.
544 logln("after iter.reset()");
545 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
546 }
547
TestTruncatingIteratorFromLinearMatchLong()548 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
549 static const StringAndValue data[]={
550 { "abcdef", 10 },
551 { "abcdepq", 200 },
552 { "abcdeyz", 3000 }
553 };
554 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
555 if(trie.isNull()) {
556 return; // buildTrie() reported an error
557 }
558 // Go into a linear-match node.
559 trie->next('a');
560 trie->next('b');
561 trie->next('c');
562 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
563 // Truncate after the linear-match node.
564 BytesTrie::Iterator iter(*trie, 3, errorCode);
565 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
566 return;
567 }
568 static const StringAndValue expected[]={
569 { "def", 10 },
570 { "dep", -1 },
571 { "dey", -1 }
572 };
573 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
574 // Reset, and we should get the same result.
575 logln("after iter.reset()");
576 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
577 }
578
TestIteratorFromBytes()579 void BytesTrieTest::TestIteratorFromBytes() {
580 static const StringAndValue data[]={
581 { "mm", 3 },
582 { "mmm", 33 },
583 { "mmnop", 333 }
584 };
585 builder_->clear();
586 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
587 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
588 builder_->add(data[i].s, data[i].value, errorCode);
589 }
590 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
591 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
592 checkIterator(iter, data, UPRV_LENGTHOF(data));
593 }
594
TestFailedIterator()595 void BytesTrieTest::TestFailedIterator() {
596 UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
597 BytesTrie::Iterator iter(NULL, 0, failure);
598 StringPiece sp = iter.getString();
599 if (!sp.empty()) {
600 errln("failed iterator returned garbage data");
601 }
602 }
603
TestDelta()604 void BytesTrieTest::TestDelta() {
605 char intBytes0[5];
606 char intBytes1[5];
607 static constexpr int32_t sampleDeltas[] = {
608 -1, 0, 1, 2, 3, 0xa5, 0xbe, 0xbf,
609 -2, 0xc0, 0xc1, 0xeee, 0x1234, 0x2ffe, 0x2fff,
610 -3, 0x3000, 0x3001, 0x3003, 0x50005, 0xdfffe, 0xdffff,
611 -4, 0xe0000, 0xe0001, 0xef0123, 0xfffffe, 0xffffff,
612 -5, 0x1000000, 0x1000001, 0x7fffffff
613 };
614 int32_t expectedLength = 0;
615 for (int32_t delta : sampleDeltas) {
616 if (delta < 0) {
617 expectedLength = -delta;
618 continue;
619 }
620 // Encoding twice into differently-initialized arrays
621 // catches bytes that are not written to.
622 memset(intBytes0, 0, sizeof(intBytes0));
623 memset(intBytes1, 1, sizeof(intBytes1));
624 int32_t length0 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes0);
625 int32_t length1 = BytesTrieBuilder::internalEncodeDelta(delta, intBytes1);
626 assertTrue(UnicodeString(u"non-zero length to encode delta ") + delta, length0 > 0);
627 assertEquals(UnicodeString(u"consistent length to encode delta ") + delta, length0, length1);
628 assertEquals(UnicodeString(u"expected length to encode delta ") + delta,
629 expectedLength, length0);
630 for (int32_t i = 0; i < length0; ++i) {
631 uint8_t b0 = intBytes0[i];
632 uint8_t b1 = intBytes1[i];
633 assertEquals(UnicodeString(u"differently encoded delta ") + delta +
634 u" at byte index " + i, b0, b1);
635 }
636 const uint8_t *start = (const uint8_t *)intBytes0;
637 const uint8_t *pos = BytesTrie::jumpByDelta(start);
638 assertEquals(UnicodeString(u"roundtrip for delta ") + delta,
639 delta, (int32_t)(pos - start) - length0);
640 }
641 }
642
checkData(const StringAndValue data[],int32_t dataLength)643 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
644 logln("checkData(dataLength=%d, fast)", (int)dataLength);
645 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
646 logln("checkData(dataLength=%d, small)", (int)dataLength);
647 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
648 }
649
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)650 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
651 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
652 if(trie.isNull()) {
653 return; // buildTrie() reported an error
654 }
655 checkFirst(*trie, data, dataLength);
656 checkNext(*trie, data, dataLength);
657 checkNextWithState(*trie, data, dataLength);
658 checkNextWithState64(*trie, data, dataLength);
659 checkNextString(*trie, data, dataLength);
660 checkIterator(*trie, data, dataLength);
661 }
662
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)663 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
664 UStringTrieBuildOption buildOption) {
665 IcuTestErrorCode errorCode(*this, "buildTrie()");
666 // Add the items to the trie builder in an interesting (not trivial, not random) order.
667 int32_t index, step;
668 if(dataLength&1) {
669 // Odd number of items.
670 index=dataLength/2;
671 step=2;
672 } else if((dataLength%3)!=0) {
673 // Not a multiple of 3.
674 index=dataLength/5;
675 step=3;
676 } else {
677 index=dataLength-1;
678 step=-1;
679 }
680 builder_->clear();
681 for(int32_t i=0; i<dataLength; ++i) {
682 builder_->add(data[index].s, data[index].value, errorCode);
683 index=(index+step)%dataLength;
684 }
685 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
686 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
687 if(!errorCode.errIfFailureAndReset("add()/build()")) {
688 builder_->add("zzz", 999, errorCode);
689 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
690 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
691 }
692 }
693 logln("serialized trie size: %ld bytes\n", (long)sp.length());
694 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
695 if(sp.data()==sp2.data()) {
696 errln("builder.buildStringPiece() before & after build() returned same array");
697 }
698 if(errorCode.isFailure()) {
699 return NULL;
700 }
701 // Tries from either build() method should be identical but
702 // BytesTrie does not implement equals().
703 // We just return either one.
704 if((dataLength&1)!=0) {
705 return trie.orphan();
706 } else {
707 return new BytesTrie(sp2.data());
708 }
709 }
710
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)711 void BytesTrieTest::checkFirst(BytesTrie &trie,
712 const StringAndValue data[], int32_t dataLength) {
713 for(int32_t i=0; i<dataLength; ++i) {
714 int c=*data[i].s;
715 if(c==0) {
716 continue; // skip empty string
717 }
718 UStringTrieResult firstResult=trie.first(c);
719 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
720 UStringTrieResult nextResult=trie.next(data[i].s[1]);
721 if(firstResult!=trie.reset().next(c) ||
722 firstResult!=trie.current() ||
723 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
724 nextResult!=trie.next(data[i].s[1])
725 ) {
726 errln("trie.first(%c)!=trie.reset().next(same) for %s",
727 c, data[i].s);
728 }
729 }
730 trie.reset();
731 }
732
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)733 void BytesTrieTest::checkNext(BytesTrie &trie,
734 const StringAndValue data[], int32_t dataLength) {
735 BytesTrie::State state;
736 for(int32_t i=0; i<dataLength; ++i) {
737 int32_t stringLength= (i&1) ? -1 : static_cast<int32_t>(strlen(data[i].s));
738 UStringTrieResult result;
739 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
740 result!=trie.current()
741 ) {
742 errln("trie does not seem to contain %s", data[i].s);
743 } else if(trie.getValue()!=data[i].value) {
744 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
745 data[i].s,
746 (long)trie.getValue(), (long)trie.getValue(),
747 (long)data[i].value, (long)data[i].value);
748 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
749 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
750 }
751 trie.reset();
752 stringLength = static_cast<int32_t>(strlen(data[i].s));
753 result=trie.current();
754 for(int32_t j=0; j<stringLength; ++j) {
755 if(!USTRINGTRIE_HAS_NEXT(result)) {
756 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
757 break;
758 }
759 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
760 trie.getValue();
761 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
762 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
763 break;
764 }
765 }
766 result=trie.next(data[i].s[j]);
767 if(!USTRINGTRIE_MATCHES(result)) {
768 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
769 break;
770 }
771 if(result!=trie.current()) {
772 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
773 break;
774 }
775 }
776 if(!USTRINGTRIE_HAS_VALUE(result)) {
777 errln("trie.next()!=hasValue at the end of %s", data[i].s);
778 continue;
779 }
780 trie.getValue();
781 if(result!=trie.current()) {
782 errln("trie.current() != current()+getValue()+current() after end of %s",
783 data[i].s);
784 }
785 // Compare the final current() with whether next() can actually continue.
786 trie.saveState(state);
787 UBool nextContinues=false;
788 // Try all graphic characters; we only use those in test strings in this file.
789 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
790 const int32_t minChar=0x20;
791 const int32_t maxChar=0x7e;
792 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
793 const int32_t minChar=0x40;
794 const int32_t maxChar=0xfe;
795 #else
796 const int32_t minChar=0;
797 const int32_t maxChar=0xff;
798 #endif
799 for(int32_t c=minChar; c<=maxChar; ++c) {
800 if(trie.resetToState(state).next(c)) {
801 nextContinues=true;
802 break;
803 }
804 }
805 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
806 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
807 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
808 }
809 trie.reset();
810 }
811 }
812
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)813 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
814 const StringAndValue data[], int32_t dataLength) {
815 BytesTrie::State noState, state;
816 for(int32_t i=0; i<dataLength; ++i) {
817 if((i&1)==0) {
818 // This should have no effect.
819 trie.resetToState(noState);
820 }
821 const char *expectedString=data[i].s;
822 int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
823 int32_t partialLength = stringLength / 3;
824 for(int32_t j=0; j<partialLength; ++j) {
825 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
826 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
827 return;
828 }
829 }
830 trie.saveState(state);
831 UStringTrieResult resultAtState=trie.current();
832 UStringTrieResult result;
833 int32_t valueAtState=-99;
834 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
835 valueAtState=trie.getValue();
836 }
837 result=trie.next(0); // mismatch
838 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
839 errln("trie.next(0) matched after part of %s", data[i].s);
840 }
841 if( resultAtState!=trie.resetToState(state).current() ||
842 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
843 ) {
844 errln("trie.next(part of %s) changes current()/getValue() after "
845 "saveState/next(0)/resetToState",
846 data[i].s);
847 } else if(!USTRINGTRIE_HAS_VALUE(
848 result=trie.next(expectedString+partialLength,
849 stringLength-partialLength)) ||
850 result!=trie.current()) {
851 errln("trie.next(rest of %s) does not seem to contain %s after "
852 "saveState/next(0)/resetToState",
853 data[i].s, data[i].s);
854 } else if(!USTRINGTRIE_HAS_VALUE(
855 result=trie.resetToState(state).
856 next(expectedString+partialLength,
857 stringLength-partialLength)) ||
858 result!=trie.current()) {
859 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
860 data[i].s);
861 } else if(trie.getValue()!=data[i].value) {
862 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
863 data[i].s,
864 (long)trie.getValue(), (long)trie.getValue(),
865 (long)data[i].value, (long)data[i].value);
866 }
867 trie.reset();
868 }
869 }
870
checkNextWithState64(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)871 void BytesTrieTest::checkNextWithState64(BytesTrie &trie,
872 const StringAndValue data[], int32_t dataLength) {
873 assertTrue("trie(initial state).getState64()!=0", trie.getState64() != 0);
874 for(int32_t i=0; i<dataLength; ++i) {
875 const char *expectedString=data[i].s;
876 int32_t stringLength= static_cast<int32_t>(strlen(expectedString));
877 int32_t partialLength = stringLength / 3;
878 for(int32_t j=0; j<partialLength; ++j) {
879 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
880 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
881 return;
882 }
883 }
884 uint64_t state = trie.getState64();
885 assertTrue("trie.getState64()!=0", state != 0);
886 UStringTrieResult resultAtState=trie.current();
887 UStringTrieResult result;
888 int32_t valueAtState=-99;
889 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
890 valueAtState=trie.getValue();
891 }
892 result=trie.next(0); // mismatch
893 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
894 errln("trie.next(0) matched after part of %s", data[i].s);
895 }
896 if( resultAtState!=trie.resetToState64(state).current() ||
897 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
898 ) {
899 errln("trie.next(part of %s) changes current()/getValue() after "
900 "getState64/next(0)/resetToState64",
901 data[i].s);
902 } else if(!USTRINGTRIE_HAS_VALUE(
903 result=trie.next(expectedString+partialLength,
904 stringLength-partialLength)) ||
905 result!=trie.current()) {
906 errln("trie.next(rest of %s) does not seem to contain %s after "
907 "getState64/next(0)/resetToState64",
908 data[i].s, data[i].s);
909 } else if(!USTRINGTRIE_HAS_VALUE(
910 result=trie.resetToState64(state).
911 next(expectedString+partialLength,
912 stringLength-partialLength)) ||
913 result!=trie.current()) {
914 errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64",
915 data[i].s);
916 } else if(trie.getValue()!=data[i].value) {
917 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
918 data[i].s,
919 (long)trie.getValue(), (long)trie.getValue(),
920 (long)data[i].value, (long)data[i].value);
921 }
922 trie.reset();
923 }
924 }
925
926 // next(string) is also tested in other functions,
927 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)928 void BytesTrieTest::checkNextString(BytesTrie &trie,
929 const StringAndValue data[], int32_t dataLength) {
930 for(int32_t i=0; i<dataLength; ++i) {
931 const char *expectedString=data[i].s;
932 int32_t stringLength = static_cast<int32_t>(strlen(expectedString));
933 if(!trie.next(expectedString, stringLength/2)) {
934 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
935 continue;
936 }
937 // Test that we stop properly at the end of the string.
938 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
939 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
940 }
941 trie.reset();
942 }
943 }
944
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)945 void BytesTrieTest::checkIterator(const BytesTrie &trie,
946 const StringAndValue data[], int32_t dataLength) {
947 IcuTestErrorCode errorCode(*this, "checkIterator()");
948 BytesTrie::Iterator iter(trie, 0, errorCode);
949 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
950 return;
951 }
952 checkIterator(iter, data, dataLength);
953 }
954
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)955 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
956 const StringAndValue data[], int32_t dataLength) {
957 IcuTestErrorCode errorCode(*this, "checkIterator()");
958 for(int32_t i=0; i<dataLength; ++i) {
959 if(!iter.hasNext()) {
960 errln("trie iterator hasNext()=false for item %d: %s", (int)i, data[i].s);
961 break;
962 }
963 UBool hasNext=iter.next(errorCode);
964 if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
965 break;
966 }
967 if(!hasNext) {
968 errln("trie iterator next()=false for item %d: %s", (int)i, data[i].s);
969 break;
970 }
971 if(iter.getString()!=StringPiece(data[i].s)) {
972 errln("trie iterator next().getString()=%s but expected %s for item %d",
973 iter.getString().data(), data[i].s, (int)i);
974 }
975 if(iter.getValue()!=data[i].value) {
976 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
977 (long)iter.getValue(), (long)iter.getValue(),
978 (long)data[i].value, (long)data[i].value,
979 (int)i, data[i].s);
980 }
981 }
982 if(iter.hasNext()) {
983 errln("trie iterator hasNext()=true after all items");
984 }
985 UBool hasNext=iter.next(errorCode);
986 errorCode.errIfFailureAndReset("trie iterator next() after all items");
987 if(hasNext) {
988 errln("trie iterator next()=true after all items");
989 }
990 }
991