1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: bytetrietest.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010nov16
12 * created by: Markus W. Scherer
13 */
14
15 #include <string.h>
16
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/bytestriebuilder.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/stringpiece.h"
22 #include "intltest.h"
23 #include "cmemory.h"
24
25 struct StringAndValue {
26 const char *s;
27 int32_t value;
28 };
29
30 class BytesTrieTest : public IntlTest {
31 public:
32 BytesTrieTest();
33 virtual ~BytesTrieTest();
34
35 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
36 void TestBuilder();
37 void TestEmpty();
38 void Test_a();
39 void Test_a_ab();
40 void TestShortestBranch();
41 void TestBranches();
42 void TestLongSequence();
43 void TestLongBranch();
44 void TestValuesForState();
45 void TestCompact();
46
47 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
48 void TestHasUniqueValue();
49 void TestGetNextBytes();
50 void TestIteratorFromBranch();
51 void TestIteratorFromLinearMatch();
52 void TestTruncatingIteratorFromRoot();
53 void TestTruncatingIteratorFromLinearMatchShort();
54 void TestTruncatingIteratorFromLinearMatchLong();
55 void TestIteratorFromBytes();
56
57 void checkData(const StringAndValue data[], int32_t dataLength);
58 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
59 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
60 UStringTrieBuildOption buildOption);
61 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
62 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
63 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
64 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
67
68 private:
69 BytesTrieBuilder *builder_;
70 };
71
createBytesTrieTest()72 extern IntlTest *createBytesTrieTest() {
73 return new BytesTrieTest();
74 }
75
BytesTrieTest()76 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
77 IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
78 builder_=new BytesTrieBuilder(errorCode);
79 }
80
~BytesTrieTest()81 BytesTrieTest::~BytesTrieTest() {
82 delete builder_;
83 }
84
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)85 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
86 if(exec) {
87 logln("TestSuite BytesTrieTest: ");
88 }
89 TESTCASE_AUTO_BEGIN;
90 TESTCASE_AUTO(TestBuilder);
91 TESTCASE_AUTO(TestEmpty);
92 TESTCASE_AUTO(Test_a);
93 TESTCASE_AUTO(Test_a_ab);
94 TESTCASE_AUTO(TestShortestBranch);
95 TESTCASE_AUTO(TestBranches);
96 TESTCASE_AUTO(TestLongSequence);
97 TESTCASE_AUTO(TestLongBranch);
98 TESTCASE_AUTO(TestValuesForState);
99 TESTCASE_AUTO(TestCompact);
100 TESTCASE_AUTO(TestHasUniqueValue);
101 TESTCASE_AUTO(TestGetNextBytes);
102 TESTCASE_AUTO(TestIteratorFromBranch);
103 TESTCASE_AUTO(TestIteratorFromLinearMatch);
104 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
105 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
106 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
107 TESTCASE_AUTO(TestIteratorFromBytes);
108 TESTCASE_AUTO_END;
109 }
110
TestBuilder()111 void BytesTrieTest::TestBuilder() {
112 IcuTestErrorCode errorCode(*this, "TestBuilder()");
113 builder_->clear();
114 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
115 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
116 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
117 return;
118 }
119 // TODO: remove .build(...) once add() checks for duplicates.
120 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
121 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
122 errln("BytesTrieBuilder.add() did not detect duplicates");
123 return;
124 }
125 }
126
TestEmpty()127 void BytesTrieTest::TestEmpty() {
128 static const StringAndValue data[]={
129 { "", 0 }
130 };
131 checkData(data, UPRV_LENGTHOF(data));
132 }
133
Test_a()134 void BytesTrieTest::Test_a() {
135 static const StringAndValue data[]={
136 { "a", 1 }
137 };
138 checkData(data, UPRV_LENGTHOF(data));
139 }
140
Test_a_ab()141 void BytesTrieTest::Test_a_ab() {
142 static const StringAndValue data[]={
143 { "a", 1 },
144 { "ab", 100 }
145 };
146 checkData(data, UPRV_LENGTHOF(data));
147 }
148
TestShortestBranch()149 void BytesTrieTest::TestShortestBranch() {
150 static const StringAndValue data[]={
151 { "a", 1000 },
152 { "b", 2000 }
153 };
154 checkData(data, UPRV_LENGTHOF(data));
155 }
156
TestBranches()157 void BytesTrieTest::TestBranches() {
158 static const StringAndValue data[]={
159 { "a", 0x10 },
160 { "cc", 0x40 },
161 { "e", 0x100 },
162 { "ggg", 0x400 },
163 { "i", 0x1000 },
164 { "kkkk", 0x4000 },
165 { "n", 0x10000 },
166 { "ppppp", 0x40000 },
167 { "r", 0x100000 },
168 { "sss", 0x200000 },
169 { "t", 0x400000 },
170 { "uu", 0x800000 },
171 { "vv", 0x7fffffff },
172 { "zz", (int32_t)0x80000000 }
173 };
174 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
175 logln("TestBranches length=%d", (int)length);
176 checkData(data, length);
177 }
178 }
179
TestLongSequence()180 void BytesTrieTest::TestLongSequence() {
181 static const StringAndValue data[]={
182 { "a", -1 },
183 // sequence of linear-match nodes
184 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
185 // more than 256 bytes
186 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
187 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
188 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
189 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
190 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
192 };
193 checkData(data, UPRV_LENGTHOF(data));
194 }
195
TestLongBranch()196 void BytesTrieTest::TestLongBranch() {
197 // Split-branch and interesting compact-integer values.
198 static const StringAndValue data[]={
199 { "a", -2 },
200 { "b", -1 },
201 { "c", 0 },
202 { "d2", 1 },
203 { "f", 0x3f },
204 { "g", 0x40 },
205 { "h", 0x41 },
206 { "j23", 0x1900 },
207 { "j24", 0x19ff },
208 { "j25", 0x1a00 },
209 { "k2", 0x1a80 },
210 { "k3", 0x1aff },
211 { "l234567890", 0x1b00 },
212 { "l234567890123", 0x1b01 },
213 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
214 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
215 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
216 { "r", 0x333333 },
217 { "s2345", 0x4444444 },
218 { "t234567890", 0x77777777 },
219 { "z", (int32_t)0x80000001 }
220 };
221 checkData(data, UPRV_LENGTHOF(data));
222 }
223
TestValuesForState()224 void BytesTrieTest::TestValuesForState() {
225 // Check that saveState() and resetToState() interact properly
226 // with next() and current().
227 static const StringAndValue data[]={
228 { "a", -1 },
229 { "ab", -2 },
230 { "abc", -3 },
231 { "abcd", -4 },
232 { "abcde", -5 },
233 { "abcdef", -6 }
234 };
235 checkData(data, UPRV_LENGTHOF(data));
236 }
237
TestCompact()238 void BytesTrieTest::TestCompact() {
239 // Duplicate trailing strings and values provide opportunities for compacting.
240 static const StringAndValue data[]={
241 { "+", 0 },
242 { "+august", 8 },
243 { "+december", 12 },
244 { "+july", 7 },
245 { "+june", 6 },
246 { "+november", 11 },
247 { "+october", 10 },
248 { "+september", 9 },
249 { "-", 0 },
250 { "-august", 8 },
251 { "-december", 12 },
252 { "-july", 7 },
253 { "-june", 6 },
254 { "-november", 11 },
255 { "-october", 10 },
256 { "-september", 9 },
257 // The l+n branch (with its sub-nodes) is a duplicate but will be written
258 // both times because each time it follows a different linear-match node.
259 { "xjuly", 7 },
260 { "xjune", 6 }
261 };
262 checkData(data, UPRV_LENGTHOF(data));
263 }
264
buildMonthsTrie(UStringTrieBuildOption buildOption)265 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
266 // All types of nodes leading to the same value,
267 // for code coverage of recursive functions.
268 // In particular, we need a lot of branches on some single level
269 // to exercise a split-branch node.
270 static const StringAndValue data[]={
271 { "august", 8 },
272 { "jan", 1 },
273 { "jan.", 1 },
274 { "jana", 1 },
275 { "janbb", 1 },
276 { "janc", 1 },
277 { "janddd", 1 },
278 { "janee", 1 },
279 { "janef", 1 },
280 { "janf", 1 },
281 { "jangg", 1 },
282 { "janh", 1 },
283 { "janiiii", 1 },
284 { "janj", 1 },
285 { "jankk", 1 },
286 { "jankl", 1 },
287 { "jankmm", 1 },
288 { "janl", 1 },
289 { "janm", 1 },
290 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
291 { "jano", 1 },
292 { "janpp", 1 },
293 { "janqqq", 1 },
294 { "janr", 1 },
295 { "januar", 1 },
296 { "january", 1 },
297 { "july", 7 },
298 { "jun", 6 },
299 { "jun.", 6 },
300 { "june", 6 }
301 };
302 return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
303 }
304
TestHasUniqueValue()305 void BytesTrieTest::TestHasUniqueValue() {
306 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
307 if(trie.isNull()) {
308 return; // buildTrie() reported an error
309 }
310 int32_t uniqueValue;
311 if(trie->hasUniqueValue(uniqueValue)) {
312 errln("unique value at root");
313 }
314 trie->next('j');
315 trie->next('a');
316 trie->next('n');
317 // hasUniqueValue() directly after next()
318 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
319 errln("not unique value 1 after \"jan\"");
320 }
321 trie->first('j');
322 trie->next('u');
323 if(trie->hasUniqueValue(uniqueValue)) {
324 errln("unique value after \"ju\"");
325 }
326 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
327 errln("not normal value 6 after \"jun\"");
328 }
329 // hasUniqueValue() after getValue()
330 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
331 errln("not unique value 6 after \"jun\"");
332 }
333 // hasUniqueValue() from within a linear-match node
334 trie->first('a');
335 trie->next('u');
336 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
337 errln("not unique value 8 after \"au\"");
338 }
339 }
340
TestGetNextBytes()341 void BytesTrieTest::TestGetNextBytes() {
342 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
343 if(trie.isNull()) {
344 return; // buildTrie() reported an error
345 }
346 char buffer[40];
347 CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
348 int32_t count=trie->getNextBytes(sink);
349 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
350 errln("months getNextBytes()!=[aj] at root");
351 }
352 trie->next('j');
353 trie->next('a');
354 trie->next('n');
355 // getNextBytes() directly after next()
356 count=trie->getNextBytes(sink.Reset());
357 buffer[count]=0;
358 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
359 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
360 }
361 // getNextBytes() after getValue()
362 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
363 memset(buffer, 0, sizeof(buffer));
364 count=trie->getNextBytes(sink.Reset());
365 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
366 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
367 }
368 // getNextBytes() from a linear-match node
369 trie->next('u');
370 memset(buffer, 0, sizeof(buffer));
371 count=trie->getNextBytes(sink.Reset());
372 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
373 errln("months getNextBytes()!=[a] after \"janu\"");
374 }
375 trie->next('a');
376 memset(buffer, 0, sizeof(buffer));
377 count=trie->getNextBytes(sink.Reset());
378 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
379 errln("months getNextBytes()!=[r] after \"janua\"");
380 }
381 trie->next('r');
382 trie->next('y');
383 // getNextBytes() after a final match
384 count=trie->getNextBytes(sink.Reset());
385 if(count!=0 || sink.NumberOfBytesAppended()!=0) {
386 errln("months getNextBytes()!=[] after \"january\"");
387 }
388 }
389
TestIteratorFromBranch()390 void BytesTrieTest::TestIteratorFromBranch() {
391 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
392 if(trie.isNull()) {
393 return; // buildTrie() reported an error
394 }
395 // Go to a branch node.
396 trie->next('j');
397 trie->next('a');
398 trie->next('n');
399 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
400 BytesTrie::Iterator iter(*trie, 0, errorCode);
401 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
402 return;
403 }
404 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
405 // following "jan".
406 static const StringAndValue data[]={
407 { "", 1 },
408 { ".", 1 },
409 { "a", 1 },
410 { "bb", 1 },
411 { "c", 1 },
412 { "ddd", 1 },
413 { "ee", 1 },
414 { "ef", 1 },
415 { "f", 1 },
416 { "gg", 1 },
417 { "h", 1 },
418 { "iiii", 1 },
419 { "j", 1 },
420 { "kk", 1 },
421 { "kl", 1 },
422 { "kmm", 1 },
423 { "l", 1 },
424 { "m", 1 },
425 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
426 { "o", 1 },
427 { "pp", 1 },
428 { "qqq", 1 },
429 { "r", 1 },
430 { "uar", 1 },
431 { "uary", 1 }
432 };
433 checkIterator(iter, data, UPRV_LENGTHOF(data));
434 // Reset, and we should get the same result.
435 logln("after iter.reset()");
436 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
437 }
438
TestIteratorFromLinearMatch()439 void BytesTrieTest::TestIteratorFromLinearMatch() {
440 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
441 if(trie.isNull()) {
442 return; // buildTrie() reported an error
443 }
444 // Go into a linear-match node.
445 trie->next('j');
446 trie->next('a');
447 trie->next('n');
448 trie->next('u');
449 trie->next('a');
450 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
451 BytesTrie::Iterator iter(*trie, 0, errorCode);
452 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
453 return;
454 }
455 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
456 // following "janua".
457 static const StringAndValue data[]={
458 { "r", 1 },
459 { "ry", 1 }
460 };
461 checkIterator(iter, data, UPRV_LENGTHOF(data));
462 // Reset, and we should get the same result.
463 logln("after iter.reset()");
464 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
465 }
466
TestTruncatingIteratorFromRoot()467 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
468 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
469 if(trie.isNull()) {
470 return; // buildTrie() reported an error
471 }
472 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
473 BytesTrie::Iterator iter(*trie, 4, errorCode);
474 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
475 return;
476 }
477 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
478 // of each string, and no string duplicates from the truncation.
479 static const StringAndValue data[]={
480 { "augu", -1 },
481 { "jan", 1 },
482 { "jan.", 1 },
483 { "jana", 1 },
484 { "janb", -1 },
485 { "janc", 1 },
486 { "jand", -1 },
487 { "jane", -1 },
488 { "janf", 1 },
489 { "jang", -1 },
490 { "janh", 1 },
491 { "jani", -1 },
492 { "janj", 1 },
493 { "jank", -1 },
494 { "janl", 1 },
495 { "janm", 1 },
496 { "jann", -1 },
497 { "jano", 1 },
498 { "janp", -1 },
499 { "janq", -1 },
500 { "janr", 1 },
501 { "janu", -1 },
502 { "july", 7 },
503 { "jun", 6 },
504 { "jun.", 6 },
505 { "june", 6 }
506 };
507 checkIterator(iter, data, UPRV_LENGTHOF(data));
508 // Reset, and we should get the same result.
509 logln("after iter.reset()");
510 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
511 }
512
TestTruncatingIteratorFromLinearMatchShort()513 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
514 static const StringAndValue data[]={
515 { "abcdef", 10 },
516 { "abcdepq", 200 },
517 { "abcdeyz", 3000 }
518 };
519 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
520 if(trie.isNull()) {
521 return; // buildTrie() reported an error
522 }
523 // Go into a linear-match node.
524 trie->next('a');
525 trie->next('b');
526 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
527 // Truncate within the linear-match node.
528 BytesTrie::Iterator iter(*trie, 2, errorCode);
529 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
530 return;
531 }
532 static const StringAndValue expected[]={
533 { "cd", -1 }
534 };
535 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
536 // Reset, and we should get the same result.
537 logln("after iter.reset()");
538 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
539 }
540
TestTruncatingIteratorFromLinearMatchLong()541 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
542 static const StringAndValue data[]={
543 { "abcdef", 10 },
544 { "abcdepq", 200 },
545 { "abcdeyz", 3000 }
546 };
547 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
548 if(trie.isNull()) {
549 return; // buildTrie() reported an error
550 }
551 // Go into a linear-match node.
552 trie->next('a');
553 trie->next('b');
554 trie->next('c');
555 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
556 // Truncate after the linear-match node.
557 BytesTrie::Iterator iter(*trie, 3, errorCode);
558 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
559 return;
560 }
561 static const StringAndValue expected[]={
562 { "def", 10 },
563 { "dep", -1 },
564 { "dey", -1 }
565 };
566 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
567 // Reset, and we should get the same result.
568 logln("after iter.reset()");
569 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
570 }
571
TestIteratorFromBytes()572 void BytesTrieTest::TestIteratorFromBytes() {
573 static const StringAndValue data[]={
574 { "mm", 3 },
575 { "mmm", 33 },
576 { "mmnop", 333 }
577 };
578 builder_->clear();
579 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
580 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
581 builder_->add(data[i].s, data[i].value, errorCode);
582 }
583 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
584 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
585 checkIterator(iter, data, UPRV_LENGTHOF(data));
586 }
587
checkData(const StringAndValue data[],int32_t dataLength)588 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
589 logln("checkData(dataLength=%d, fast)", (int)dataLength);
590 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
591 logln("checkData(dataLength=%d, small)", (int)dataLength);
592 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
593 }
594
checkData(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)595 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
596 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
597 if(trie.isNull()) {
598 return; // buildTrie() reported an error
599 }
600 checkFirst(*trie, data, dataLength);
601 checkNext(*trie, data, dataLength);
602 checkNextWithState(*trie, data, dataLength);
603 checkNextString(*trie, data, dataLength);
604 checkIterator(*trie, data, dataLength);
605 }
606
buildTrie(const StringAndValue data[],int32_t dataLength,UStringTrieBuildOption buildOption)607 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
608 UStringTrieBuildOption buildOption) {
609 IcuTestErrorCode errorCode(*this, "buildTrie()");
610 // Add the items to the trie builder in an interesting (not trivial, not random) order.
611 int32_t index, step;
612 if(dataLength&1) {
613 // Odd number of items.
614 index=dataLength/2;
615 step=2;
616 } else if((dataLength%3)!=0) {
617 // Not a multiple of 3.
618 index=dataLength/5;
619 step=3;
620 } else {
621 index=dataLength-1;
622 step=-1;
623 }
624 builder_->clear();
625 for(int32_t i=0; i<dataLength; ++i) {
626 builder_->add(data[index].s, data[index].value, errorCode);
627 index=(index+step)%dataLength;
628 }
629 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
630 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
631 if(!errorCode.logIfFailureAndReset("add()/build()")) {
632 builder_->add("zzz", 999, errorCode);
633 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
634 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
635 }
636 }
637 logln("serialized trie size: %ld bytes\n", (long)sp.length());
638 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
639 if(sp.data()==sp2.data()) {
640 errln("builder.buildStringPiece() before & after build() returned same array");
641 }
642 if(errorCode.isFailure()) {
643 return NULL;
644 }
645 // Tries from either build() method should be identical but
646 // BytesTrie does not implement equals().
647 // We just return either one.
648 if((dataLength&1)!=0) {
649 return trie.orphan();
650 } else {
651 return new BytesTrie(sp2.data());
652 }
653 }
654
checkFirst(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)655 void BytesTrieTest::checkFirst(BytesTrie &trie,
656 const StringAndValue data[], int32_t dataLength) {
657 for(int32_t i=0; i<dataLength; ++i) {
658 int c=*data[i].s;
659 if(c==0) {
660 continue; // skip empty string
661 }
662 UStringTrieResult firstResult=trie.first(c);
663 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
664 UStringTrieResult nextResult=trie.next(data[i].s[1]);
665 if(firstResult!=trie.reset().next(c) ||
666 firstResult!=trie.current() ||
667 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
668 nextResult!=trie.next(data[i].s[1])
669 ) {
670 errln("trie.first(%c)!=trie.reset().next(same) for %s",
671 c, data[i].s);
672 }
673 }
674 trie.reset();
675 }
676
checkNext(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)677 void BytesTrieTest::checkNext(BytesTrie &trie,
678 const StringAndValue data[], int32_t dataLength) {
679 BytesTrie::State state;
680 for(int32_t i=0; i<dataLength; ++i) {
681 int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
682 UStringTrieResult result;
683 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
684 result!=trie.current()
685 ) {
686 errln("trie does not seem to contain %s", data[i].s);
687 } else if(trie.getValue()!=data[i].value) {
688 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
689 data[i].s,
690 (long)trie.getValue(), (long)trie.getValue(),
691 (long)data[i].value, (long)data[i].value);
692 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
693 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
694 }
695 trie.reset();
696 stringLength=strlen(data[i].s);
697 result=trie.current();
698 for(int32_t j=0; j<stringLength; ++j) {
699 if(!USTRINGTRIE_HAS_NEXT(result)) {
700 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
701 break;
702 }
703 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
704 trie.getValue();
705 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
706 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
707 break;
708 }
709 }
710 result=trie.next(data[i].s[j]);
711 if(!USTRINGTRIE_MATCHES(result)) {
712 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
713 break;
714 }
715 if(result!=trie.current()) {
716 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
717 break;
718 }
719 }
720 if(!USTRINGTRIE_HAS_VALUE(result)) {
721 errln("trie.next()!=hasValue at the end of %s", data[i].s);
722 continue;
723 }
724 trie.getValue();
725 if(result!=trie.current()) {
726 errln("trie.current() != current()+getValue()+current() after end of %s",
727 data[i].s);
728 }
729 // Compare the final current() with whether next() can actually continue.
730 trie.saveState(state);
731 UBool nextContinues=FALSE;
732 // Try all graphic characters; we only use those in test strings in this file.
733 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
734 const int32_t minChar=0x20;
735 const int32_t maxChar=0x7e;
736 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
737 const int32_t minChar=0x40;
738 const int32_t maxChar=0xfe;
739 #else
740 const int32_t minChar=0;
741 const int32_t maxChar=0xff;
742 #endif
743 for(int32_t c=minChar; c<=maxChar; ++c) {
744 if(trie.resetToState(state).next(c)) {
745 nextContinues=TRUE;
746 break;
747 }
748 }
749 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
750 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
751 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
752 }
753 trie.reset();
754 }
755 }
756
checkNextWithState(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)757 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
758 const StringAndValue data[], int32_t dataLength) {
759 BytesTrie::State noState, state;
760 for(int32_t i=0; i<dataLength; ++i) {
761 if((i&1)==0) {
762 // This should have no effect.
763 trie.resetToState(noState);
764 }
765 const char *expectedString=data[i].s;
766 int32_t stringLength=strlen(expectedString);
767 int32_t partialLength=stringLength/3;
768 for(int32_t j=0; j<partialLength; ++j) {
769 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
770 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
771 return;
772 }
773 }
774 trie.saveState(state);
775 UStringTrieResult resultAtState=trie.current();
776 UStringTrieResult result;
777 int32_t valueAtState=-99;
778 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
779 valueAtState=trie.getValue();
780 }
781 result=trie.next(0); // mismatch
782 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
783 errln("trie.next(0) matched after part of %s", data[i].s);
784 }
785 if( resultAtState!=trie.resetToState(state).current() ||
786 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
787 ) {
788 errln("trie.next(part of %s) changes current()/getValue() after "
789 "saveState/next(0)/resetToState",
790 data[i].s);
791 } else if(!USTRINGTRIE_HAS_VALUE(
792 result=trie.next(expectedString+partialLength,
793 stringLength-partialLength)) ||
794 result!=trie.current()) {
795 errln("trie.next(rest of %s) does not seem to contain %s after "
796 "saveState/next(0)/resetToState",
797 data[i].s, data[i].s);
798 } else if(!USTRINGTRIE_HAS_VALUE(
799 result=trie.resetToState(state).
800 next(expectedString+partialLength,
801 stringLength-partialLength)) ||
802 result!=trie.current()) {
803 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
804 data[i].s);
805 } else if(trie.getValue()!=data[i].value) {
806 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
807 data[i].s,
808 (long)trie.getValue(), (long)trie.getValue(),
809 (long)data[i].value, (long)data[i].value);
810 }
811 trie.reset();
812 }
813 }
814
815 // next(string) is also tested in other functions,
816 // but here we try to go partway through the string, and then beyond it.
checkNextString(BytesTrie & trie,const StringAndValue data[],int32_t dataLength)817 void BytesTrieTest::checkNextString(BytesTrie &trie,
818 const StringAndValue data[], int32_t dataLength) {
819 for(int32_t i=0; i<dataLength; ++i) {
820 const char *expectedString=data[i].s;
821 int32_t stringLength=strlen(expectedString);
822 if(!trie.next(expectedString, stringLength/2)) {
823 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
824 continue;
825 }
826 // Test that we stop properly at the end of the string.
827 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
828 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
829 }
830 trie.reset();
831 }
832 }
833
checkIterator(const BytesTrie & trie,const StringAndValue data[],int32_t dataLength)834 void BytesTrieTest::checkIterator(const BytesTrie &trie,
835 const StringAndValue data[], int32_t dataLength) {
836 IcuTestErrorCode errorCode(*this, "checkIterator()");
837 BytesTrie::Iterator iter(trie, 0, errorCode);
838 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
839 return;
840 }
841 checkIterator(iter, data, dataLength);
842 }
843
checkIterator(BytesTrie::Iterator & iter,const StringAndValue data[],int32_t dataLength)844 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
845 const StringAndValue data[], int32_t dataLength) {
846 IcuTestErrorCode errorCode(*this, "checkIterator()");
847 for(int32_t i=0; i<dataLength; ++i) {
848 if(!iter.hasNext()) {
849 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
850 break;
851 }
852 UBool hasNext=iter.next(errorCode);
853 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
854 break;
855 }
856 if(!hasNext) {
857 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
858 break;
859 }
860 if(iter.getString()!=StringPiece(data[i].s)) {
861 errln("trie iterator next().getString()=%s but expected %s for item %d",
862 iter.getString().data(), data[i].s, (int)i);
863 }
864 if(iter.getValue()!=data[i].value) {
865 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
866 (long)iter.getValue(), (long)iter.getValue(),
867 (long)data[i].value, (long)data[i].value,
868 (int)i, data[i].s);
869 }
870 }
871 if(iter.hasNext()) {
872 errln("trie iterator hasNext()=TRUE after all items");
873 }
874 UBool hasNext=iter.next(errorCode);
875 errorCode.logIfFailureAndReset("trie iterator next() after all items");
876 if(hasNext) {
877 errln("trie iterator next()=TRUE after all items");
878 }
879 }
880