// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// ucptrietest.c (modified from trie2test.c)
// created: 2017dec29 Markus W. Scherer

7 #include <stdbool.h>
8 #include <stdio.h>
9 #include "unicode/utypes.h"
10 #include "unicode/ucptrie.h"
11 #include "unicode/umutablecptrie.h"
12 #include "unicode/utf.h"
13 #include "unicode/utf16.h"
14 #include "unicode/utf8.h"
15 #include "uassert.h"
16 #include "utrie.h"
17 #include "cstring.h"
18 #include "cmemory.h"
19 #include "udataswp.h"
20 #include "cintltst.h"
21 #include "writesrc.h"
22 #include "toolutil.h"
23 
24 void addUCPTrieTest(TestNode** root);
25 
26 /* Values for setting possibly overlapping, out-of-order ranges of values */
27 typedef struct SetRange {
28     UChar32 start, limit;
29     uint32_t value;
30 } SetRange;
31 
32 /*
33  * Values for testing:
34  * value is set from the previous boundary's limit to before
35  * this boundary's limit
36  *
37  * There must be an entry with limit 0 and the intialValue.
38  * It may be preceded by an entry with negative limit and the errorValue.
39  */
40 typedef struct CheckRange {
41     UChar32 limit;
42     uint32_t value;
43 } CheckRange;
44 
45 static int32_t
skipSpecialValues(const CheckRange checkRanges[],int32_t countCheckRanges)46 skipSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges) {
47     int32_t i;
48     for(i=0; i<countCheckRanges && checkRanges[i].limit<=0; ++i) {}
49     return i;
50 }
51 
52 static int32_t
getSpecialValues(const CheckRange checkRanges[],int32_t countCheckRanges,uint32_t * pInitialValue,uint32_t * pErrorValue)53 getSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges,
54                  uint32_t *pInitialValue, uint32_t *pErrorValue) {
55     int32_t i=0;
56     if(i<countCheckRanges && checkRanges[i].limit<0) {
57         *pErrorValue=checkRanges[i++].value;
58     } else {
59         *pErrorValue=0xad;
60     }
61     if(i<countCheckRanges && checkRanges[i].limit==0) {
62         *pInitialValue=checkRanges[i++].value;
63     } else {
64         *pInitialValue=0;
65     }
66     return i;
67 }
68 
69 /* ucptrie_enum() callback, modifies a value */
70 static uint32_t U_CALLCONV
testFilter(const void * context,uint32_t value)71 testFilter(const void *context, uint32_t value) {
72     (void)context; // suppress compiler warnings about unused variable
73     return value ^ 0x5555;
74 }
75 
76 static UBool
doCheckRange(const char * name,const char * variant,UChar32 start,UChar32 end,uint32_t value,UChar32 expEnd,uint32_t expValue)77 doCheckRange(const char *name, const char *variant,
78              UChar32 start, UChar32 end, uint32_t value,
79              UChar32 expEnd, uint32_t expValue) {
80     if (end < 0) {
81         if (expEnd >= 0) {
82             log_err("error: %s getRanges (%s) fails to deliver range [U+%04lx..U+%04lx].0x%lx\n",
83                     name, variant, (long)start, (long)expEnd, (long)expValue);
84         }
85         return false;
86     }
87     if (expEnd < 0) {
88         log_err("error: %s getRanges (%s) delivers unexpected range [U+%04lx..U+%04lx].0x%lx\n",
89                 name, variant, (long)start, (long)end, (long)value);
90         return false;
91     }
92     if (end != expEnd || value != expValue) {
93         log_err("error: %s getRanges (%s) delivers wrong range [U+%04lx..U+%04lx].0x%lx "
94                 "instead of [U+%04lx..U+%04lx].0x%lx\n",
95                 name, variant, (long)start, (long)end, (long)value,
96                 (long)start, (long)expEnd, (long)expValue);
97         return false;
98     }
99     return true;
100 }
101 
102 // Test iteration starting from various UTF-8/16 and trie structure boundaries.
103 // Also test starting partway through lead & trail surrogates for fixed-surrogate-value options,
104 // and partway through supplementary code points.
105 static UChar32 iterStarts[] = {
106     0, 0x7f, 0x80, 0x7ff, 0x800, 0xfff, 0x1000,
107     0xd7ff, 0xd800, 0xd888, 0xdddd, 0xdfff, 0xe000,
108     0xffff, 0x10000, 0x12345, 0x10ffff, 0x110000
109 };
110 
111 static void
testTrieGetRanges(const char * testName,const UCPTrie * trie,const UMutableCPTrie * mutableTrie,UCPMapRangeOption option,uint32_t surrValue,const CheckRange checkRanges[],int32_t countCheckRanges)112 testTrieGetRanges(const char *testName, const UCPTrie *trie, const UMutableCPTrie *mutableTrie,
113                   UCPMapRangeOption option, uint32_t surrValue,
114                   const CheckRange checkRanges[], int32_t countCheckRanges) {
115     const char *const typeName = trie == NULL ? "mutableTrie" : "trie";
116     const char *const optionName = option == UCPMAP_RANGE_NORMAL ? "normal" :
117         option == UCPMAP_RANGE_FIXED_LEAD_SURROGATES ? "fixedLeadSurr" : "fixedAllSurr";
118     char name[80];
119     int32_t s;
120     for (s = 0; s < UPRV_LENGTHOF(iterStarts); ++s) {
121         UChar32 start = iterStarts[s];
122         int32_t i, i0;
123         UChar32 end, expEnd;
124         uint32_t value, expValue;
125         // No need to go from each iteration start to the very end.
126         int32_t innerLoopCount;
127 
128         sprintf(name, "%s/%s(%s) min=U+%04lx", typeName, optionName, testName, (long)start);
129 
130         // Skip over special values and low ranges.
131         for (i = 0; i < countCheckRanges && checkRanges[i].limit <= start; ++i) {}
132         i0 = i;
133         // without value handler
134         for (innerLoopCount = 0;; ++i, start = end + 1) {
135             if (i < countCheckRanges) {
136                 expEnd = checkRanges[i].limit - 1;
137                 expValue = checkRanges[i].value;
138             } else {
139                 expEnd = -1;
140                 expValue = value = 0x5005;
141             }
142             end = trie != NULL ?
143                 ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, &value) :
144                 umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, &value);
145             if (!doCheckRange(name, "without value handler", start, end, value, expEnd, expValue)) {
146                 break;
147             }
148             if (s != 0 && ++innerLoopCount == 5) { break; }
149         }
150         // with value handler
151         for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) {
152             if (i < countCheckRanges) {
153                 expEnd = checkRanges[i].limit - 1;
154                 expValue = checkRanges[i].value ^ 0x5555;
155             } else {
156                 expEnd = -1;
157                 expValue = value = 0x5005;
158             }
159             end = trie != NULL ?
160                 ucptrie_getRange(trie, start, option, surrValue ^ 0x5555, testFilter, NULL, &value) :
161                 umutablecptrie_getRange(mutableTrie, start, option, surrValue ^ 0x5555,
162                                         testFilter, NULL, &value);
163             if (!doCheckRange(name, "with value handler", start, end, value, expEnd, expValue)) {
164                 break;
165             }
166             if (s != 0 && ++innerLoopCount == 5) { break; }
167         }
168         // without value
169         for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) {
170             if (i < countCheckRanges) {
171                 expEnd = checkRanges[i].limit - 1;
172             } else {
173                 expEnd = -1;
174             }
175             end = trie != NULL ?
176                 ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, NULL) :
177                 umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, NULL);
178             if (!doCheckRange(name, "without value", start, end, 0, expEnd, 0)) {
179                 break;
180             }
181             if (s != 0 && ++innerLoopCount == 5) { break; }
182         }
183     }
184 }
185 
186 static void
testTrieGetters(const char * testName,const UCPTrie * trie,UCPTrieType type,UCPTrieValueWidth valueWidth,const CheckRange checkRanges[],int32_t countCheckRanges)187 testTrieGetters(const char *testName, const UCPTrie *trie,
188                 UCPTrieType type, UCPTrieValueWidth valueWidth,
189                 const CheckRange checkRanges[], int32_t countCheckRanges) {
190     uint32_t initialValue, errorValue;
191     uint32_t value, value2;
192     UChar32 start, limit;
193     int32_t i, countSpecials;
194     int32_t countErrors=0;
195 
196     const char *const typeName = "trie";
197 
198     countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
199 
200     start=0;
201     for(i=countSpecials; i<countCheckRanges; ++i) {
202         limit=checkRanges[i].limit;
203         value=checkRanges[i].value;
204 
205         while(start<limit) {
206             if (start <= 0x7f) {
207                 if (valueWidth == UCPTRIE_VALUE_BITS_16) {
208                     value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_16, start);
209                 } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
210                     value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_32, start);
211                 } else {
212                     value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_8, start);
213                 }
214                 if (value != value2) {
215                     log_err("error: %s(%s).fromASCII(U+%04lx)==0x%lx instead of 0x%lx\n",
216                             typeName, testName, (long)start, (long)value2, (long)value);
217                     ++countErrors;
218                 }
219             }
220             if (type == UCPTRIE_TYPE_FAST) {
221                 if(start<=0xffff) {
222                     if(valueWidth==UCPTRIE_VALUE_BITS_16) {
223                         value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_16, start);
224                     } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
225                         value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_32, start);
226                     } else {
227                         value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_8, start);
228                     }
229                     if(value!=value2) {
230                         log_err("error: %s(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
231                                 typeName, testName, (long)start, (long)value2, (long)value);
232                         ++countErrors;
233                     }
234                 } else {
235                     if(valueWidth==UCPTRIE_VALUE_BITS_16) {
236                         value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_16, start);
237                     } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
238                         value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_32, start);
239                     } else {
240                         value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_8, start);
241                     }
242                     if(value!=value2) {
243                         log_err("error: %s(%s).fromSupp(U+%04lx)==0x%lx instead of 0x%lx\n",
244                                 typeName, testName, (long)start, (long)value2, (long)value);
245                         ++countErrors;
246                     }
247                 }
248                 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
249                     value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_16, start);
250                 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
251                     value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_32, start);
252                 } else {
253                     value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_8, start);
254                 }
255             } else {
256                 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
257                     value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, start);
258                 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
259                     value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, start);
260                 } else {
261                     value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, start);
262                 }
263             }
264             if(value!=value2) {
265                 log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
266                         typeName, testName, (long)start, (long)value2, (long)value);
267                 ++countErrors;
268             }
269             value2=ucptrie_get(trie, start);
270             if(value!=value2) {
271                 log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
272                         typeName, testName, (long)start, (long)value2, (long)value);
273                 ++countErrors;
274             }
275             ++start;
276             if(countErrors>10) {
277                 return;
278             }
279         }
280     }
281 
282     /* test linear ASCII range from the data array pointer (access to "internal" field) */
283     start=0;
284     for(i=countSpecials; i<countCheckRanges && start<=0x7f; ++i) {
285         limit=checkRanges[i].limit;
286         value=checkRanges[i].value;
287 
288         while(start<limit && start<=0x7f) {
289             if(valueWidth==UCPTRIE_VALUE_BITS_16) {
290                 value2=trie->data.ptr16[start];
291             } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
292                 value2=trie->data.ptr32[start];
293             } else {
294                 value2=trie->data.ptr8[start];
295             }
296             if(value!=value2) {
297                 log_err("error: %s(%s).asciiData[U+%04lx]==0x%lx instead of 0x%lx\n",
298                         typeName, testName, (long)start, (long)value2, (long)value);
299                 ++countErrors;
300             }
301             ++start;
302             if(countErrors>10) {
303                 return;
304             }
305         }
306     }
307 
308     /* test errorValue */
309     if (type == UCPTRIE_TYPE_FAST) {
310         if(valueWidth==UCPTRIE_VALUE_BITS_16) {
311             value = UCPTRIE_FAST_GET(trie, UCPTRIE_16, -1);
312             value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_16, 0x110000);
313         } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
314             value = UCPTRIE_FAST_GET(trie, UCPTRIE_32, -1);
315             value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_32, 0x110000);
316         } else {
317             value = UCPTRIE_FAST_GET(trie, UCPTRIE_8, -1);
318             value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_8, 0x110000);
319         }
320     } else {
321         if(valueWidth==UCPTRIE_VALUE_BITS_16) {
322             value = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, -1);
323             value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, 0x110000);
324         } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
325             value = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, -1);
326             value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, 0x110000);
327         } else {
328             value = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, -1);
329             value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, 0x110000);
330         }
331     }
332     if(value!=errorValue || value2!=errorValue) {
333         log_err("error: %s(%s).get(out of range) != errorValue\n",
334                 typeName, testName);
335     }
336     value=ucptrie_get(trie, -1);
337     value2=ucptrie_get(trie, 0x110000);
338     if(value!=errorValue || value2!=errorValue) {
339         log_err("error: %s(%s).get(out of range) != errorValue\n",
340                 typeName, testName);
341     }
342 }
343 
344 static void
testBuilderGetters(const char * testName,const UMutableCPTrie * mutableTrie,const CheckRange checkRanges[],int32_t countCheckRanges)345 testBuilderGetters(const char *testName, const UMutableCPTrie *mutableTrie,
346                    const CheckRange checkRanges[], int32_t countCheckRanges) {
347     uint32_t initialValue, errorValue;
348     uint32_t value, value2;
349     UChar32 start, limit;
350     int32_t i, countSpecials;
351     int32_t countErrors=0;
352 
353     const char *const typeName = "mutableTrie";
354 
355     countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
356 
357     start=0;
358     for(i=countSpecials; i<countCheckRanges; ++i) {
359         limit=checkRanges[i].limit;
360         value=checkRanges[i].value;
361 
362         while(start<limit) {
363             value2=umutablecptrie_get(mutableTrie, start);
364             if(value!=value2) {
365                 log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
366                         typeName, testName, (long)start, (long)value2, (long)value);
367                 ++countErrors;
368             }
369             ++start;
370             if(countErrors>10) {
371                 return;
372             }
373         }
374     }
375 
376     /* test errorValue */
377     value=umutablecptrie_get(mutableTrie, -1);
378     value2=umutablecptrie_get(mutableTrie, 0x110000);
379     if(value!=errorValue || value2!=errorValue) {
380         log_err("error: %s(%s).get(out of range) != errorValue\n",
381                 typeName, testName);
382     }
383 }
384 
/*
 * True if the last UTF-16 unit of s[0..length[ is a lead surrogate and cp is a
 * trail surrogate, i.e. appending cp would accidentally form a surrogate pair.
 * Arguments and the whole expansion are parenthesized so that the macro is safe
 * with expression arguments and inside larger expressions.
 */
#define ACCIDENTAL_SURROGATE_PAIR(s, length, cp) \
    ((length) > 0 && U16_IS_LEAD((s)[(length)-1]) && U_IS_TRAIL(cp))
386 
387 static void
testTrieUTF16(const char * testName,const UCPTrie * trie,UCPTrieValueWidth valueWidth,const CheckRange checkRanges[],int32_t countCheckRanges)388 testTrieUTF16(const char *testName,
389               const UCPTrie *trie, UCPTrieValueWidth valueWidth,
390               const CheckRange checkRanges[], int32_t countCheckRanges) {
391     UChar s[30000];
392     uint32_t values[16000];
393 
394     const UChar *p, *limit;
395 
396     uint32_t errorValue = ucptrie_get(trie, -1);
397     uint32_t value, expected;
398     UChar32 prevCP, c, c2;
399     int32_t i, length, sIndex, countValues;
400 
401     /* write a string */
402     prevCP=0;
403     length=countValues=0;
404     for(i=skipSpecialValues(checkRanges, countCheckRanges); i<countCheckRanges; ++i) {
405         value=checkRanges[i].value;
406         /* write three code points */
407         if(!ACCIDENTAL_SURROGATE_PAIR(s, length, prevCP)) {
408             U16_APPEND_UNSAFE(s, length, prevCP);   /* start of the range */
409             values[countValues++]=value;
410         }
411         U_ASSERT(length < UPRV_LENGTHOF(s) && countValues < UPRV_LENGTHOF(values));
412         c=checkRanges[i].limit;
413         prevCP=(prevCP+c)/2;                    /* middle of the range */
414         if(!ACCIDENTAL_SURROGATE_PAIR(s, length, prevCP)) {
415             U16_APPEND_UNSAFE(s, length, prevCP);
416             values[countValues++]=value;
417         }
418         prevCP=c;
419         --c;                                    /* end of the range */
420         if(!ACCIDENTAL_SURROGATE_PAIR(s, length, c)) {
421             U16_APPEND_UNSAFE(s, length, c);
422             values[countValues++]=value;
423         }
424     }
425     limit=s+length;
426     if(length>UPRV_LENGTHOF(s)) {
427         log_err("UTF-16 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s));
428         return;
429     }
430     if(countValues>UPRV_LENGTHOF(values)) {
431         log_err("UTF-16 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values));
432         return;
433     }
434 
435     /* try forward */
436     p=s;
437     i=0;
438     while(p<limit) {
439         sIndex=(int32_t)(p-s);
440         U16_NEXT(s, sIndex, length, c2);
441         c=0x33;
442         if(valueWidth==UCPTRIE_VALUE_BITS_16) {
443             UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_16, p, limit, c, value);
444         } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
445             UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_32, p, limit, c, value);
446         } else {
447             UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_8, p, limit, c, value);
448         }
449         expected = U_IS_SURROGATE(c) ? errorValue : values[i];
450         if(value!=expected) {
451             log_err("error: wrong value from UCPTRIE_NEXT(%s)(U+%04lx): 0x%lx instead of 0x%lx\n",
452                     testName, (long)c, (long)value, (long)expected);
453         }
454         if(c!=c2) {
455             log_err("error: wrong code point from UCPTRIE_NEXT(%s): U+%04lx != U+%04lx\n",
456                     testName, (long)c, (long)c2);
457             continue;
458         }
459         ++i;
460     }
461 
462     /* try backward */
463     p=limit;
464     i=countValues;
465     while(s<p) {
466         --i;
467         sIndex=(int32_t)(p-s);
468         U16_PREV(s, 0, sIndex, c2);
469         c=0x33;
470         if(valueWidth==UCPTRIE_VALUE_BITS_16) {
471             UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_16, s, p, c, value);
472         } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
473             UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_32, s, p, c, value);
474         } else {
475             UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_8, s, p, c, value);
476         }
477         expected = U_IS_SURROGATE(c) ? errorValue : values[i];
478         if(value!=expected) {
479             log_err("error: wrong value from UCPTRIE_PREV(%s)(U+%04lx): 0x%lx instead of 0x%lx\n",
480                     testName, (long)c, (long)value, (long)expected);
481         }
482         if(c!=c2) {
483             log_err("error: wrong code point from UCPTRIE_PREV(%s): U+%04lx != U+%04lx\n",
484                     testName, c, c2);
485         }
486     }
487 }
488 
489 static void
testTrieUTF8(const char * testName,const UCPTrie * trie,UCPTrieValueWidth valueWidth,const CheckRange checkRanges[],int32_t countCheckRanges)490 testTrieUTF8(const char *testName,
491              const UCPTrie *trie, UCPTrieValueWidth valueWidth,
492              const CheckRange checkRanges[], int32_t countCheckRanges) {
493     // Note: The byte sequence comments refer to the original UTF-8 definition.
494     // Starting with ICU 60, any sequence that is not a prefix of a valid one
495     // is treated as multiple single-byte errors.
496     // For testing, we only rely on U8_... and UCPTrie UTF-8 macros
497     // iterating consistently.
498     static const uint8_t illegal[]={
499         0xc0, 0x80,                         /* non-shortest U+0000 */
500         0xc1, 0xbf,                         /* non-shortest U+007f */
501         0xc2,                               /* truncated */
502         0xe0, 0x90, 0x80,                   /* non-shortest U+0400 */
503         0xe0, 0xa0,                         /* truncated */
504         0xed, 0xa0, 0x80,                   /* lead surrogate U+d800 */
505         0xed, 0xbf, 0xbf,                   /* trail surrogate U+dfff */
506         0xf0, 0x8f, 0xbf, 0xbf,             /* non-shortest U+ffff */
507         0xf0, 0x90, 0x80,                   /* truncated */
508         0xf4, 0x90, 0x80, 0x80,             /* beyond-Unicode U+110000 */
509         0xf8, 0x80, 0x80, 0x80,             /* truncated */
510         0xf8, 0x80, 0x80, 0x80, 0x80,       /* 5-byte UTF-8 */
511         0xfd, 0xbf, 0xbf, 0xbf, 0xbf,       /* truncated */
512         0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, /* 6-byte UTF-8 */
513         0xfe,
514         0xff
515     };
516     uint8_t s[60000];
517     uint32_t values[16000];
518 
519     const uint8_t *p, *limit;
520 
521     uint32_t initialValue, errorValue;
522     uint32_t value, expectedBytes, actualBytes;
523     UChar32 prevCP, c;
524     int32_t i, countSpecials, length, countValues;
525     int32_t prev8, i8;
526 
527     countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
528 
529     /* write a string */
530     prevCP=0;
531     length=countValues=0;
532     /* first a couple of trail bytes in lead position */
533     s[length++]=0x80;
534     values[countValues++]=errorValue;
535     s[length++]=0xbf;
536     values[countValues++]=errorValue;
537     prev8=i8=0;
538     for(i=countSpecials; i<countCheckRanges; ++i) {
539         value=checkRanges[i].value;
540         /* write three legal (or surrogate) code points */
541         U8_APPEND_UNSAFE(s, length, prevCP);    /* start of the range */
542         if(U_IS_SURROGATE(prevCP)) {
543             // A surrogate byte sequence counts as 3 single-byte errors.
544             values[countValues++]=errorValue;
545             values[countValues++]=errorValue;
546             values[countValues++]=errorValue;
547         } else {
548             values[countValues++]=value;
549         }
550         U_ASSERT(length < UPRV_LENGTHOF(s) && countValues < UPRV_LENGTHOF(values));
551         c=checkRanges[i].limit;
552         prevCP=(prevCP+c)/2;                    /* middle of the range */
553         U8_APPEND_UNSAFE(s, length, prevCP);
554         if(U_IS_SURROGATE(prevCP)) {
555             // A surrogate byte sequence counts as 3 single-byte errors.
556             values[countValues++]=errorValue;
557             values[countValues++]=errorValue;
558             values[countValues++]=errorValue;
559         } else {
560             values[countValues++]=value;
561         }
562         prevCP=c;
563         --c;                                    /* end of the range */
564         U8_APPEND_UNSAFE(s, length, c);
565         if(U_IS_SURROGATE(c)) {
566             // A surrogate byte sequence counts as 3 single-byte errors.
567             values[countValues++]=errorValue;
568             values[countValues++]=errorValue;
569             values[countValues++]=errorValue;
570         } else {
571             values[countValues++]=value;
572         }
573         /* write an illegal byte sequence */
574         if(i8<(int32_t)sizeof(illegal)) {
575             U8_FWD_1(illegal, i8, sizeof(illegal));
576             while(prev8<i8) {
577                 s[length++]=illegal[prev8++];
578             }
579             values[countValues++]=errorValue;
580         }
581     }
582     /* write the remaining illegal byte sequences */
583     while(i8<(int32_t)sizeof(illegal)) {
584         U8_FWD_1(illegal, i8, sizeof(illegal));
585         while(prev8<i8) {
586             s[length++]=illegal[prev8++];
587         }
588         values[countValues++]=errorValue;
589     }
590     limit=s+length;
591     if(length>UPRV_LENGTHOF(s)) {
592         log_err("UTF-8 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s));
593         return;
594     }
595     if(countValues>UPRV_LENGTHOF(values)) {
596         log_err("UTF-8 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values));
597         return;
598     }
599 
600     /* try forward */
601     p=s;
602     i=0;
603     while(p<limit) {
604         prev8=i8=(int32_t)(p-s);
605         U8_NEXT(s, i8, length, c);
606         if(valueWidth==UCPTRIE_VALUE_BITS_16) {
607             UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_16, p, limit, value);
608         } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
609             UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_32, p, limit, value);
610         } else {
611             UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_8, p, limit, value);
612         }
613         expectedBytes=0;
614         if(value!=values[i] || i8!=(p-s)) {
615             int32_t k=prev8;
616             while(k<i8) {
617                 expectedBytes=(expectedBytes<<8)|s[k++];
618             }
619         }
620         if(i8==(p-s)) {
621             actualBytes=expectedBytes;
622         } else {
623             actualBytes=0;
624             int32_t k=prev8;
625             while(k<(p-s)) {
626                 actualBytes=(actualBytes<<8)|s[k++];
627             }
628         }
629         if(value!=values[i]) {
630             log_err("error: wrong value from UCPTRIE_FAST_U8_NEXT(%s)(from %d %lx->U+%04lx) (read %d bytes): "
631                     "0x%lx instead of 0x%lx (from bytes %lx)\n",
632                     testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)((p-s)-prev8),
633                     (long)value, (long)values[i], (unsigned long)expectedBytes);
634         }
635         if(i8!=(p-s)) {
636             log_err("error: wrong end index from UCPTRIE_FAST_U8_NEXT(%s)(from %d %lx->U+%04lx): "
637                     "%ld != %ld (bytes %lx)\n",
638                     testName, (int)prev8, (unsigned long)actualBytes, (long)c,
639                     (long)(p-s), (long)i8, (unsigned long)expectedBytes);
640             break;
641         }
642         ++i;
643     }
644 
645     /* try backward */
646     p=limit;
647     i=countValues;
648     while(s<p) {
649         --i;
650         prev8=i8=(int32_t)(p-s);
651         U8_PREV(s, 0, i8, c);
652         if(valueWidth==UCPTRIE_VALUE_BITS_16) {
653             UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_16, s, p, value);
654         } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
655             UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_32, s, p, value);
656         } else {
657             UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_8, s, p, value);
658         }
659         expectedBytes=0;
660         if(value!=values[i] || i8!=(p-s)) {
661             int32_t k=i8;
662             while(k<prev8) {
663                 expectedBytes=(expectedBytes<<8)|s[k++];
664             }
665         }
666         if(i8==(p-s)) {
667             actualBytes=expectedBytes;
668         } else {
669             actualBytes=0;
670             int32_t k=(int32_t)(p-s);
671             while(k<prev8) {
672                 actualBytes=(actualBytes<<8)|s[k++];
673             }
674         }
675         if(value!=values[i]) {
676             log_err("error: wrong value from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx) (read %d bytes): "
677                     "0x%lx instead of 0x%lx (from bytes %lx)\n",
678                     testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)(prev8-(p-s)),
679                     (long)value, (long)values[i], (unsigned long)expectedBytes);
680         }
681         if(i8!=(p-s)) {
682             log_err("error: wrong end index from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx): "
683                     "%ld != %ld (bytes %lx)\n",
684                     testName, (int)prev8, (unsigned long)actualBytes, (long)c,
685                     (long)(p-s), (long)i8, (unsigned long)expectedBytes);
686             break;
687         }
688     }
689 }
690 
691 static void
testTrie(const char * testName,const UCPTrie * trie,UCPTrieType type,UCPTrieValueWidth valueWidth,const CheckRange checkRanges[],int32_t countCheckRanges)692 testTrie(const char *testName, const UCPTrie *trie,
693          UCPTrieType type, UCPTrieValueWidth valueWidth,
694          const CheckRange checkRanges[], int32_t countCheckRanges) {
695     testTrieGetters(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
696     testTrieGetRanges(testName, trie, NULL, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
697     if (type == UCPTRIE_TYPE_FAST) {
698         testTrieUTF16(testName, trie, valueWidth, checkRanges, countCheckRanges);
699         testTrieUTF8(testName, trie, valueWidth, checkRanges, countCheckRanges);
700     }
701 }
702 
703 static void
testBuilder(const char * testName,const UMutableCPTrie * mutableTrie,const CheckRange checkRanges[],int32_t countCheckRanges)704 testBuilder(const char *testName, const UMutableCPTrie *mutableTrie,
705             const CheckRange checkRanges[], int32_t countCheckRanges) {
706     testBuilderGetters(testName, mutableTrie, checkRanges, countCheckRanges);
707     testTrieGetRanges(testName, NULL, mutableTrie, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
708 }
709 
710 static void
trieTestGolden(const char * testName,const UCPTrie * trie,const CheckRange checkRanges[],int32_t countCheckRanges)711 trieTestGolden(const char *testName,
712              const UCPTrie* trie,
713              const CheckRange checkRanges[],
714              int32_t countCheckRanges) {
715     log_verbose("golden testing Trie '%s'\n", testName);
716 
717     UErrorCode status = U_ZERO_ERROR;
718     const char *testdatapath = loadSourceTestData(&status);
719     char goldendatapath[512];
720     // note: snprintf always writes a NUL terminator.
721     snprintf(goldendatapath, 512, "%scodepointtrie%s%s.toml",
722         testdatapath, U_FILE_SEP_STRING, testName);
723 
724     // Write the data into a tmpfile (memstream is not portable)
725     FILE* stream = tmpfile();
726     usrc_writeCopyrightHeader(stream, "#", 2021);
727     usrc_writeFileNameGeneratedBy(stream, "#", testName, "ucptrietest.c");
728     fputs("[code_point_trie.struct]\n", stream);
729     fprintf(stream, "name = \"%s\"\n", testName);
730     usrc_writeUCPTrie(stream, testName, trie, UPRV_TARGET_SYNTAX_TOML);
731     fputs("\n[code_point_trie.testdata]\n", stream);
732     fputs("# Array of (limit, value) pairs\n", stream);
733     usrc_writeArray(stream, "checkRanges = [\n  ",
734         // Note: CheckRange is a tuple of two 32-bit words
735         checkRanges, 32, countCheckRanges*2,
736         "  ", "\n]\n");
737 
738     // Convert the stream into a memory buffer
739     long fsize = ftell(stream);
740     void* memoryBuffer = malloc(fsize + 1);
741     if (memoryBuffer == NULL) {
742         status = U_MEMORY_ALLOCATION_ERROR;
743         log_err_status(status, "Could not allocate buffer: %s", goldendatapath);
744         goto cleanup;
745     }
746     fseek(stream, 0, SEEK_SET);
747     fread(memoryBuffer, 1, fsize, stream);
748 
749     int32_t testResult = uprv_compareGoldenFiles(
750         memoryBuffer, fsize,
751         goldendatapath,
752         getTestOption(WRITE_GOLDEN_DATA_OPTION));
753 
754     if (testResult >= 0) {
755         log_err(
756             "Golden files for '%s' differ at index %d; "
757             "run cintltst with -G to write new goldens",
758             testName, testResult);
759     }
760 
761 cleanup:
762     fclose(stream);
763     free(memoryBuffer);
764 }
765 
/*
 * Scratch buffers for the serialization tests.
 * storage[] receives the output of ucptrie_toBinary() and is the input for
 * ucptrie_openFromBinary(); swapped[] holds the opposite-endian copy while
 * the data is being swapped back and forth.
 */
enum { kTrieTestBufferLength = 120000 };  /* number of uint32_t per buffer */

static uint32_t storage[kTrieTestBufferLength];
static uint32_t swapped[kTrieTestBufferLength];
768 
769 static void
testTrieSerialize(const char * testName,UMutableCPTrie * mutableTrie,UCPTrieType type,UCPTrieValueWidth valueWidth,UBool withSwap,const CheckRange checkRanges[],int32_t countCheckRanges)770 testTrieSerialize(const char *testName, UMutableCPTrie *mutableTrie,
771                   UCPTrieType type, UCPTrieValueWidth valueWidth, UBool withSwap,
772                   const CheckRange checkRanges[], int32_t countCheckRanges) {
773     UCPTrie *trie;
774     int32_t length1, length2, length3;
775     UErrorCode errorCode;
776 
777     /* clone the trie so that the caller can reuse the original */
778     errorCode=U_ZERO_ERROR;
779     mutableTrie = umutablecptrie_clone(mutableTrie, &errorCode);
780     if(U_FAILURE(errorCode)) {
781         log_err("error: umutablecptrie_clone(%s) failed - %s\n",
782                 testName, u_errorName(errorCode));
783         return;
784     }
785 
786     /*
787      * This is not a loop, but simply a block that we can exit with "break"
788      * when something goes wrong.
789      */
790     do {
791         errorCode=U_ZERO_ERROR;
792         trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode);
793         if (U_FAILURE(errorCode)) {
794             log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n",
795                     testName, u_errorName(errorCode));
796             break;
797         }
798         errorCode=U_ZERO_ERROR;
799         length1=ucptrie_toBinary(trie, NULL, 0, &errorCode);
800         if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
801             log_err("error: ucptrie_toBinary(%s) preflighting set %s != U_BUFFER_OVERFLOW_ERROR\n",
802                     testName, u_errorName(errorCode));
803             break;
804         }
805         errorCode=U_ZERO_ERROR;
806         length2=ucptrie_toBinary(trie, storage, sizeof(storage), &errorCode);
807         if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
808             log_err("error: ucptrie_toBinary(%s) needs more memory\n", testName);
809             break;
810         }
811         if(U_FAILURE(errorCode)) {
812             log_err("error: ucptrie_toBinary(%s) failed: %s\n", testName, u_errorName(errorCode));
813             break;
814         }
815         if(length1!=length2) {
816             log_err("error: trie serialization (%s) lengths different: "
817                     "preflight vs. serialize\n", testName);
818             break;
819         }
820 
821         testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
822 
823         // Compare the tries with golden data, also used for ICU4X
824         // Don't print out cloning tests (redundant data)
825         // Don't print out stress tests (file size too large)
826         // Omit some of the short-all-same tests (~25KB apiece)
827         if (!withSwap &&
828                 uprv_strncmp("many-", testName, 5) != 0 &&
829                 uprv_strncmp("much-", testName, 5) != 0 &&
830                 uprv_strncmp("short-all-same.16", testName, 17) != 0 &&
831                 uprv_strncmp("short-all-same.32", testName, 17) != 0) {
832             trieTestGolden(testName, trie, checkRanges, countCheckRanges);
833         }
834 
835         ucptrie_close(trie);
836         trie=NULL;
837 
838         if(withSwap) {
839             int32_t swappedLength;
840 
841             UDataSwapper *ds;
842 
843             /* swap to opposite-endian */
844             uprv_memset(swapped, 0x55, length2);
845             ds=udata_openSwapper(U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
846                                  !U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
847             swappedLength=ucptrie_swap(ds, storage, -1, NULL, &errorCode);
848             if(U_FAILURE(errorCode) || swappedLength!=length2) {
849                 log_err("error: ucptrie_swap(%s to OE preflighting) failed (%s) "
850                         "or before/after lengths different\n",
851                         testName, u_errorName(errorCode));
852                 udata_closeSwapper(ds);
853                 break;
854             }
855             swappedLength=ucptrie_swap(ds, storage, length2, swapped, &errorCode);
856             udata_closeSwapper(ds);
857             if(U_FAILURE(errorCode) || swappedLength!=length2) {
858                 log_err("error: ucptrie_swap(%s to OE) failed (%s) or before/after lengths different\n",
859                         testName, u_errorName(errorCode));
860                 break;
861             }
862 
863             /* swap back to platform-endian */
864             uprv_memset(storage, 0xaa, length2);
865             ds=udata_openSwapper(!U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
866                                  U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
867             swappedLength=ucptrie_swap(ds, swapped, -1, NULL, &errorCode);
868             if(U_FAILURE(errorCode) || swappedLength!=length2) {
869                 log_err("error: ucptrie_swap(%s to PE preflighting) failed (%s) "
870                         "or before/after lengths different\n",
871                         testName, u_errorName(errorCode));
872                 udata_closeSwapper(ds);
873                 break;
874             }
875             swappedLength=ucptrie_swap(ds, swapped, length2, storage, &errorCode);
876             udata_closeSwapper(ds);
877             if(U_FAILURE(errorCode) || swappedLength!=length2) {
878                 log_err("error: ucptrie_swap(%s to PE) failed (%s) or before/after lengths different\n",
879                         testName, u_errorName(errorCode));
880                 break;
881             }
882         }
883 
884         trie = ucptrie_openFromBinary(type, valueWidth, storage, length2, &length3, &errorCode);
885         if(U_FAILURE(errorCode)) {
886             log_err("error: ucptrie_openFromBinary(%s) failed, %s\n", testName, u_errorName(errorCode));
887             break;
888         }
889         if(type != ucptrie_getType(trie)) {
890             log_err("error: trie serialization (%s) did not preserve trie type\n", testName);
891             break;
892         }
893         if(valueWidth != ucptrie_getValueWidth(trie)) {
894             log_err("error: trie serialization (%s) did not preserve data value width\n", testName);
895             break;
896         }
897         if(length2!=length3) {
898             log_err("error: trie serialization (%s) lengths different: "
899                     "serialize vs. unserialize\n", testName);
900             break;
901         }
902         /* overwrite the storage that is not supposed to be needed */
903         uprv_memset((char *)storage+length3, 0xfa, (int32_t)(sizeof(storage)-length3));
904 
905         {
906             errorCode=U_ZERO_ERROR;
907             UCPTrie *any = ucptrie_openFromBinary(UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
908                                                   storage, length3, NULL, &errorCode);
909             if (U_SUCCESS(errorCode)) {
910                 if (type != ucptrie_getType(any)) {
911                     log_err("error: ucptrie_openFromBinary("
912                             "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getType() wrong\n");
913                 }
914                 if (valueWidth != ucptrie_getValueWidth(any)) {
915                     log_err("error: ucptrie_openFromBinary("
916                             "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getValueWidth() wrong\n");
917                 }
918                 ucptrie_close(any);
919             } else {
920                 log_err("error: ucptrie_openFromBinary("
921                         "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY) failed - %s\n",
922                         u_errorName(errorCode));
923             }
924         }
925 
926         errorCode=U_ZERO_ERROR;
927         testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
928         {
929             /* make a mutable trie from an immutable one */
930             uint32_t value, value2;
931             UMutableCPTrie *mutable2 = umutablecptrie_fromUCPTrie(trie, &errorCode);
932             if(U_FAILURE(errorCode)) {
933                 log_err("error: umutablecptrie_fromUCPTrie(unserialized %s) failed - %s\n",
934                         testName, u_errorName(errorCode));
935                 break;
936             }
937 
938             value=umutablecptrie_get(mutable2, 0xa1);
939             umutablecptrie_set(mutable2, 0xa1, 789, &errorCode);
940             value2=umutablecptrie_get(mutable2, 0xa1);
941             umutablecptrie_set(mutable2, 0xa1, value, &errorCode);
942             if(U_FAILURE(errorCode) || value2!=789) {
943                 log_err("error: modifying a mutableTrie-from-UCPTrie (%s) failed - %s\n",
944                         testName, u_errorName(errorCode));
945             }
946             testBuilder(testName, mutable2, checkRanges, countCheckRanges);
947             umutablecptrie_close(mutable2);
948         }
949     } while(0);
950 
951     umutablecptrie_close(mutableTrie);
952     ucptrie_close(trie);
953 }
954 
955 static UMutableCPTrie *
testTrieSerializeAllValueWidth(const char * testName,UMutableCPTrie * mutableTrie,UBool withClone,const CheckRange checkRanges[],int32_t countCheckRanges)956 testTrieSerializeAllValueWidth(const char *testName,
957                                UMutableCPTrie *mutableTrie, UBool withClone,
958                                const CheckRange checkRanges[], int32_t countCheckRanges) {
959     char name[40];
960     uint32_t oredValues = 0;
961     int32_t i;
962     for (i = 0; i < countCheckRanges; ++i) {
963         oredValues |= checkRanges[i].value;
964     }
965 
966     testBuilder(testName, mutableTrie, checkRanges, countCheckRanges);
967 
968     if (oredValues <= 0xffff) {
969         uprv_strcpy(name, testName);
970         uprv_strcat(name, ".16");
971         testTrieSerialize(name, mutableTrie,
972                           UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, withClone,
973                           checkRanges, countCheckRanges);
974     }
975 
976     uprv_strcpy(name, testName);
977     uprv_strcat(name, ".32");
978     testTrieSerialize(name, mutableTrie,
979                       UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_32, withClone,
980                       checkRanges, countCheckRanges);
981 
982     if (oredValues <= 0xff) {
983         uprv_strcpy(name, testName);
984         uprv_strcat(name, ".8");
985         testTrieSerialize(name, mutableTrie,
986                           UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, withClone,
987                           checkRanges, countCheckRanges);
988     }
989 
990     if (oredValues <= 0xffff) {
991         uprv_strcpy(name, testName);
992         uprv_strcat(name, ".small16");
993         testTrieSerialize(name, mutableTrie,
994                           UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_16, withClone,
995                           checkRanges, countCheckRanges);
996     }
997 
998     return mutableTrie;
999 }
1000 
1001 static UMutableCPTrie *
makeTrieWithRanges(const char * testName,UBool withClone,const SetRange setRanges[],int32_t countSetRanges,uint32_t initialValue,uint32_t errorValue)1002 makeTrieWithRanges(const char *testName, UBool withClone,
1003                    const SetRange setRanges[], int32_t countSetRanges,
1004                    uint32_t initialValue, uint32_t errorValue) {
1005     UMutableCPTrie *mutableTrie;
1006     uint32_t value;
1007     UChar32 start, limit;
1008     int32_t i;
1009     UErrorCode errorCode;
1010 
1011     log_verbose("testing Trie '%s'\n", testName);
1012     errorCode=U_ZERO_ERROR;
1013     mutableTrie = umutablecptrie_open(initialValue, errorValue, &errorCode);
1014     if(U_FAILURE(errorCode)) {
1015         log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1016         return NULL;
1017     }
1018 
1019     /* set values from setRanges[] */
1020     for(i=0; i<countSetRanges; ++i) {
1021         if(withClone && i==countSetRanges/2) {
1022             /* switch to a clone in the middle of setting values */
1023             UMutableCPTrie *clone = umutablecptrie_clone(mutableTrie, &errorCode);
1024             if(U_FAILURE(errorCode)) {
1025                 log_err("error: umutablecptrie_clone(%s) failed - %s\n",
1026                         testName, u_errorName(errorCode));
1027                 errorCode=U_ZERO_ERROR;  /* continue with the original */
1028             } else {
1029                 umutablecptrie_close(mutableTrie);
1030                 mutableTrie = clone;
1031             }
1032         }
1033         start=setRanges[i].start;
1034         limit=setRanges[i].limit;
1035         value=setRanges[i].value;
1036         if ((limit - start) == 1) {
1037             umutablecptrie_set(mutableTrie, start, value, &errorCode);
1038         } else {
1039             umutablecptrie_setRange(mutableTrie, start, limit-1, value, &errorCode);
1040         }
1041     }
1042 
1043     if(U_SUCCESS(errorCode)) {
1044         return mutableTrie;
1045     } else {
1046         log_err("error: setting values into a mutable trie (%s) failed - %s\n",
1047                 testName, u_errorName(errorCode));
1048         umutablecptrie_close(mutableTrie);
1049         return NULL;
1050     }
1051 }
1052 
1053 static void
testTrieRanges(const char * testName,UBool withClone,const SetRange setRanges[],int32_t countSetRanges,const CheckRange checkRanges[],int32_t countCheckRanges)1054 testTrieRanges(const char *testName, UBool withClone,
1055                const SetRange setRanges[], int32_t countSetRanges,
1056                const CheckRange checkRanges[], int32_t countCheckRanges) {
1057     uint32_t initialValue, errorValue;
1058     getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
1059     UMutableCPTrie *mutableTrie = makeTrieWithRanges(
1060         testName, withClone, setRanges, countSetRanges, initialValue, errorValue);
1061     if (mutableTrie != NULL) {
1062         mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, withClone,
1063                                                      checkRanges, countCheckRanges);
1064         umutablecptrie_close(mutableTrie);
1065     }
1066 }
1067 
1068 /* test data ----------------------------------------------------------------*/
1069 
1070 /* set consecutive ranges, even with value 0 */
1071 static const SetRange
1072 setRanges1[]={
1073     { 0,        0x40,     0    },
1074     { 0x40,     0xe7,     0x34 },
1075     { 0xe7,     0x3400,   0    },
1076     { 0x3400,   0x9fa6,   0x61 },
1077     { 0x9fa6,   0xda9e,   0x31 },
1078     { 0xdada,   0xeeee,   0xff },
1079     { 0xeeee,   0x11111,  1    },
1080     { 0x11111,  0x44444,  0x61 },
1081     { 0x44444,  0x60003,  0    },
1082     { 0xf0003,  0xf0004,  0xf  },
1083     { 0xf0004,  0xf0006,  0x10 },
1084     { 0xf0006,  0xf0007,  0x11 },
1085     { 0xf0007,  0xf0040,  0x12 },
1086     { 0xf0040,  0x110000, 0    }
1087 };
1088 
1089 static const CheckRange
1090 checkRanges1[]={
1091     { 0,        0 },
1092     { 0x40,     0 },
1093     { 0xe7,     0x34 },
1094     { 0x3400,   0 },
1095     { 0x9fa6,   0x61 },
1096     { 0xda9e,   0x31 },
1097     { 0xdada,   0 },
1098     { 0xeeee,   0xff },
1099     { 0x11111,  1 },
1100     { 0x44444,  0x61 },
1101     { 0xf0003,  0 },
1102     { 0xf0004,  0xf },
1103     { 0xf0006,  0x10 },
1104     { 0xf0007,  0x11 },
1105     { 0xf0040,  0x12 },
1106     { 0x110000, 0 }
1107 };
1108 
1109 /* set some interesting overlapping ranges */
1110 static const SetRange
1111 setRanges2[]={
1112     { 0x21,     0x7f,     0x5555 },
1113     { 0x2f800,  0x2fedc,  0x7a   },
1114     { 0x72,     0xdd,     3      },
1115     { 0xdd,     0xde,     4      },
1116     { 0x201,    0x240,    6      },  /* 3 consecutive blocks with the same pattern but */
1117     { 0x241,    0x280,    6      },  /* discontiguous value ranges, testing iteration */
1118     { 0x281,    0x2c0,    6      },
1119     { 0x2f987,  0x2fa98,  5      },
1120     { 0x2f777,  0x2f883,  0      },
1121     { 0x2fedc,  0x2ffaa,  1      },
1122     { 0x2ffaa,  0x2ffab,  2      },
1123     { 0x2ffbb,  0x2ffc0,  7      }
1124 };
1125 
1126 static const CheckRange
1127 checkRanges2[]={
1128     { 0,        0 },
1129     { 0x21,     0 },
1130     { 0x72,     0x5555 },
1131     { 0xdd,     3 },
1132     { 0xde,     4 },
1133     { 0x201,    0 },
1134     { 0x240,    6 },
1135     { 0x241,    0 },
1136     { 0x280,    6 },
1137     { 0x281,    0 },
1138     { 0x2c0,    6 },
1139     { 0x2f883,  0 },
1140     { 0x2f987,  0x7a },
1141     { 0x2fa98,  5 },
1142     { 0x2fedc,  0x7a },
1143     { 0x2ffaa,  1 },
1144     { 0x2ffab,  2 },
1145     { 0x2ffbb,  0 },
1146     { 0x2ffc0,  7 },
1147     { 0x110000, 0 }
1148 };
1149 
1150 /* use a non-zero initial value */
1151 static const SetRange
1152 setRanges3[]={
1153     { 0x31,     0xa4,     1 },
1154     { 0x3400,   0x6789,   2 },
1155     { 0x8000,   0x89ab,   9 },
1156     { 0x9000,   0xa000,   4 },
1157     { 0xabcd,   0xbcde,   3 },
1158     { 0x55555,  0x110000, 6 },  /* highStart<U+ffff with non-initialValue */
1159     { 0xcccc,   0x55555,  6 }
1160 };
1161 
1162 static const CheckRange
1163 checkRanges3[]={
1164     { 0,        9 },  /* non-zero initialValue */
1165     { 0x31,     9 },
1166     { 0xa4,     1 },
1167     { 0x3400,   9 },
1168     { 0x6789,   2 },
1169     { 0x9000,   9 },
1170     { 0xa000,   4 },
1171     { 0xabcd,   9 },
1172     { 0xbcde,   3 },
1173     { 0xcccc,   9 },
1174     { 0x110000, 6 }
1175 };
1176 
1177 /* empty or single-value tries, testing highStart==0 */
1178 static const SetRange
1179 setRangesEmpty[]={
1180     { 0,        0,        0 },  /* need some values for it to compile */
1181 };
1182 
1183 static const CheckRange
1184 checkRangesEmpty[]={
1185     { 0,        3 },
1186     { 0x110000, 3 }
1187 };
1188 
1189 static const SetRange
1190 setRangesSingleValue[]={
1191     { 0,        0x110000, 5 },
1192 };
1193 
1194 static const CheckRange
1195 checkRangesSingleValue[]={
1196     { 0,        3 },
1197     { 0x110000, 5 }
1198 };
1199 
1200 static void
TrieTestSet1(void)1201 TrieTestSet1(void) {
1202     testTrieRanges("set1", false,
1203         setRanges1, UPRV_LENGTHOF(setRanges1),
1204         checkRanges1, UPRV_LENGTHOF(checkRanges1));
1205 }
1206 
1207 static void
TrieTestSet2Overlap(void)1208 TrieTestSet2Overlap(void) {
1209     testTrieRanges("set2-overlap", false,
1210         setRanges2, UPRV_LENGTHOF(setRanges2),
1211         checkRanges2, UPRV_LENGTHOF(checkRanges2));
1212 }
1213 
1214 static void
TrieTestSet3Initial9(void)1215 TrieTestSet3Initial9(void) {
1216     testTrieRanges("set3-initial-9", false,
1217         setRanges3, UPRV_LENGTHOF(setRanges3),
1218         checkRanges3, UPRV_LENGTHOF(checkRanges3));
1219     testTrieRanges("set3-initial-9-clone", true,
1220         setRanges3, UPRV_LENGTHOF(setRanges3),
1221         checkRanges3, UPRV_LENGTHOF(checkRanges3));
1222 }
1223 
1224 static void
TrieTestSetEmpty(void)1225 TrieTestSetEmpty(void) {
1226     testTrieRanges("set-empty", false,
1227         setRangesEmpty, 0,
1228         checkRangesEmpty, UPRV_LENGTHOF(checkRangesEmpty));
1229 }
1230 
1231 static void
TrieTestSetSingleValue(void)1232 TrieTestSetSingleValue(void) {
1233     testTrieRanges("set-single-value", false,
1234         setRangesSingleValue, UPRV_LENGTHOF(setRangesSingleValue),
1235         checkRangesSingleValue, UPRV_LENGTHOF(checkRangesSingleValue));
1236 }
1237 
1238 static void
TrieTestSet2OverlapWithClone(void)1239 TrieTestSet2OverlapWithClone(void) {
1240     testTrieRanges("set2-overlap.withClone", true,
1241         setRanges2, UPRV_LENGTHOF(setRanges2),
1242         checkRanges2, UPRV_LENGTHOF(checkRanges2));
1243 }
1244 
1245 /* test mutable-trie memory management -------------------------------------- */
1246 
1247 static void
FreeBlocksTest(void)1248 FreeBlocksTest(void) {
1249     static const CheckRange
1250     checkRanges[]={
1251         { 0,        1 },
1252         { 0x740,    1 },
1253         { 0x780,    2 },
1254         { 0x880,    3 },
1255         { 0x110000, 1 }
1256     };
1257     static const char *const testName="free-blocks";
1258 
1259     UMutableCPTrie *mutableTrie;
1260     int32_t i;
1261     UErrorCode errorCode;
1262 
1263     errorCode=U_ZERO_ERROR;
1264     mutableTrie=umutablecptrie_open(1, 0xad, &errorCode);
1265     if(U_FAILURE(errorCode)) {
1266         log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1267         return;
1268     }
1269 
1270     /*
1271      * Repeatedly set overlapping same-value ranges to stress the free-data-block management.
1272      * If it fails, it will overflow the data array.
1273      */
1274     for(i=0; i<(0x120000>>4)/2; ++i) {  // 4=UCPTRIE_SHIFT_3
1275         umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 1, &errorCode);
1276         umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 1, &errorCode);
1277         umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 2, &errorCode);
1278         umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 3, &errorCode);
1279     }
1280     /* make blocks that will be free during compaction */
1281     umutablecptrie_setRange(mutableTrie, 0x1000, 0x3000-1, 2, &errorCode);
1282     umutablecptrie_setRange(mutableTrie, 0x2000, 0x4000-1, 3, &errorCode);
1283     umutablecptrie_setRange(mutableTrie, 0x1000, 0x4000-1, 1, &errorCode);
1284     if(U_FAILURE(errorCode)) {
1285         log_err("error: setting lots of ranges into a mutable trie (%s) failed - %s\n",
1286                 testName, u_errorName(errorCode));
1287         umutablecptrie_close(mutableTrie);
1288         return;
1289     }
1290 
1291     mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, false,
1292                                                  checkRanges, UPRV_LENGTHOF(checkRanges));
1293     umutablecptrie_close(mutableTrie);
1294 }
1295 
1296 static void
GrowDataArrayTest(void)1297 GrowDataArrayTest(void) {
1298     static const CheckRange
1299     checkRanges[]={
1300         { 0,        1 },
1301         { 0x720,    2 },
1302         { 0x7a0,    3 },
1303         { 0x8a0,    4 },
1304         { 0x110000, 5 }
1305     };
1306     static const char *const testName="grow-data";
1307 
1308     UMutableCPTrie *mutableTrie;
1309     int32_t i;
1310     UErrorCode errorCode;
1311 
1312     errorCode=U_ZERO_ERROR;
1313     mutableTrie=umutablecptrie_open(1, 0xad, &errorCode);
1314     if(U_FAILURE(errorCode)) {
1315         log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1316         return;
1317     }
1318 
1319     /*
1320      * Use umutablecptrie_set() not umutablecptrie_setRange() to write non-initialValue-data.
1321      * Should grow/reallocate the data array to a sufficient length.
1322      */
1323     for(i=0; i<0x1000; ++i) {
1324         umutablecptrie_set(mutableTrie, i, 2, &errorCode);
1325     }
1326     for(i=0x720; i<0x1100; ++i) { /* some overlap */
1327         umutablecptrie_set(mutableTrie, i, 3, &errorCode);
1328     }
1329     for(i=0x7a0; i<0x900; ++i) {
1330         umutablecptrie_set(mutableTrie, i, 4, &errorCode);
1331     }
1332     for(i=0x8a0; i<0x110000; ++i) {
1333         umutablecptrie_set(mutableTrie, i, 5, &errorCode);
1334     }
1335     if(U_FAILURE(errorCode)) {
1336         log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n",
1337                 testName, u_errorName(errorCode));
1338         umutablecptrie_close(mutableTrie);
1339         return;
1340     }
1341 
1342     mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, false,
1343                                                  checkRanges, UPRV_LENGTHOF(checkRanges));
1344     umutablecptrie_close(mutableTrie);
1345 }
1346 
1347 static void
ManyAllSameBlocksTest(void)1348 ManyAllSameBlocksTest(void) {
1349     static const char *const testName="many-all-same";
1350 
1351     UMutableCPTrie *mutableTrie;
1352     int32_t i;
1353     UErrorCode errorCode;
1354     CheckRange checkRanges[(0x110000 >> 12) + 1];
1355 
1356     errorCode = U_ZERO_ERROR;
1357     mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode);
1358     if (U_FAILURE(errorCode)) {
1359         log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1360         return;
1361     }
1362     checkRanges[0].limit = 0;
1363     checkRanges[0].value = 0xff33;  // initialValue
1364 
1365     // Many all-same-value blocks.
1366     for (i = 0; i < 0x110000; i += 0x1000) {
1367         uint32_t value = i >> 12;
1368         umutablecptrie_setRange(mutableTrie, i, i + 0xfff, value, &errorCode);
1369         checkRanges[value + 1].limit = i + 0x1000;
1370         checkRanges[value + 1].value = value;
1371     }
1372     for (i = 0; i < 0x110000; i += 0x1000) {
1373         uint32_t expected = i >> 12;
1374         uint32_t v0 = umutablecptrie_get(mutableTrie, i);
1375         uint32_t vfff = umutablecptrie_get(mutableTrie, i + 0xfff);
1376         if (v0 != expected || vfff != expected) {
1377             log_err("error: UMutableCPTrie U+%04lx unexpected value\n", (long)i);
1378         }
1379     }
1380 
1381     mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, false,
1382                                                  checkRanges, UPRV_LENGTHOF(checkRanges));
1383     umutablecptrie_close(mutableTrie);
1384 }
1385 
1386 static void
MuchDataTest(void)1387 MuchDataTest(void) {
1388     static const char *const testName="much-data";
1389 
1390     UMutableCPTrie *mutableTrie;
1391     int32_t r, c;
1392     UErrorCode errorCode = U_ZERO_ERROR;
1393     CheckRange checkRanges[(0x10000 >> 6) + (0x10240 >> 4) + 10];
1394 
1395     mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode);
1396     if (U_FAILURE(errorCode)) {
1397         log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1398         return;
1399     }
1400     checkRanges[0].limit = 0;
1401     checkRanges[0].value = 0xff33;  // initialValue
1402     r = 1;
1403 
1404     // Add much data that does not compact well,
1405     // to get more than 128k data values after compaction.
1406     for (c = 0; c < 0x10000; c += 0x40) {
1407         uint32_t value = c >> 4;
1408         umutablecptrie_setRange(mutableTrie, c, c + 0x3f, value, &errorCode);
1409         checkRanges[r].limit = c + 0x40;
1410         checkRanges[r++].value = value;
1411     }
1412     checkRanges[r].limit = 0x20000;
1413     checkRanges[r++].value = 0xff33;
1414     for (c = 0x20000; c < 0x30230; c += 0x10) {
1415         uint32_t value = c >> 4;
1416         umutablecptrie_setRange(mutableTrie, c, c + 0xf, value, &errorCode);
1417         checkRanges[r].limit = c + 0x10;
1418         checkRanges[r++].value = value;
1419     }
1420     umutablecptrie_setRange(mutableTrie, 0x30230, 0x30233, 0x3023, &errorCode);
1421     checkRanges[r].limit = 0x30234;
1422     checkRanges[r++].value = 0x3023;
1423     umutablecptrie_setRange(mutableTrie, 0x30234, 0xdffff, 0x5005, &errorCode);
1424     checkRanges[r].limit = 0xe0000;
1425     checkRanges[r++].value = 0x5005;
1426     umutablecptrie_setRange(mutableTrie, 0xe0000, 0x10ffff, 0x9009, &errorCode);
1427     checkRanges[r].limit = 0x110000;
1428     checkRanges[r++].value = 0x9009;
1429     if (U_FAILURE(errorCode)) {
1430         log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n",
1431                 testName, u_errorName(errorCode));
1432         umutablecptrie_close(mutableTrie);
1433         return;
1434     }
1435     U_ASSERT(r <= UPRV_LENGTHOF(checkRanges));
1436 
1437     testBuilder(testName, mutableTrie, checkRanges, r);
1438     testTrieSerialize("much-data.16", mutableTrie,
1439                       UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, false, checkRanges, r);
1440     umutablecptrie_close(mutableTrie);
1441 }
1442 
/*
 * Checks range iteration with the given range option, first on the mutable
 * trie and then on an immutable fast/16-bit trie built from a clone of it.
 *
 * Both testTrieGetRanges() calls pass 5 as the argument after the option.
 * NOTE(review): presumably the value assumed for surrogates under the
 * "fixed surrogates" options - confirm against testTrieGetRanges().
 */
static void testGetRangesFixedSurr(const char *testName, const UMutableCPTrie *mutableTrie,
                                   UCPMapRangeOption option,
                                   const CheckRange checkRanges[], int32_t countCheckRanges) {
    /* the mutable trie itself */
    testTrieGetRanges(testName, NULL, mutableTrie, option, 5, checkRanges, countCheckRanges);

    /* build from a clone, so the caller's mutable trie is not the one being built */
    UErrorCode errorCode = U_ZERO_ERROR;
    UMutableCPTrie *clone = umutablecptrie_clone(mutableTrie, &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err("error: umutablecptrie_clone(%s) failed: %s\n", testName, u_errorName(errorCode));
        return;
    }

    UCPTrie *trie = umutablecptrie_buildImmutable(
        clone, UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, &errorCode);
    umutablecptrie_close(clone);
    if (U_SUCCESS(errorCode)) {
        /* the immutable trie */
        testTrieGetRanges(testName, trie, NULL, option, 5, checkRanges, countCheckRanges);
        ucptrie_close(trie);
    } else {
        log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n", testName, u_errorName(errorCode));
    }
}
1463 
1464 static void
TrieTestGetRangesFixedSurr(void)1465 TrieTestGetRangesFixedSurr(void) {
1466     static const SetRange
1467     setRangesFixedSurr[]={
1468         { 0xd000, 0xd7ff, 5 },
1469         { 0xd7ff, 0xe001, 3 },
1470         { 0xe001, 0xf900, 5 },
1471     };
1472 
1473     static const CheckRange
1474     checkRangesFixedLeadSurr1[]={
1475         { 0,      0 },
1476         { 0xd000, 0 },
1477         { 0xd7ff, 5 },
1478         { 0xd800, 3 },
1479         { 0xdc00, 5 },
1480         { 0xe001, 3 },
1481         { 0xf900, 5 },
1482         { 0x110000, 0 }
1483     };
1484 
1485     static const CheckRange
1486     checkRangesFixedAllSurr1[]={
1487         { 0,      0 },
1488         { 0xd000, 0 },
1489         { 0xd7ff, 5 },
1490         { 0xd800, 3 },
1491         { 0xe000, 5 },
1492         { 0xe001, 3 },
1493         { 0xf900, 5 },
1494         { 0x110000, 0 }
1495     };
1496 
1497     static const CheckRange
1498     checkRangesFixedLeadSurr3[]={
1499         { 0,      0 },
1500         { 0xd000, 0 },
1501         { 0xdc00, 5 },
1502         { 0xe001, 3 },
1503         { 0xf900, 5 },
1504         { 0x110000, 0 }
1505     };
1506 
1507     static const CheckRange
1508     checkRangesFixedAllSurr3[]={
1509         { 0,      0 },
1510         { 0xd000, 0 },
1511         { 0xe000, 5 },
1512         { 0xe001, 3 },
1513         { 0xf900, 5 },
1514         { 0x110000, 0 }
1515     };
1516 
1517     static const CheckRange
1518     checkRangesFixedSurr4[]={
1519         { 0,      0 },
1520         { 0xd000, 0 },
1521         { 0xf900, 5 },
1522         { 0x110000, 0 }
1523     };
1524 
1525     uint32_t initialValue, errorValue;
1526     getSpecialValues(
1527         checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1),
1528         &initialValue, &errorValue);
1529     UMutableCPTrie *mutableTrie = makeTrieWithRanges(
1530         "fixedSurr", false, setRangesFixedSurr, UPRV_LENGTHOF(setRangesFixedSurr),
1531         initialValue, errorValue);
1532     UErrorCode errorCode = U_ZERO_ERROR;
1533     if (mutableTrie == NULL) {
1534         return;
1535     }
1536     testGetRangesFixedSurr("fixedLeadSurr1", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
1537                            checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
1538     testGetRangesFixedSurr("fixedAllSurr1", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
1539                            checkRangesFixedAllSurr1, UPRV_LENGTHOF(checkRangesFixedAllSurr1));
1540     // Setting a range in the middle of lead surrogates makes no difference.
1541     umutablecptrie_setRange(mutableTrie, 0xd844, 0xd899, 5, &errorCode);
1542     if (U_FAILURE(errorCode)) {
1543         log_err("error: umutablecptrie_setRange(fixedSurr2) failed: %s\n", u_errorName(errorCode));
1544         umutablecptrie_close(mutableTrie);
1545         return;
1546     }
1547     testGetRangesFixedSurr("fixedLeadSurr2", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
1548                            checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
1549     // Bridge the gap before the lead surrogates.
1550     umutablecptrie_set(mutableTrie, 0xd7ff, 5, &errorCode);
1551     if (U_FAILURE(errorCode)) {
1552         log_err("error: umutablecptrie_set(fixedSurr3) failed: %s\n", u_errorName(errorCode));
1553         umutablecptrie_close(mutableTrie);
1554         return;
1555     }
1556     testGetRangesFixedSurr("fixedLeadSurr3", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
1557                            checkRangesFixedLeadSurr3, UPRV_LENGTHOF(checkRangesFixedLeadSurr3));
1558     testGetRangesFixedSurr("fixedAllSurr3", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
1559                            checkRangesFixedAllSurr3, UPRV_LENGTHOF(checkRangesFixedAllSurr3));
1560     // Bridge the gap after the trail surrogates.
1561     umutablecptrie_set(mutableTrie, 0xe000, 5, &errorCode);
1562     if (U_FAILURE(errorCode)) {
1563         log_err("error: umutablecptrie_set(fixedSurr4) failed: %s\n", u_errorName(errorCode));
1564         umutablecptrie_close(mutableTrie);
1565         return;
1566     }
1567     testGetRangesFixedSurr("fixedSurr4", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
1568                            checkRangesFixedSurr4, UPRV_LENGTHOF(checkRangesFixedSurr4));
1569     umutablecptrie_close(mutableTrie);
1570 }
1571 
TestSmallNullBlockMatchesFast(void)1572 static void TestSmallNullBlockMatchesFast(void) {
1573     // The initial builder+getRange code had a bug:
1574     // When there is no null data block in the fast-index range,
1575     // but a fast-range data block starts with enough values to match a small data block,
1576     // then getRange() got confused.
1577     // The builder must prevent this.
1578     static const SetRange setRanges[] = {
1579         { 0, 0x880, 1 },
1580         // U+0880..U+088F map to initial value 0, potential match for small null data block.
1581         { 0x890, 0x1040, 2 },
1582         // U+1040..U+1050 map to 0.
1583         // First small null data block in a small-type trie.
1584         // In a fast-type trie, it is ok to match a small null data block at U+1041
1585         // but not at U+1040.
1586         { 0x1051, 0x10000, 3 },
1587         // No fast data block (block length 64) filled with 0 regardless of trie type.
1588         // Need more blocks filled with 0 than the largest range above,
1589         // and need a highStart above that so that it actually counts.
1590         { 0x20000, 0x110000, 9 }
1591     };
1592 
1593     static const CheckRange checkRanges[] = {
1594         { 0x0880, 1 },
1595         { 0x0890, 0 },
1596         { 0x1040, 2 },
1597         { 0x1051, 0 },
1598         { 0x10000, 3 },
1599         { 0x20000, 0 },
1600         { 0x110000, 9 }
1601     };
1602 
1603     testTrieRanges("small0-in-fast", false,
1604         setRanges, UPRV_LENGTHOF(setRanges),
1605         checkRanges, UPRV_LENGTHOF(checkRanges));
1606 }
1607 
ShortAllSameBlocksTest(void)1608 static void ShortAllSameBlocksTest(void) {
1609     static const char *const testName = "short-all-same";
1610     // Many all-same-value blocks but only of the small block length used in the mutable trie.
1611     // The builder code needs to turn a group of short ALL_SAME blocks below fastLimit
1612     // into a MIXED block, and reserve data array capacity for that.
1613     UErrorCode errorCode = U_ZERO_ERROR;
1614     UMutableCPTrie *mutableTrie = umutablecptrie_open(0, 0xad, &errorCode);
1615     CheckRange checkRanges[0x101];
1616     int32_t i;
1617     if (U_FAILURE(errorCode)) {
1618         log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1619         return;
1620     }
1621     for (i = 0; i < 0x1000; i += 0x10) {
1622         uint32_t value = i >> 4;
1623         umutablecptrie_setRange(mutableTrie, i, i + 0xf, value, &errorCode);
1624         checkRanges[value].limit = i + 0x10;
1625         checkRanges[value].value = value;
1626     }
1627     checkRanges[0x100].limit = 0x110000;
1628     checkRanges[0x100].value = 0;
1629     if (U_FAILURE(errorCode)) {
1630         log_err("error: setting values into a mutable trie (%s) failed - %s\n",
1631                 testName, u_errorName(errorCode));
1632         umutablecptrie_close(mutableTrie);
1633         return;
1634     }
1635 
1636     mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, false,
1637                                                  checkRanges, UPRV_LENGTHOF(checkRanges));
1638     umutablecptrie_close(mutableTrie);
1639 }
1640 
1641 void
addUCPTrieTest(TestNode ** root)1642 addUCPTrieTest(TestNode** root) {
1643     addTest(root, &TrieTestSet1, "tsutil/ucptrietest/TrieTestSet1");
1644     addTest(root, &TrieTestSet2Overlap, "tsutil/ucptrietest/TrieTestSet2Overlap");
1645     addTest(root, &TrieTestSet3Initial9, "tsutil/ucptrietest/TrieTestSet3Initial9");
1646     addTest(root, &TrieTestSetEmpty, "tsutil/ucptrietest/TrieTestSetEmpty");
1647     addTest(root, &TrieTestSetSingleValue, "tsutil/ucptrietest/TrieTestSetSingleValue");
1648     addTest(root, &TrieTestSet2OverlapWithClone, "tsutil/ucptrietest/TrieTestSet2OverlapWithClone");
1649     addTest(root, &FreeBlocksTest, "tsutil/ucptrietest/FreeBlocksTest");
1650     addTest(root, &GrowDataArrayTest, "tsutil/ucptrietest/GrowDataArrayTest");
1651     addTest(root, &ManyAllSameBlocksTest, "tsutil/ucptrietest/ManyAllSameBlocksTest");
1652     addTest(root, &MuchDataTest, "tsutil/ucptrietest/MuchDataTest");
1653     addTest(root, &TrieTestGetRangesFixedSurr, "tsutil/ucptrietest/TrieTestGetRangesFixedSurr");
1654     addTest(root, &TestSmallNullBlockMatchesFast, "tsutil/ucptrietest/TestSmallNullBlockMatchesFast");
1655     addTest(root, &ShortAllSameBlocksTest, "tsutil/ucptrietest/ShortAllSameBlocksTest");
1656 }
1657