1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // ucptrie.cpp (modified from utrie2.cpp)
5 // created: 2017dec29 Markus W. Scherer
6
7 // #define UCPTRIE_DEBUG
8 #ifdef UCPTRIE_DEBUG
9 # include <stdio.h>
10 #endif
11
12 #include "unicode/utypes.h"
13 #include "unicode/ucptrie.h"
14 #include "unicode/utf.h"
15 #include "unicode/utf8.h"
16 #include "unicode/utf16.h"
17 #include "cmemory.h"
18 #include "uassert.h"
19 #include "ucptrie_impl.h"
20
21 U_CAPI UCPTrie * U_EXPORT2
ucptrie_openFromBinary(UCPTrieType type,UCPTrieValueWidth valueWidth,const void * data,int32_t length,int32_t * pActualLength,UErrorCode * pErrorCode)22 ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
23 const void *data, int32_t length, int32_t *pActualLength,
24 UErrorCode *pErrorCode) {
25 if (U_FAILURE(*pErrorCode)) {
26 return nullptr;
27 }
28
29 if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) ||
30 type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type ||
31 valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) {
32 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
33 return nullptr;
34 }
35
36 // Enough data for a trie header?
37 if (length < (int32_t)sizeof(UCPTrieHeader)) {
38 *pErrorCode = U_INVALID_FORMAT_ERROR;
39 return nullptr;
40 }
41
42 // Check the signature.
43 const UCPTrieHeader *header = (const UCPTrieHeader *)data;
44 if (header->signature != UCPTRIE_SIG) {
45 *pErrorCode = U_INVALID_FORMAT_ERROR;
46 return nullptr;
47 }
48
49 int32_t options = header->options;
50 int32_t typeInt = (options >> 6) & 3;
51 int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK;
52 if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 ||
53 (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) {
54 *pErrorCode = U_INVALID_FORMAT_ERROR;
55 return nullptr;
56 }
57 UCPTrieType actualType = (UCPTrieType)typeInt;
58 UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt;
59 if (type < 0) {
60 type = actualType;
61 }
62 if (valueWidth < 0) {
63 valueWidth = actualValueWidth;
64 }
65 if (type != actualType || valueWidth != actualValueWidth) {
66 *pErrorCode = U_INVALID_FORMAT_ERROR;
67 return nullptr;
68 }
69
70 // Get the length values and offsets.
71 UCPTrie tempTrie;
72 uprv_memset(&tempTrie, 0, sizeof(tempTrie));
73 tempTrie.indexLength = header->indexLength;
74 tempTrie.dataLength =
75 ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength;
76 tempTrie.index3NullOffset = header->index3NullOffset;
77 tempTrie.dataNullOffset =
78 ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset;
79
80 tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2;
81 tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12;
82 tempTrie.type = type;
83 tempTrie.valueWidth = valueWidth;
84
85 // Calculate the actual length.
86 int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2;
87 if (valueWidth == UCPTRIE_VALUE_BITS_16) {
88 actualLength += tempTrie.dataLength * 2;
89 } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
90 actualLength += tempTrie.dataLength * 4;
91 } else {
92 actualLength += tempTrie.dataLength;
93 }
94 if (length < actualLength) {
95 *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes.
96 return nullptr;
97 }
98
99 // Allocate the trie.
100 UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie));
101 if (trie == nullptr) {
102 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
103 return nullptr;
104 }
105 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
106 #ifdef UCPTRIE_DEBUG
107 trie->name = "fromSerialized";
108 #endif
109
110 // Set the pointers to its index and data arrays.
111 const uint16_t *p16 = (const uint16_t *)(header + 1);
112 trie->index = p16;
113 p16 += trie->indexLength;
114
115 // Get the data.
116 int32_t nullValueOffset = trie->dataNullOffset;
117 if (nullValueOffset >= trie->dataLength) {
118 nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
119 }
120 switch (valueWidth) {
121 case UCPTRIE_VALUE_BITS_16:
122 trie->data.ptr16 = p16;
123 trie->nullValue = trie->data.ptr16[nullValueOffset];
124 break;
125 case UCPTRIE_VALUE_BITS_32:
126 trie->data.ptr32 = (const uint32_t *)p16;
127 trie->nullValue = trie->data.ptr32[nullValueOffset];
128 break;
129 case UCPTRIE_VALUE_BITS_8:
130 trie->data.ptr8 = (const uint8_t *)p16;
131 trie->nullValue = trie->data.ptr8[nullValueOffset];
132 break;
133 default:
134 // Unreachable because valueWidth was checked above.
135 *pErrorCode = U_INVALID_FORMAT_ERROR;
136 return nullptr;
137 }
138
139 if (pActualLength != nullptr) {
140 *pActualLength = actualLength;
141 }
142 return trie;
143 }
144
145 U_CAPI void U_EXPORT2
ucptrie_close(UCPTrie * trie)146 ucptrie_close(UCPTrie *trie) {
147 uprv_free(trie);
148 }
149
150 U_CAPI UCPTrieType U_EXPORT2
ucptrie_getType(const UCPTrie * trie)151 ucptrie_getType(const UCPTrie *trie) {
152 return (UCPTrieType)trie->type;
153 }
154
155 U_CAPI UCPTrieValueWidth U_EXPORT2
ucptrie_getValueWidth(const UCPTrie * trie)156 ucptrie_getValueWidth(const UCPTrie *trie) {
157 return (UCPTrieValueWidth)trie->valueWidth;
158 }
159
160 U_CAPI int32_t U_EXPORT2
ucptrie_internalSmallIndex(const UCPTrie * trie,UChar32 c)161 ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) {
162 int32_t i1 = c >> UCPTRIE_SHIFT_1;
163 if (trie->type == UCPTRIE_TYPE_FAST) {
164 U_ASSERT(0xffff < c && c < trie->highStart);
165 i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
166 } else {
167 U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
168 i1 += UCPTRIE_SMALL_INDEX_LENGTH;
169 }
170 int32_t i3Block = trie->index[
171 (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
172 int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
173 int32_t dataBlock;
174 if ((i3Block & 0x8000) == 0) {
175 // 16-bit indexes
176 dataBlock = trie->index[i3Block + i3];
177 } else {
178 // 18-bit indexes stored in groups of 9 entries per 8 indexes.
179 i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
180 i3 &= 7;
181 dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000;
182 dataBlock |= trie->index[i3Block + i3];
183 }
184 return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK);
185 }
186
187 U_CAPI int32_t U_EXPORT2
ucptrie_internalSmallU8Index(const UCPTrie * trie,int32_t lt1,uint8_t t2,uint8_t t3)188 ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) {
189 UChar32 c = (lt1 << 12) | (t2 << 6) | t3;
190 if (c >= trie->highStart) {
191 // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher.
192 return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
193 }
194 return ucptrie_internalSmallIndex(trie, c);
195 }
196
197 U_CAPI int32_t U_EXPORT2
ucptrie_internalU8PrevIndex(const UCPTrie * trie,UChar32 c,const uint8_t * start,const uint8_t * src)198 ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
199 const uint8_t *start, const uint8_t *src) {
200 int32_t i, length;
201 // Support 64-bit pointers by avoiding cast of arbitrary difference.
202 if ((src - start) <= 7) {
203 i = length = (int32_t)(src - start);
204 } else {
205 i = length = 7;
206 start = src - 7;
207 }
208 c = utf8_prevCharSafeBody(start, 0, &i, c, -1);
209 i = length - i; // Number of bytes read backward from src.
210 int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c);
211 return (idx << 3) | i;
212 }
213
214 namespace {
215
getValue(UCPTrieData data,UCPTrieValueWidth valueWidth,int32_t dataIndex)216 inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) {
217 switch (valueWidth) {
218 case UCPTRIE_VALUE_BITS_16:
219 return data.ptr16[dataIndex];
220 case UCPTRIE_VALUE_BITS_32:
221 return data.ptr32[dataIndex];
222 case UCPTRIE_VALUE_BITS_8:
223 return data.ptr8[dataIndex];
224 default:
225 // Unreachable if the trie is properly initialized.
226 return 0xffffffff;
227 }
228 }
229
230 } // namespace
231
232 U_CAPI uint32_t U_EXPORT2
ucptrie_get(const UCPTrie * trie,UChar32 c)233 ucptrie_get(const UCPTrie *trie, UChar32 c) {
234 int32_t dataIndex;
235 if ((uint32_t)c <= 0x7f) {
236 // linear ASCII
237 dataIndex = c;
238 } else {
239 UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 0xffff : UCPTRIE_SMALL_MAX;
240 dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c);
241 }
242 return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex);
243 }
244
245 namespace {
246
247 constexpr int32_t MAX_UNICODE = 0x10ffff;
248
maybeFilterValue(uint32_t value,uint32_t trieNullValue,uint32_t nullValue,UCPMapValueFilter * filter,const void * context)249 inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
250 UCPMapValueFilter *filter, const void *context) {
251 if (value == trieNullValue) {
252 value = nullValue;
253 } else if (filter != nullptr) {
254 value = filter(context, value);
255 }
256 return value;
257 }
258
getRange(const void * t,UChar32 start,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)259 UChar32 getRange(const void *t, UChar32 start,
260 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
261 if ((uint32_t)start > MAX_UNICODE) {
262 return U_SENTINEL;
263 }
264 const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t);
265 UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
266 if (start >= trie->highStart) {
267 if (pValue != nullptr) {
268 int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
269 uint32_t value = getValue(trie->data, valueWidth, di);
270 if (filter != nullptr) { value = filter(context, value); }
271 *pValue = value;
272 }
273 return MAX_UNICODE;
274 }
275
276 uint32_t nullValue = trie->nullValue;
277 if (filter != nullptr) { nullValue = filter(context, nullValue); }
278 const uint16_t *index = trie->index;
279
280 int32_t prevI3Block = -1;
281 int32_t prevBlock = -1;
282 UChar32 c = start;
283 uint32_t trieValue, value = nullValue;
284 bool haveValue = false;
285 do {
286 int32_t i3Block;
287 int32_t i3;
288 int32_t i3BlockLength;
289 int32_t dataBlockLength;
290 if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) {
291 i3Block = 0;
292 i3 = c >> UCPTRIE_FAST_SHIFT;
293 i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ?
294 UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
295 dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
296 } else {
297 // Use the multi-stage index.
298 int32_t i1 = c >> UCPTRIE_SHIFT_1;
299 if (trie->type == UCPTRIE_TYPE_FAST) {
300 U_ASSERT(0xffff < c && c < trie->highStart);
301 i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
302 } else {
303 U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
304 i1 += UCPTRIE_SMALL_INDEX_LENGTH;
305 }
306 i3Block = trie->index[
307 (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
308 if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) {
309 // The index-3 block is the same as the previous one, and filled with value.
310 U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
311 c += UCPTRIE_CP_PER_INDEX_2_ENTRY;
312 continue;
313 }
314 prevI3Block = i3Block;
315 if (i3Block == trie->index3NullOffset) {
316 // This is the index-3 null block.
317 if (haveValue) {
318 if (nullValue != value) {
319 return c - 1;
320 }
321 } else {
322 trieValue = trie->nullValue;
323 value = nullValue;
324 if (pValue != nullptr) { *pValue = nullValue; }
325 haveValue = true;
326 }
327 prevBlock = trie->dataNullOffset;
328 c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
329 continue;
330 }
331 i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
332 i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH;
333 dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
334 }
335 // Enumerate data blocks for one index-3 block.
336 do {
337 int32_t block;
338 if ((i3Block & 0x8000) == 0) {
339 block = index[i3Block + i3];
340 } else {
341 // 18-bit indexes stored in groups of 9 entries per 8 indexes.
342 int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
343 int32_t gi = i3 & 7;
344 block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000;
345 block |= index[group + gi];
346 }
347 if (block == prevBlock && (c - start) >= dataBlockLength) {
348 // The block is the same as the previous one, and filled with value.
349 U_ASSERT((c & (dataBlockLength - 1)) == 0);
350 c += dataBlockLength;
351 } else {
352 int32_t dataMask = dataBlockLength - 1;
353 prevBlock = block;
354 if (block == trie->dataNullOffset) {
355 // This is the data null block.
356 if (haveValue) {
357 if (nullValue != value) {
358 return c - 1;
359 }
360 } else {
361 trieValue = trie->nullValue;
362 value = nullValue;
363 if (pValue != nullptr) { *pValue = nullValue; }
364 haveValue = true;
365 }
366 c = (c + dataBlockLength) & ~dataMask;
367 } else {
368 int32_t di = block + (c & dataMask);
369 uint32_t trieValue2 = getValue(trie->data, valueWidth, di);
370 if (haveValue) {
371 if (trieValue2 != trieValue) {
372 if (filter == nullptr ||
373 maybeFilterValue(trieValue2, trie->nullValue, nullValue,
374 filter, context) != value) {
375 return c - 1;
376 }
377 trieValue = trieValue2; // may or may not help
378 }
379 } else {
380 trieValue = trieValue2;
381 value = maybeFilterValue(trieValue2, trie->nullValue, nullValue,
382 filter, context);
383 if (pValue != nullptr) { *pValue = value; }
384 haveValue = true;
385 }
386 while ((++c & dataMask) != 0) {
387 trieValue2 = getValue(trie->data, valueWidth, ++di);
388 if (trieValue2 != trieValue) {
389 if (filter == nullptr ||
390 maybeFilterValue(trieValue2, trie->nullValue, nullValue,
391 filter, context) != value) {
392 return c - 1;
393 }
394 trieValue = trieValue2; // may or may not help
395 }
396 }
397 }
398 }
399 } while (++i3 < i3BlockLength);
400 } while (c < trie->highStart);
401 U_ASSERT(haveValue);
402 int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
403 uint32_t highValue = getValue(trie->data, valueWidth, di);
404 if (maybeFilterValue(highValue, trie->nullValue, nullValue,
405 filter, context) != value) {
406 return c - 1;
407 } else {
408 return MAX_UNICODE;
409 }
410 }
411
412 } // namespace
413
414 U_CFUNC UChar32
ucptrie_internalGetRange(UCPTrieGetRange * getRange,const void * trie,UChar32 start,UCPMapRangeOption option,uint32_t surrogateValue,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)415 ucptrie_internalGetRange(UCPTrieGetRange *getRange,
416 const void *trie, UChar32 start,
417 UCPMapRangeOption option, uint32_t surrogateValue,
418 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
419 if (option == UCPMAP_RANGE_NORMAL) {
420 return getRange(trie, start, filter, context, pValue);
421 }
422 uint32_t value;
423 if (pValue == nullptr) {
424 // We need to examine the range value even if the caller does not want it.
425 pValue = &value;
426 }
427 UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;
428 UChar32 end = getRange(trie, start, filter, context, pValue);
429 if (end < 0xd7ff || start > surrEnd) {
430 return end;
431 }
432 // The range overlaps with surrogates, or ends just before the first one.
433 if (*pValue == surrogateValue) {
434 if (end >= surrEnd) {
435 // Surrogates followed by a non-surrogateValue range,
436 // or surrogates are part of a larger surrogateValue range.
437 return end;
438 }
439 } else {
440 if (start <= 0xd7ff) {
441 return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates.
442 }
443 // Start is a surrogate with a non-surrogateValue code *unit* value.
444 // Return a surrogateValue code *point* range.
445 *pValue = surrogateValue;
446 if (end > surrEnd) {
447 return surrEnd; // Surrogate range ends before non-surrogateValue rest of range.
448 }
449 }
450 // See if the surrogateValue surrogate range can be merged with
451 // an immediately following range.
452 uint32_t value2;
453 UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2);
454 if (value2 == surrogateValue) {
455 return end2;
456 }
457 return surrEnd;
458 }
459
460 U_CAPI UChar32 U_EXPORT2
ucptrie_getRange(const UCPTrie * trie,UChar32 start,UCPMapRangeOption option,uint32_t surrogateValue,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)461 ucptrie_getRange(const UCPTrie *trie, UChar32 start,
462 UCPMapRangeOption option, uint32_t surrogateValue,
463 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
464 return ucptrie_internalGetRange(getRange, trie, start,
465 option, surrogateValue,
466 filter, context, pValue);
467 }
468
469 U_CAPI int32_t U_EXPORT2
ucptrie_toBinary(const UCPTrie * trie,void * data,int32_t capacity,UErrorCode * pErrorCode)470 ucptrie_toBinary(const UCPTrie *trie,
471 void *data, int32_t capacity,
472 UErrorCode *pErrorCode) {
473 if (U_FAILURE(*pErrorCode)) {
474 return 0;
475 }
476
477 UCPTrieType type = (UCPTrieType)trie->type;
478 UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
479 if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
480 valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth ||
481 capacity < 0 ||
482 (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) {
483 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
484 return 0;
485 }
486
487 int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2;
488 switch (valueWidth) {
489 case UCPTRIE_VALUE_BITS_16:
490 length += trie->dataLength * 2;
491 break;
492 case UCPTRIE_VALUE_BITS_32:
493 length += trie->dataLength * 4;
494 break;
495 case UCPTRIE_VALUE_BITS_8:
496 length += trie->dataLength;
497 break;
498 default:
499 // unreachable
500 break;
501 }
502 if (capacity < length) {
503 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
504 return length;
505 }
506
507 char *bytes = (char *)data;
508 UCPTrieHeader *header = (UCPTrieHeader *)bytes;
509 header->signature = UCPTRIE_SIG; // "Tri3"
510 header->options = (uint16_t)(
511 ((trie->dataLength & 0xf0000) >> 4) |
512 ((trie->dataNullOffset & 0xf0000) >> 8) |
513 (trie->type << 6) |
514 valueWidth);
515 header->indexLength = (uint16_t)trie->indexLength;
516 header->dataLength = (uint16_t)trie->dataLength;
517 header->index3NullOffset = trie->index3NullOffset;
518 header->dataNullOffset = (uint16_t)trie->dataNullOffset;
519 header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2;
520 bytes += sizeof(UCPTrieHeader);
521
522 uprv_memcpy(bytes, trie->index, trie->indexLength * 2);
523 bytes += trie->indexLength * 2;
524
525 switch (valueWidth) {
526 case UCPTRIE_VALUE_BITS_16:
527 uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2);
528 break;
529 case UCPTRIE_VALUE_BITS_32:
530 uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4);
531 break;
532 case UCPTRIE_VALUE_BITS_8:
533 uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength);
534 break;
535 default:
536 // unreachable
537 break;
538 }
539 return length;
540 }
541
542 namespace {
543
544 #ifdef UCPTRIE_DEBUG
countNull(const UCPTrie * trie)545 long countNull(const UCPTrie *trie) {
546 uint32_t nullValue=trie->nullValue;
547 int32_t length=trie->dataLength;
548 long count=0;
549 switch (trie->valueWidth) {
550 case UCPTRIE_VALUE_BITS_16:
551 for(int32_t i=0; i<length; ++i) {
552 if(trie->data.ptr16[i]==nullValue) { ++count; }
553 }
554 break;
555 case UCPTRIE_VALUE_BITS_32:
556 for(int32_t i=0; i<length; ++i) {
557 if(trie->data.ptr32[i]==nullValue) { ++count; }
558 }
559 break;
560 case UCPTRIE_VALUE_BITS_8:
561 for(int32_t i=0; i<length; ++i) {
562 if(trie->data.ptr8[i]==nullValue) { ++count; }
563 }
564 break;
565 default:
566 // unreachable
567 break;
568 }
569 return count;
570 }
571
572 U_CFUNC void
ucptrie_printLengths(const UCPTrie * trie,const char * which)573 ucptrie_printLengths(const UCPTrie *trie, const char *which) {
574 long indexLength=trie->indexLength;
575 long dataLength=(long)trie->dataLength;
576 long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+
577 dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 :
578 trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1);
579 printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n",
580 which, trie->name, indexLength, dataLength, countNull(trie), totalLength);
581 }
582 #endif
583
584 } // namespace
585
586 // UCPMap ----
587 // Initially, this is the same as UCPTrie. This may well change.
588
589 U_CAPI uint32_t U_EXPORT2
ucpmap_get(const UCPMap * map,UChar32 c)590 ucpmap_get(const UCPMap *map, UChar32 c) {
591 return ucptrie_get(reinterpret_cast<const UCPTrie *>(map), c);
592 }
593
594 U_CAPI UChar32 U_EXPORT2
ucpmap_getRange(const UCPMap * map,UChar32 start,UCPMapRangeOption option,uint32_t surrogateValue,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)595 ucpmap_getRange(const UCPMap *map, UChar32 start,
596 UCPMapRangeOption option, uint32_t surrogateValue,
597 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
598 return ucptrie_getRange(reinterpret_cast<const UCPTrie *>(map), start,
599 option, surrogateValue,
600 filter, context, pValue);
601 }
602