1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // umutablecptrie.cpp (inspired by utrie2_builder.cpp)
5 // created: 2017dec29 Markus W. Scherer
6
7 // #define UCPTRIE_DEBUG
8 #ifdef UCPTRIE_DEBUG
9 # include <stdio.h>
10 #endif
11
12 #include "unicode/utypes.h"
13 #include "unicode/ucptrie.h"
14 #include "unicode/umutablecptrie.h"
15 #include "unicode/uobject.h"
16 #include "unicode/utf16.h"
17 #include "cmemory.h"
18 #include "uassert.h"
19 #include "ucptrie_impl.h"
20
21 // ICU-20235 In case Microsoft math.h has defined this, undefine it.
22 #ifdef OVERFLOW
23 #undef OVERFLOW
24 #endif
25
26 U_NAMESPACE_BEGIN
27
28 namespace {
29
30 constexpr int32_t MAX_UNICODE = 0x10ffff;
31
32 constexpr int32_t UNICODE_LIMIT = 0x110000;
33 constexpr int32_t BMP_LIMIT = 0x10000;
34 constexpr int32_t ASCII_LIMIT = 0x80;
35
36 constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
37 constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
38 constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
39
40 constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
41
42 // Flag values for data blocks.
43 constexpr uint8_t ALL_SAME = 0;
44 constexpr uint8_t MIXED = 1;
45 constexpr uint8_t SAME_AS = 2;
46
47 /** Start with allocation of 16k data entries. */
48 constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);
49
50 /** Grow about 8x each time. */
51 constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);
52
53 /**
54 * Maximum length of the build-time data array.
55 * One entry per 0x110000 code points.
56 */
57 constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
58
59 // Flag values for index-3 blocks while compacting/building.
60 constexpr uint8_t I3_NULL = 0;
61 constexpr uint8_t I3_BMP = 1;
62 constexpr uint8_t I3_16 = 2;
63 constexpr uint8_t I3_18 = 3;
64
65 constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
66
67 class AllSameBlocks;
68 class MixedBlocks;
69
70 class MutableCodePointTrie : public UMemory {
71 public:
72 MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
73 MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
74 MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
75 ~MutableCodePointTrie();
76
77 MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
78
79 static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
80 static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
81
82 uint32_t get(UChar32 c) const;
83 int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
84 uint32_t *pValue) const;
85
86 void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
87 void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
88
89 UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
90
91 private:
92 void clear();
93
94 bool ensureHighStart(UChar32 c);
95 int32_t allocDataBlock(int32_t blockLength);
96 int32_t getDataBlock(int32_t i);
97
98 void maskValues(uint32_t mask);
99 UChar32 findHighStart() const;
100 int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
101 int32_t compactData(
102 int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
103 int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
104 int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
105 int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
106
107 uint32_t *index = nullptr;
108 int32_t indexCapacity = 0;
109 int32_t index3NullOffset = -1;
110 uint32_t *data = nullptr;
111 int32_t dataCapacity = 0;
112 int32_t dataLength = 0;
113 int32_t dataNullOffset = -1;
114
115 uint32_t origInitialValue;
116 uint32_t initialValue;
117 uint32_t errorValue;
118 UChar32 highStart;
119 uint32_t highValue;
120 #ifdef UCPTRIE_DEBUG
121 public:
122 const char *name;
123 #endif
124 private:
125 /** Temporary array while building the final data. */
126 uint16_t *index16 = nullptr;
127 uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
128 };
129
MutableCodePointTrie(uint32_t iniValue,uint32_t errValue,UErrorCode & errorCode)130 MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
131 origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
132 highStart(0), highValue(initialValue)
133 #ifdef UCPTRIE_DEBUG
134 , name("open")
135 #endif
136 {
137 if (U_FAILURE(errorCode)) { return; }
138 index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);
139 data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);
140 if (index == nullptr || data == nullptr) {
141 errorCode = U_MEMORY_ALLOCATION_ERROR;
142 return;
143 }
144 indexCapacity = BMP_I_LIMIT;
145 dataCapacity = INITIAL_DATA_LENGTH;
146 }
147
MutableCodePointTrie(const MutableCodePointTrie & other,UErrorCode & errorCode)148 MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
149 index3NullOffset(other.index3NullOffset),
150 dataNullOffset(other.dataNullOffset),
151 origInitialValue(other.origInitialValue), initialValue(other.initialValue),
152 errorValue(other.errorValue),
153 highStart(other.highStart), highValue(other.highValue)
154 #ifdef UCPTRIE_DEBUG
155 , name("mutable clone")
156 #endif
157 {
158 if (U_FAILURE(errorCode)) { return; }
159 int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
160 index = (uint32_t *)uprv_malloc(iCapacity * 4);
161 data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);
162 if (index == nullptr || data == nullptr) {
163 errorCode = U_MEMORY_ALLOCATION_ERROR;
164 return;
165 }
166 indexCapacity = iCapacity;
167 dataCapacity = other.dataCapacity;
168
169 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
170 uprv_memcpy(flags, other.flags, iLimit);
171 uprv_memcpy(index, other.index, iLimit * 4);
172 uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
173 dataLength = other.dataLength;
174 U_ASSERT(other.index16 == nullptr);
175 }
176
~MutableCodePointTrie()177 MutableCodePointTrie::~MutableCodePointTrie() {
178 uprv_free(index);
179 uprv_free(data);
180 uprv_free(index16);
181 }
182
fromUCPMap(const UCPMap * map,UErrorCode & errorCode)183 MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
184 // Use the highValue as the initialValue to reduce the highStart.
185 uint32_t errorValue = ucpmap_get(map, -1);
186 uint32_t initialValue = ucpmap_get(map, 0x10ffff);
187 LocalPointer<MutableCodePointTrie> mutableTrie(
188 new MutableCodePointTrie(initialValue, errorValue, errorCode),
189 errorCode);
190 if (U_FAILURE(errorCode)) {
191 return nullptr;
192 }
193 UChar32 start = 0, end;
194 uint32_t value;
195 while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
196 nullptr, nullptr, &value)) >= 0) {
197 if (value != initialValue) {
198 if (start == end) {
199 mutableTrie->set(start, value, errorCode);
200 } else {
201 mutableTrie->setRange(start, end, value, errorCode);
202 }
203 }
204 start = end + 1;
205 }
206 if (U_SUCCESS(errorCode)) {
207 return mutableTrie.orphan();
208 } else {
209 return nullptr;
210 }
211 }
212
fromUCPTrie(const UCPTrie * trie,UErrorCode & errorCode)213 MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
214 // Use the highValue as the initialValue to reduce the highStart.
215 uint32_t errorValue;
216 uint32_t initialValue;
217 switch (trie->valueWidth) {
218 case UCPTRIE_VALUE_BITS_16:
219 errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
220 initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
221 break;
222 case UCPTRIE_VALUE_BITS_32:
223 errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
224 initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
225 break;
226 case UCPTRIE_VALUE_BITS_8:
227 errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
228 initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
229 break;
230 default:
231 // Unreachable if the trie is properly initialized.
232 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
233 return nullptr;
234 }
235 LocalPointer<MutableCodePointTrie> mutableTrie(
236 new MutableCodePointTrie(initialValue, errorValue, errorCode),
237 errorCode);
238 if (U_FAILURE(errorCode)) {
239 return nullptr;
240 }
241 UChar32 start = 0, end;
242 uint32_t value;
243 while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
244 nullptr, nullptr, &value)) >= 0) {
245 if (value != initialValue) {
246 if (start == end) {
247 mutableTrie->set(start, value, errorCode);
248 } else {
249 mutableTrie->setRange(start, end, value, errorCode);
250 }
251 }
252 start = end + 1;
253 }
254 if (U_SUCCESS(errorCode)) {
255 return mutableTrie.orphan();
256 } else {
257 return nullptr;
258 }
259 }
260
clear()261 void MutableCodePointTrie::clear() {
262 index3NullOffset = dataNullOffset = -1;
263 dataLength = 0;
264 highValue = initialValue = origInitialValue;
265 highStart = 0;
266 uprv_free(index16);
267 index16 = nullptr;
268 }
269
get(UChar32 c) const270 uint32_t MutableCodePointTrie::get(UChar32 c) const {
271 if ((uint32_t)c > MAX_UNICODE) {
272 return errorValue;
273 }
274 if (c >= highStart) {
275 return highValue;
276 }
277 int32_t i = c >> UCPTRIE_SHIFT_3;
278 if (flags[i] == ALL_SAME) {
279 return index[i];
280 } else {
281 return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];
282 }
283 }
284
maybeFilterValue(uint32_t value,uint32_t initialValue,uint32_t nullValue,UCPMapValueFilter * filter,const void * context)285 inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
286 UCPMapValueFilter *filter, const void *context) {
287 if (value == initialValue) {
288 value = nullValue;
289 } else if (filter != nullptr) {
290 value = filter(context, value);
291 }
292 return value;
293 }
294
getRange(UChar32 start,UCPMapValueFilter * filter,const void * context,uint32_t * pValue) const295 UChar32 MutableCodePointTrie::getRange(
296 UChar32 start, UCPMapValueFilter *filter, const void *context,
297 uint32_t *pValue) const {
298 if ((uint32_t)start > MAX_UNICODE) {
299 return U_SENTINEL;
300 }
301 if (start >= highStart) {
302 if (pValue != nullptr) {
303 uint32_t value = highValue;
304 if (filter != nullptr) { value = filter(context, value); }
305 *pValue = value;
306 }
307 return MAX_UNICODE;
308 }
309 uint32_t nullValue = initialValue;
310 if (filter != nullptr) { nullValue = filter(context, nullValue); }
311 UChar32 c = start;
312 uint32_t trieValue, value;
313 bool haveValue = false;
314 int32_t i = c >> UCPTRIE_SHIFT_3;
315 do {
316 if (flags[i] == ALL_SAME) {
317 uint32_t trieValue2 = index[i];
318 if (haveValue) {
319 if (trieValue2 != trieValue) {
320 if (filter == nullptr ||
321 maybeFilterValue(trieValue2, initialValue, nullValue,
322 filter, context) != value) {
323 return c - 1;
324 }
325 trieValue = trieValue2; // may or may not help
326 }
327 } else {
328 trieValue = trieValue2;
329 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
330 if (pValue != nullptr) { *pValue = value; }
331 haveValue = true;
332 }
333 c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
334 } else /* MIXED */ {
335 int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
336 uint32_t trieValue2 = data[di];
337 if (haveValue) {
338 if (trieValue2 != trieValue) {
339 if (filter == nullptr ||
340 maybeFilterValue(trieValue2, initialValue, nullValue,
341 filter, context) != value) {
342 return c - 1;
343 }
344 trieValue = trieValue2; // may or may not help
345 }
346 } else {
347 trieValue = trieValue2;
348 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
349 if (pValue != nullptr) { *pValue = value; }
350 haveValue = true;
351 }
352 while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
353 trieValue2 = data[++di];
354 if (trieValue2 != trieValue) {
355 if (filter == nullptr ||
356 maybeFilterValue(trieValue2, initialValue, nullValue,
357 filter, context) != value) {
358 return c - 1;
359 }
360 }
361 trieValue = trieValue2; // may or may not help
362 }
363 }
364 ++i;
365 } while (c < highStart);
366 U_ASSERT(haveValue);
367 if (maybeFilterValue(highValue, initialValue, nullValue,
368 filter, context) != value) {
369 return c - 1;
370 } else {
371 return MAX_UNICODE;
372 }
373 }
374
375 void
writeBlock(uint32_t * block,uint32_t value)376 writeBlock(uint32_t *block, uint32_t value) {
377 uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
378 while (block < limit) {
379 *block++ = value;
380 }
381 }
382
ensureHighStart(UChar32 c)383 bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
384 if (c >= highStart) {
385 // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
386 c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
387 int32_t i = highStart >> UCPTRIE_SHIFT_3;
388 int32_t iLimit = c >> UCPTRIE_SHIFT_3;
389 if (iLimit > indexCapacity) {
390 uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);
391 if (newIndex == nullptr) { return false; }
392 uprv_memcpy(newIndex, index, i * 4);
393 uprv_free(index);
394 index = newIndex;
395 indexCapacity = I_LIMIT;
396 }
397 do {
398 flags[i] = ALL_SAME;
399 index[i] = initialValue;
400 } while(++i < iLimit);
401 highStart = c;
402 }
403 return true;
404 }
405
allocDataBlock(int32_t blockLength)406 int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
407 int32_t newBlock = dataLength;
408 int32_t newTop = newBlock + blockLength;
409 if (newTop > dataCapacity) {
410 int32_t capacity;
411 if (dataCapacity < MEDIUM_DATA_LENGTH) {
412 capacity = MEDIUM_DATA_LENGTH;
413 } else if (dataCapacity < MAX_DATA_LENGTH) {
414 capacity = MAX_DATA_LENGTH;
415 } else {
416 // Should never occur.
417 // Either MAX_DATA_LENGTH is incorrect,
418 // or the code writes more values than should be possible.
419 return -1;
420 }
421 uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4);
422 if (newData == nullptr) {
423 return -1;
424 }
425 uprv_memcpy(newData, data, (size_t)dataLength * 4);
426 uprv_free(data);
427 data = newData;
428 dataCapacity = capacity;
429 }
430 dataLength = newTop;
431 return newBlock;
432 }
433
434 /**
435 * No error checking for illegal arguments.
436 *
437 * @return -1 if no new data block available (out of memory in data array)
438 * @internal
439 */
getDataBlock(int32_t i)440 int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
441 if (flags[i] == MIXED) {
442 return index[i];
443 }
444 if (i < BMP_I_LIMIT) {
445 int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
446 if (newBlock < 0) { return newBlock; }
447 int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
448 int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
449 do {
450 U_ASSERT(flags[iStart] == ALL_SAME);
451 writeBlock(data + newBlock, index[iStart]);
452 flags[iStart] = MIXED;
453 index[iStart++] = newBlock;
454 newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
455 } while (iStart < iLimit);
456 return index[i];
457 } else {
458 int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
459 if (newBlock < 0) { return newBlock; }
460 writeBlock(data + newBlock, index[i]);
461 flags[i] = MIXED;
462 index[i] = newBlock;
463 return newBlock;
464 }
465 }
466
set(UChar32 c,uint32_t value,UErrorCode & errorCode)467 void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
468 if (U_FAILURE(errorCode)) {
469 return;
470 }
471 if ((uint32_t)c > MAX_UNICODE) {
472 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
473 return;
474 }
475
476 int32_t block;
477 if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
478 errorCode = U_MEMORY_ALLOCATION_ERROR;
479 return;
480 }
481
482 data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
483 }
484
485 void
fillBlock(uint32_t * block,UChar32 start,UChar32 limit,uint32_t value)486 fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
487 uint32_t *pLimit = block + limit;
488 block += start;
489 while (block < pLimit) {
490 *block++ = value;
491 }
492 }
493
setRange(UChar32 start,UChar32 end,uint32_t value,UErrorCode & errorCode)494 void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
495 if (U_FAILURE(errorCode)) {
496 return;
497 }
498 if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
499 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
500 return;
501 }
502 if (!ensureHighStart(end)) {
503 errorCode = U_MEMORY_ALLOCATION_ERROR;
504 return;
505 }
506
507 UChar32 limit = end + 1;
508 if (start & UCPTRIE_SMALL_DATA_MASK) {
509 // Set partial block at [start..following block boundary[.
510 int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
511 if (block < 0) {
512 errorCode = U_MEMORY_ALLOCATION_ERROR;
513 return;
514 }
515
516 UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
517 if (nextStart <= limit) {
518 fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
519 value);
520 start = nextStart;
521 } else {
522 fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
523 value);
524 return;
525 }
526 }
527
528 // Number of positions in the last, partial block.
529 int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
530
531 // Round down limit to a block boundary.
532 limit &= ~UCPTRIE_SMALL_DATA_MASK;
533
534 // Iterate over all-value blocks.
535 while (start < limit) {
536 int32_t i = start >> UCPTRIE_SHIFT_3;
537 if (flags[i] == ALL_SAME) {
538 index[i] = value;
539 } else /* MIXED */ {
540 fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
541 }
542 start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
543 }
544
545 if (rest > 0) {
546 // Set partial block at [last block boundary..limit[.
547 int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
548 if (block < 0) {
549 errorCode = U_MEMORY_ALLOCATION_ERROR;
550 return;
551 }
552
553 fillBlock(data + block, 0, rest, value);
554 }
555 }
556
557 /* compaction --------------------------------------------------------------- */
558
maskValues(uint32_t mask)559 void MutableCodePointTrie::maskValues(uint32_t mask) {
560 initialValue &= mask;
561 errorValue &= mask;
562 highValue &= mask;
563 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
564 for (int32_t i = 0; i < iLimit; ++i) {
565 if (flags[i] == ALL_SAME) {
566 index[i] &= mask;
567 }
568 }
569 for (int32_t i = 0; i < dataLength; ++i) {
570 data[i] &= mask;
571 }
572 }
573
574 template<typename UIntA, typename UIntB>
equalBlocks(const UIntA * s,const UIntB * t,int32_t length)575 bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
576 while (length > 0 && *s == *t) {
577 ++s;
578 ++t;
579 --length;
580 }
581 return length == 0;
582 }
583
allValuesSameAs(const uint32_t * p,int32_t length,uint32_t value)584 bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
585 const uint32_t *pLimit = p + length;
586 while (p < pLimit && *p == value) { ++p; }
587 return p == pLimit;
588 }
589
590 /** Search for an identical block. */
findSameBlock(const uint16_t * p,int32_t pStart,int32_t length,const uint16_t * q,int32_t qStart,int32_t blockLength)591 int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
592 const uint16_t *q, int32_t qStart, int32_t blockLength) {
593 // Ensure that we do not even partially get past length.
594 length -= blockLength;
595
596 q += qStart;
597 while (pStart <= length) {
598 if (equalBlocks(p + pStart, q, blockLength)) {
599 return pStart;
600 }
601 ++pStart;
602 }
603 return -1;
604 }
605
findAllSameBlock(const uint32_t * p,int32_t start,int32_t limit,uint32_t value,int32_t blockLength)606 int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
607 uint32_t value, int32_t blockLength) {
608 // Ensure that we do not even partially get past limit.
609 limit -= blockLength;
610
611 for (int32_t block = start; block <= limit; ++block) {
612 if (p[block] == value) {
613 for (int32_t i = 1;; ++i) {
614 if (i == blockLength) {
615 return block;
616 }
617 if (p[block + i] != value) {
618 block += i;
619 break;
620 }
621 }
622 }
623 }
624 return -1;
625 }
626
627 /**
628 * Look for maximum overlap of the beginning of the other block
629 * with the previous, adjacent block.
630 */
631 template<typename UIntA, typename UIntB>
getOverlap(const UIntA * p,int32_t length,const UIntB * q,int32_t qStart,int32_t blockLength)632 int32_t getOverlap(const UIntA *p, int32_t length,
633 const UIntB *q, int32_t qStart, int32_t blockLength) {
634 int32_t overlap = blockLength - 1;
635 U_ASSERT(overlap <= length);
636 q += qStart;
637 while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
638 --overlap;
639 }
640 return overlap;
641 }
642
getAllSameOverlap(const uint32_t * p,int32_t length,uint32_t value,int32_t blockLength)643 int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
644 int32_t blockLength) {
645 int32_t min = length - (blockLength - 1);
646 int32_t i = length;
647 while (min < i && p[i - 1] == value) { --i; }
648 return length - i;
649 }
650
isStartOfSomeFastBlock(uint32_t dataOffset,const uint32_t index[],int32_t fastILimit)651 bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
652 for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
653 if (index[i] == dataOffset) {
654 return true;
655 }
656 }
657 return false;
658 }
659
660 /**
661 * Finds the start of the last range in the trie by enumerating backward.
662 * Indexes for code points higher than this will be omitted.
663 */
findHighStart() const664 UChar32 MutableCodePointTrie::findHighStart() const {
665 int32_t i = highStart >> UCPTRIE_SHIFT_3;
666 while (i > 0) {
667 bool match;
668 if (flags[--i] == ALL_SAME) {
669 match = index[i] == highValue;
670 } else /* MIXED */ {
671 const uint32_t *p = data + index[i];
672 for (int32_t j = 0;; ++j) {
673 if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) {
674 match = true;
675 break;
676 }
677 if (p[j] != highValue) {
678 match = false;
679 break;
680 }
681 }
682 }
683 if (!match) {
684 return (i + 1) << UCPTRIE_SHIFT_3;
685 }
686 }
687 return 0;
688 }
689
690 class AllSameBlocks {
691 public:
692 static constexpr int32_t NEW_UNIQUE = -1;
693 static constexpr int32_t OVERFLOW = -2;
694
AllSameBlocks()695 AllSameBlocks() : length(0), mostRecent(-1) {}
696
findOrAdd(int32_t index,int32_t count,uint32_t value)697 int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
698 if (mostRecent >= 0 && values[mostRecent] == value) {
699 refCounts[mostRecent] += count;
700 return indexes[mostRecent];
701 }
702 for (int32_t i = 0; i < length; ++i) {
703 if (values[i] == value) {
704 mostRecent = i;
705 refCounts[i] += count;
706 return indexes[i];
707 }
708 }
709 if (length == CAPACITY) {
710 return OVERFLOW;
711 }
712 mostRecent = length;
713 indexes[length] = index;
714 values[length] = value;
715 refCounts[length++] = count;
716 return NEW_UNIQUE;
717 }
718
719 /** Replaces the block which has the lowest reference count. */
add(int32_t index,int32_t count,uint32_t value)720 void add(int32_t index, int32_t count, uint32_t value) {
721 U_ASSERT(length == CAPACITY);
722 int32_t least = -1;
723 int32_t leastCount = I_LIMIT;
724 for (int32_t i = 0; i < length; ++i) {
725 U_ASSERT(values[i] != value);
726 if (refCounts[i] < leastCount) {
727 least = i;
728 leastCount = refCounts[i];
729 }
730 }
731 U_ASSERT(least >= 0);
732 mostRecent = least;
733 indexes[least] = index;
734 values[least] = value;
735 refCounts[least] = count;
736 }
737
findMostUsed() const738 int32_t findMostUsed() const {
739 if (length == 0) { return -1; }
740 int32_t max = -1;
741 int32_t maxCount = 0;
742 for (int32_t i = 0; i < length; ++i) {
743 if (refCounts[i] > maxCount) {
744 max = i;
745 maxCount = refCounts[i];
746 }
747 }
748 return indexes[max];
749 }
750
751 private:
752 static constexpr int32_t CAPACITY = 32;
753
754 int32_t length;
755 int32_t mostRecent;
756
757 int32_t indexes[CAPACITY];
758 uint32_t values[CAPACITY];
759 int32_t refCounts[CAPACITY];
760 };
761
762 // Custom hash table for mixed-value blocks to be found anywhere in the
763 // compacted data or index so far.
764 class MixedBlocks {
765 public:
MixedBlocks()766 MixedBlocks() {}
~MixedBlocks()767 ~MixedBlocks() {
768 uprv_free(table);
769 }
770
init(int32_t maxLength,int32_t newBlockLength)771 bool init(int32_t maxLength, int32_t newBlockLength) {
772 // We store actual data indexes + 1 to reserve 0 for empty entries.
773 int32_t maxDataIndex = maxLength - newBlockLength + 1;
774 int32_t newLength;
775 if (maxDataIndex <= 0xfff) { // 4k
776 newLength = 6007;
777 shift = 12;
778 mask = 0xfff;
779 } else if (maxDataIndex <= 0x7fff) { // 32k
780 newLength = 50021;
781 shift = 15;
782 mask = 0x7fff;
783 } else if (maxDataIndex <= 0x1ffff) { // 128k
784 newLength = 200003;
785 shift = 17;
786 mask = 0x1ffff;
787 } else {
788 // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
789 newLength = 1500007;
790 shift = 21;
791 mask = 0x1fffff;
792 }
793 if (newLength > capacity) {
794 uprv_free(table);
795 table = (uint32_t *)uprv_malloc(newLength * 4);
796 if (table == nullptr) {
797 return false;
798 }
799 capacity = newLength;
800 }
801 length = newLength;
802 uprv_memset(table, 0, length * 4);
803
804 blockLength = newBlockLength;
805 return true;
806 }
807
808 template<typename UInt>
extend(const UInt * data,int32_t minStart,int32_t prevDataLength,int32_t newDataLength)809 void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
810 int32_t start = prevDataLength - blockLength;
811 if (start >= minStart) {
812 ++start; // Skip the last block that we added last time.
813 } else {
814 start = minStart; // Begin with the first full block.
815 }
816 for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
817 uint32_t hashCode = makeHashCode(data, start);
818 addEntry(data, start, hashCode, start);
819 }
820 }
821
822 template<typename UIntA, typename UIntB>
findBlock(const UIntA * data,const UIntB * blockData,int32_t blockStart) const823 int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
824 uint32_t hashCode = makeHashCode(blockData, blockStart);
825 int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
826 if (entryIndex >= 0) {
827 return (table[entryIndex] & mask) - 1;
828 } else {
829 return -1;
830 }
831 }
832
findAllSameBlock(const uint32_t * data,uint32_t blockValue) const833 int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
834 uint32_t hashCode = makeHashCode(blockValue);
835 int32_t entryIndex = findEntry(data, blockValue, hashCode);
836 if (entryIndex >= 0) {
837 return (table[entryIndex] & mask) - 1;
838 } else {
839 return -1;
840 }
841 }
842
843 private:
844 template<typename UInt>
makeHashCode(const UInt * blockData,int32_t blockStart) const845 uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
846 int32_t blockLimit = blockStart + blockLength;
847 uint32_t hashCode = blockData[blockStart++];
848 do {
849 hashCode = 37 * hashCode + blockData[blockStart++];
850 } while (blockStart < blockLimit);
851 return hashCode;
852 }
853
makeHashCode(uint32_t blockValue) const854 uint32_t makeHashCode(uint32_t blockValue) const {
855 uint32_t hashCode = blockValue;
856 for (int32_t i = 1; i < blockLength; ++i) {
857 hashCode = 37 * hashCode + blockValue;
858 }
859 return hashCode;
860 }
861
862 template<typename UInt>
addEntry(const UInt * data,int32_t blockStart,uint32_t hashCode,int32_t dataIndex)863 void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
864 U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
865 int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
866 if (entryIndex < 0) {
867 table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
868 }
869 }
870
871 template<typename UIntA, typename UIntB>
findEntry(const UIntA * data,const UIntB * blockData,int32_t blockStart,uint32_t hashCode) const872 int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
873 uint32_t hashCode) const {
874 uint32_t shiftedHashCode = hashCode << shift;
875 int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
876 for (int32_t entryIndex = initialEntryIndex;;) {
877 uint32_t entry = table[entryIndex];
878 if (entry == 0) {
879 return ~entryIndex;
880 }
881 if ((entry & ~mask) == shiftedHashCode) {
882 int32_t dataIndex = (entry & mask) - 1;
883 if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
884 return entryIndex;
885 }
886 }
887 entryIndex = nextIndex(initialEntryIndex, entryIndex);
888 }
889 }
890
findEntry(const uint32_t * data,uint32_t blockValue,uint32_t hashCode) const891 int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
892 uint32_t shiftedHashCode = hashCode << shift;
893 int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
894 for (int32_t entryIndex = initialEntryIndex;;) {
895 uint32_t entry = table[entryIndex];
896 if (entry == 0) {
897 return ~entryIndex;
898 }
899 if ((entry & ~mask) == shiftedHashCode) {
900 int32_t dataIndex = (entry & mask) - 1;
901 if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
902 return entryIndex;
903 }
904 }
905 entryIndex = nextIndex(initialEntryIndex, entryIndex);
906 }
907 }
908
nextIndex(int32_t initialEntryIndex,int32_t entryIndex) const909 inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
910 // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
911 return (entryIndex + initialEntryIndex) % length;
912 }
913
914 // Hash table.
915 // The length is a prime number, larger than the maximum data length.
916 // The "shift" lower bits store a data index + 1.
917 // The remaining upper bits store a partial hashCode of the block data values.
918 uint32_t *table = nullptr;
919 int32_t capacity = 0;
920 int32_t length = 0;
921 int32_t shift = 0;
922 uint32_t mask = 0;
923
924 int32_t blockLength = 0;
925 };
926
compactWholeDataBlocks(int32_t fastILimit,AllSameBlocks & allSameBlocks)927 int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
928 #ifdef UCPTRIE_DEBUG
929 bool overflow = false;
930 #endif
931
932 // ASCII data will be stored as a linear table, even if the following code
933 // does not yet count it that way.
934 int32_t newDataCapacity = ASCII_LIMIT;
935 // Add room for a small data null block in case it would match the start of
936 // a fast data block where dataNullOffset must not be set in that case.
937 newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
938 // Add room for special values (errorValue, highValue) and padding.
939 newDataCapacity += 4;
940 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
941 int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
942 int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
943 for (int32_t i = 0; i < iLimit; i += inc) {
944 if (i == fastILimit) {
945 blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
946 inc = 1;
947 }
948 uint32_t value = index[i];
949 if (flags[i] == MIXED) {
950 // Really mixed?
951 const uint32_t *p = data + value;
952 value = *p;
953 if (allValuesSameAs(p + 1, blockLength - 1, value)) {
954 flags[i] = ALL_SAME;
955 index[i] = value;
956 // Fall through to ALL_SAME handling.
957 } else {
958 newDataCapacity += blockLength;
959 continue;
960 }
961 } else {
962 U_ASSERT(flags[i] == ALL_SAME);
963 if (inc > 1) {
964 // Do all of the fast-range data block's ALL_SAME parts have the same value?
965 bool allSame = true;
966 int32_t next_i = i + inc;
967 for (int32_t j = i + 1; j < next_i; ++j) {
968 U_ASSERT(flags[j] == ALL_SAME);
969 if (index[j] != value) {
970 allSame = false;
971 break;
972 }
973 }
974 if (!allSame) {
975 // Turn it into a MIXED block.
976 if (getDataBlock(i) < 0) {
977 return -1;
978 }
979 newDataCapacity += blockLength;
980 continue;
981 }
982 }
983 }
984 // Is there another ALL_SAME block with the same value?
985 int32_t other = allSameBlocks.findOrAdd(i, inc, value);
986 if (other == AllSameBlocks::OVERFLOW) {
987 // The fixed-size array overflowed. Slow check for a duplicate block.
988 #ifdef UCPTRIE_DEBUG
989 if (!overflow) {
990 puts("UCPTrie AllSameBlocks overflow");
991 overflow = true;
992 }
993 #endif
994 int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
995 for (int32_t j = 0;; j += jInc) {
996 if (j == i) {
997 allSameBlocks.add(i, inc, value);
998 break;
999 }
1000 if (j == fastILimit) {
1001 jInc = 1;
1002 }
1003 if (flags[j] == ALL_SAME && index[j] == value) {
1004 allSameBlocks.add(j, jInc + inc, value);
1005 other = j;
1006 break;
1007 // We could keep counting blocks with the same value
1008 // before we add the first one, which may improve compaction in rare cases,
1009 // but it would make it slower.
1010 }
1011 }
1012 }
1013 if (other >= 0) {
1014 flags[i] = SAME_AS;
1015 index[i] = other;
1016 } else {
1017 // New unique same-value block.
1018 newDataCapacity += blockLength;
1019 }
1020 }
1021 return newDataCapacity;
1022 }
1023
1024 #ifdef UCPTRIE_DEBUG
1025 # define DEBUG_DO(expr) expr
1026 #else
1027 # define DEBUG_DO(expr)
1028 #endif
1029
1030 #ifdef UCPTRIE_DEBUG
1031 // Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
appendValue(char s[],int32_t length,uint32_t value)1032 int32_t appendValue(char s[], int32_t length, uint32_t value) {
1033 value ^= value >> 16;
1034 value ^= value >> 8;
1035 s[length] = 0xE2;
1036 s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
1037 s[length + 2] = (char)(0x80 + (value & 0x3F));
1038 return length + 3;
1039 }
1040
printBlock(const uint32_t * block,int32_t blockLength,uint32_t value,UChar32 start,int32_t overlap,uint32_t initialValue)1041 void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
1042 UChar32 start, int32_t overlap, uint32_t initialValue) {
1043 char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];
1044 int32_t length = 0;
1045 int32_t i;
1046 for (i = 0; i < overlap; ++i) {
1047 length = appendValue(s, length, 0); // Braille blank
1048 }
1049 s[length++] = '|';
1050 for (; i < blockLength; ++i) {
1051 if (block != nullptr) {
1052 value = block[i];
1053 }
1054 if (value == initialValue) {
1055 value = 0x40; // Braille lower left dot
1056 }
1057 length = appendValue(s, length, value);
1058 }
1059 s[length] = 0;
1060 start += overlap;
1061 if (start <= 0xffff) {
1062 printf(" %04lX %s|\n", (long)start, s);
1063 } else if (start <= 0xfffff) {
1064 printf(" %5lX %s|\n", (long)start, s);
1065 } else {
1066 printf(" %6lX %s|\n", (long)start, s);
1067 }
1068 }
1069 #endif
1070
1071 /**
1072 * Compacts a build-time trie.
1073 *
1074 * The compaction
1075 * - removes blocks that are identical with earlier ones
1076 * - overlaps each new non-duplicate block as much as possible with the previously-written one
1077 * - works with fast-range data blocks whose length is a multiple of that of
1078 * higher-code-point data blocks
1079 *
1080 * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
1081 */
compactData(int32_t fastILimit,uint32_t * newData,int32_t newDataCapacity,int32_t dataNullIndex,MixedBlocks & mixedBlocks,UErrorCode & errorCode)1082 int32_t MutableCodePointTrie::compactData(
1083 int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
1084 int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
1085 #ifdef UCPTRIE_DEBUG
1086 int32_t countSame=0, sumOverlaps=0;
1087 bool printData = dataLength == 29088 /* line.brk */ ||
1088 // dataLength == 30048 /* CanonIterData */ ||
1089 dataLength == 50400 /* zh.txt~stroke */;
1090 #endif
1091
1092 // The linear ASCII data has been copied into newData already.
1093 int32_t newDataLength = 0;
1094 for (int32_t i = 0; newDataLength < ASCII_LIMIT;
1095 newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
1096 index[i] = newDataLength;
1097 #ifdef UCPTRIE_DEBUG
1098 if (printData) {
1099 printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
1100 }
1101 #endif
1102 }
1103
1104 int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
1105 if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1106 errorCode = U_MEMORY_ALLOCATION_ERROR;
1107 return 0;
1108 }
1109 mixedBlocks.extend(newData, 0, 0, newDataLength);
1110
1111 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1112 int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1113 int32_t fastLength = 0;
1114 for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
1115 if (i == fastILimit) {
1116 blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1117 inc = 1;
1118 fastLength = newDataLength;
1119 if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1120 errorCode = U_MEMORY_ALLOCATION_ERROR;
1121 return 0;
1122 }
1123 mixedBlocks.extend(newData, 0, 0, newDataLength);
1124 }
1125 if (flags[i] == ALL_SAME) {
1126 uint32_t value = index[i];
1127 // Find an earlier part of the data array of length blockLength
1128 // that is filled with this value.
1129 int32_t n = mixedBlocks.findAllSameBlock(newData, value);
1130 // If we find a match, and the current block is the data null block,
1131 // and it is not a fast block but matches the start of a fast block,
1132 // then we need to continue looking.
1133 // This is because this small block is shorter than the fast block,
1134 // and not all of the rest of the fast block is filled with this value.
1135 // Otherwise trie.getRange() would detect that the fast block starts at
1136 // dataNullOffset and assume incorrectly that it is filled with the null value.
1137 while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
1138 isStartOfSomeFastBlock(n, index, fastILimit)) {
1139 n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
1140 }
1141 if (n >= 0) {
1142 DEBUG_DO(++countSame);
1143 index[i] = n;
1144 } else {
1145 n = getAllSameOverlap(newData, newDataLength, value, blockLength);
1146 DEBUG_DO(sumOverlaps += n);
1147 #ifdef UCPTRIE_DEBUG
1148 if (printData) {
1149 printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
1150 }
1151 #endif
1152 index[i] = newDataLength - n;
1153 int32_t prevDataLength = newDataLength;
1154 while (n < blockLength) {
1155 newData[newDataLength++] = value;
1156 ++n;
1157 }
1158 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1159 }
1160 } else if (flags[i] == MIXED) {
1161 const uint32_t *block = data + index[i];
1162 int32_t n = mixedBlocks.findBlock(newData, block, 0);
1163 if (n >= 0) {
1164 DEBUG_DO(++countSame);
1165 index[i] = n;
1166 } else {
1167 n = getOverlap(newData, newDataLength, block, 0, blockLength);
1168 DEBUG_DO(sumOverlaps += n);
1169 #ifdef UCPTRIE_DEBUG
1170 if (printData) {
1171 printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
1172 }
1173 #endif
1174 index[i] = newDataLength - n;
1175 int32_t prevDataLength = newDataLength;
1176 while (n < blockLength) {
1177 newData[newDataLength++] = block[n++];
1178 }
1179 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1180 }
1181 } else /* SAME_AS */ {
1182 uint32_t j = index[i];
1183 index[i] = index[j];
1184 }
1185 }
1186
1187 #ifdef UCPTRIE_DEBUG
1188 /* we saved some space */
1189 printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
1190 (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
1191 #endif
1192 return newDataLength;
1193 }
1194
compactIndex(int32_t fastILimit,MixedBlocks & mixedBlocks,UErrorCode & errorCode)1195 int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
1196 UErrorCode &errorCode) {
1197 int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
1198 if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
1199 // Only the linear fast index, no multi-stage index tables.
1200 index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1201 return fastIndexLength;
1202 }
1203
1204 // Condense the fast index table.
1205 // Also, does it contain an index-3 block with all dataNullOffset?
1206 uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength
1207 int32_t i3FirstNull = -1;
1208 for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
1209 uint32_t i3 = index[i];
1210 fastIndex[j] = (uint16_t)i3;
1211 if (i3 == (uint32_t)dataNullOffset) {
1212 if (i3FirstNull < 0) {
1213 i3FirstNull = j;
1214 } else if (index3NullOffset < 0 &&
1215 (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1216 index3NullOffset = i3FirstNull;
1217 }
1218 } else {
1219 i3FirstNull = -1;
1220 }
1221 // Set the index entries that compactData() skipped.
1222 // Needed when the multi-stage index covers the fast index range as well.
1223 int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1224 while (++i < iNext) {
1225 i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1226 index[i] = i3;
1227 }
1228 }
1229
1230 if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1231 errorCode = U_MEMORY_ALLOCATION_ERROR;
1232 return 0;
1233 }
1234 mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
1235
1236 // Examine index-3 blocks. For each determine one of:
1237 // - same as the index-3 null block
1238 // - same as a fast-index block
1239 // - 16-bit indexes
1240 // - 18-bit indexes
1241 // We store this in the first flags entry for the index-3 block.
1242 //
1243 // Also determine an upper limit for the index-3 table length.
1244 int32_t index3Capacity = 0;
1245 i3FirstNull = index3NullOffset;
1246 bool hasLongI3Blocks = false;
1247 // If the fast index covers the whole BMP, then
1248 // the multi-stage index is only for supplementary code points.
1249 // Otherwise, the multi-stage index covers all of Unicode.
1250 int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
1251 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1252 for (int32_t i = iStart; i < iLimit;) {
1253 int32_t j = i;
1254 int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1255 uint32_t oredI3 = 0;
1256 bool isNull = true;
1257 do {
1258 uint32_t i3 = index[j];
1259 oredI3 |= i3;
1260 if (i3 != (uint32_t)dataNullOffset) {
1261 isNull = false;
1262 }
1263 } while (++j < jLimit);
1264 if (isNull) {
1265 flags[i] = I3_NULL;
1266 if (i3FirstNull < 0) {
1267 if (oredI3 <= 0xffff) {
1268 index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1269 } else {
1270 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1271 hasLongI3Blocks = true;
1272 }
1273 i3FirstNull = 0;
1274 }
1275 } else {
1276 if (oredI3 <= 0xffff) {
1277 int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
1278 if (n >= 0) {
1279 flags[i] = I3_BMP;
1280 index[i] = n;
1281 } else {
1282 flags[i] = I3_16;
1283 index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1284 }
1285 } else {
1286 flags[i] = I3_18;
1287 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1288 hasLongI3Blocks = true;
1289 }
1290 }
1291 i = j;
1292 }
1293
1294 int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
1295
1296 // Length of the index-1 table, rounded up.
1297 int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
1298
1299 // Index table: Fast index, index-1, index-3, index-2.
1300 // +1 for possible index table padding.
1301 int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
1302 index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);
1303 if (index16 == nullptr) {
1304 errorCode = U_MEMORY_ALLOCATION_ERROR;
1305 return 0;
1306 }
1307 uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
1308
1309 if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1310 errorCode = U_MEMORY_ALLOCATION_ERROR;
1311 return 0;
1312 }
1313 MixedBlocks longI3Blocks;
1314 if (hasLongI3Blocks) {
1315 if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
1316 errorCode = U_MEMORY_ALLOCATION_ERROR;
1317 return 0;
1318 }
1319 }
1320
1321 // Compact the index-3 table and write an uncompacted version of the index-2 table.
1322 uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity
1323 int32_t i2Length = 0;
1324 i3FirstNull = index3NullOffset;
1325 int32_t index3Start = fastIndexLength + index1Length;
1326 int32_t indexLength = index3Start;
1327 for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1328 int32_t i3;
1329 uint8_t f = flags[i];
1330 if (f == I3_NULL && i3FirstNull < 0) {
1331 // First index-3 null block. Write & overlap it like a normal block, then remember it.
1332 f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
1333 i3FirstNull = 0;
1334 }
1335 if (f == I3_NULL) {
1336 i3 = index3NullOffset;
1337 } else if (f == I3_BMP) {
1338 i3 = index[i];
1339 } else if (f == I3_16) {
1340 int32_t n = mixedBlocks.findBlock(index16, index, i);
1341 if (n >= 0) {
1342 i3 = n;
1343 } else {
1344 if (indexLength == index3Start) {
1345 // No overlap at the boundary between the index-1 and index-3 tables.
1346 n = 0;
1347 } else {
1348 n = getOverlap(index16, indexLength,
1349 index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
1350 }
1351 i3 = indexLength - n;
1352 int32_t prevIndexLength = indexLength;
1353 while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1354 index16[indexLength++] = index[i + n++];
1355 }
1356 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1357 if (hasLongI3Blocks) {
1358 longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1359 }
1360 }
1361 } else {
1362 U_ASSERT(f == I3_18);
1363 U_ASSERT(hasLongI3Blocks);
1364 // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
1365 int32_t j = i;
1366 int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1367 int32_t k = indexLength;
1368 do {
1369 ++k;
1370 uint32_t v = index[j++];
1371 uint32_t upperBits = (v & 0x30000) >> 2;
1372 index16[k++] = v;
1373 v = index[j++];
1374 upperBits |= (v & 0x30000) >> 4;
1375 index16[k++] = v;
1376 v = index[j++];
1377 upperBits |= (v & 0x30000) >> 6;
1378 index16[k++] = v;
1379 v = index[j++];
1380 upperBits |= (v & 0x30000) >> 8;
1381 index16[k++] = v;
1382 v = index[j++];
1383 upperBits |= (v & 0x30000) >> 10;
1384 index16[k++] = v;
1385 v = index[j++];
1386 upperBits |= (v & 0x30000) >> 12;
1387 index16[k++] = v;
1388 v = index[j++];
1389 upperBits |= (v & 0x30000) >> 14;
1390 index16[k++] = v;
1391 v = index[j++];
1392 upperBits |= (v & 0x30000) >> 16;
1393 index16[k++] = v;
1394 index16[k - 9] = upperBits;
1395 } while (j < jLimit);
1396 int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
1397 if (n >= 0) {
1398 i3 = n | 0x8000;
1399 } else {
1400 if (indexLength == index3Start) {
1401 // No overlap at the boundary between the index-1 and index-3 tables.
1402 n = 0;
1403 } else {
1404 n = getOverlap(index16, indexLength,
1405 index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
1406 }
1407 i3 = (indexLength - n) | 0x8000;
1408 int32_t prevIndexLength = indexLength;
1409 if (n > 0) {
1410 int32_t start = indexLength;
1411 while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
1412 index16[indexLength++] = index16[start + n++];
1413 }
1414 } else {
1415 indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
1416 }
1417 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1418 if (hasLongI3Blocks) {
1419 longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1420 }
1421 }
1422 }
1423 if (index3NullOffset < 0 && i3FirstNull >= 0) {
1424 index3NullOffset = i3;
1425 }
1426 // Set the index-2 table entry.
1427 index2[i2Length++] = i3;
1428 }
1429 U_ASSERT(i2Length == index2Capacity);
1430 U_ASSERT(indexLength <= index3Start + index3Capacity);
1431
1432 if (index3NullOffset < 0) {
1433 index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1434 }
1435 if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1436 // The index-3 offsets exceed 15 bits, or
1437 // the last one cannot be distinguished from the no-null-block value.
1438 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1439 return 0;
1440 }
1441
1442 // Compact the index-2 table and write the index-1 table.
1443 static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
1444 "must re-init mixedBlocks");
1445 int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
1446 int32_t i1 = fastIndexLength;
1447 for (int32_t i = 0; i < i2Length; i += blockLength) {
1448 int32_t n;
1449 if ((i2Length - i) >= blockLength) {
1450 // normal block
1451 U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
1452 n = mixedBlocks.findBlock(index16, index2, i);
1453 } else {
1454 // highStart is inside the last index-2 block. Shorten it.
1455 blockLength = i2Length - i;
1456 n = findSameBlock(index16, index3Start, indexLength,
1457 index2, i, blockLength);
1458 }
1459 int32_t i2;
1460 if (n >= 0) {
1461 i2 = n;
1462 } else {
1463 if (indexLength == index3Start) {
1464 // No overlap at the boundary between the index-1 and index-3/2 tables.
1465 n = 0;
1466 } else {
1467 n = getOverlap(index16, indexLength, index2, i, blockLength);
1468 }
1469 i2 = indexLength - n;
1470 int32_t prevIndexLength = indexLength;
1471 while (n < blockLength) {
1472 index16[indexLength++] = index2[i + n++];
1473 }
1474 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1475 }
1476 // Set the index-1 table entry.
1477 index16[i1++] = i2;
1478 }
1479 U_ASSERT(i1 == index3Start);
1480 U_ASSERT(indexLength <= index16Capacity);
1481
1482 #ifdef UCPTRIE_DEBUG
1483 /* we saved some space */
1484 printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
1485 (long)iLimit, (long)indexLength);
1486 #endif
1487
1488 return indexLength;
1489 }
1490
compactTrie(int32_t fastILimit,UErrorCode & errorCode)1491 int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
1492 // Find the real highStart and round it up.
1493 U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
1494 highValue = get(MAX_UNICODE);
1495 int32_t realHighStart = findHighStart();
1496 realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
1497 ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
1498 if (realHighStart == UNICODE_LIMIT) {
1499 highValue = initialValue;
1500 }
1501
1502 #ifdef UCPTRIE_DEBUG
1503 printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
1504 (long)realHighStart, (long)highValue, (long)initialValue);
1505 #endif
1506
1507 // We always store indexes and data values for the fast range.
1508 // Pin highStart to the top of that range while building.
1509 UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
1510 if (realHighStart < fastLimit) {
1511 for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
1512 flags[i] = ALL_SAME;
1513 index[i] = highValue;
1514 }
1515 highStart = fastLimit;
1516 } else {
1517 highStart = realHighStart;
1518 }
1519
1520 uint32_t asciiData[ASCII_LIMIT];
1521 for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
1522 asciiData[i] = get(i);
1523 }
1524
1525 // First we look for which data blocks have the same value repeated over the whole block,
1526 // deduplicate such blocks, find a good null data block (for faster enumeration),
1527 // and get an upper bound for the necessary data array length.
1528 AllSameBlocks allSameBlocks;
1529 int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
1530 if (newDataCapacity < 0) {
1531 errorCode = U_MEMORY_ALLOCATION_ERROR;
1532 return 0;
1533 }
1534 uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);
1535 if (newData == nullptr) {
1536 errorCode = U_MEMORY_ALLOCATION_ERROR;
1537 return 0;
1538 }
1539 uprv_memcpy(newData, asciiData, sizeof(asciiData));
1540
1541 int32_t dataNullIndex = allSameBlocks.findMostUsed();
1542
1543 MixedBlocks mixedBlocks;
1544 int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
1545 dataNullIndex, mixedBlocks, errorCode);
1546 if (U_FAILURE(errorCode)) { return 0; }
1547 U_ASSERT(newDataLength <= newDataCapacity);
1548 uprv_free(data);
1549 data = newData;
1550 dataCapacity = newDataCapacity;
1551 dataLength = newDataLength;
1552 if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
1553 // The offset of the last data block is too high to be stored in the index table.
1554 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1555 return 0;
1556 }
1557
1558 if (dataNullIndex >= 0) {
1559 dataNullOffset = index[dataNullIndex];
1560 #ifdef UCPTRIE_DEBUG
1561 if (data[dataNullOffset] != initialValue) {
1562 printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
1563 (long)initialValue, (long)data[dataNullOffset]);
1564 }
1565 #endif
1566 initialValue = data[dataNullOffset];
1567 } else {
1568 dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
1569 }
1570
1571 int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
1572 highStart = realHighStart;
1573 return indexLength;
1574 }
1575
build(UCPTrieType type,UCPTrieValueWidth valueWidth,UErrorCode & errorCode)1576 UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
1577 if (U_FAILURE(errorCode)) {
1578 return nullptr;
1579 }
1580 if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
1581 valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
1582 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1583 return nullptr;
1584 }
1585
1586 // The mutable trie always stores 32-bit values.
1587 // When we build a UCPTrie for a smaller value width, we first mask off unused bits
1588 // before compacting the data.
1589 switch (valueWidth) {
1590 case UCPTRIE_VALUE_BITS_32:
1591 break;
1592 case UCPTRIE_VALUE_BITS_16:
1593 maskValues(0xffff);
1594 break;
1595 case UCPTRIE_VALUE_BITS_8:
1596 maskValues(0xff);
1597 break;
1598 default:
1599 break;
1600 }
1601
1602 UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
1603 int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);
1604 if (U_FAILURE(errorCode)) {
1605 clear();
1606 return nullptr;
1607 }
1608
1609 // Ensure data table alignment: The index length must be even for uint32_t data.
1610 if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
1611 index16[indexLength++] = 0xffee; // arbitrary value
1612 }
1613
1614 // Make the total trie structure length a multiple of 4 bytes by padding the data table,
1615 // and store special values as the last two data values.
1616 int32_t length = indexLength * 2;
1617 if (valueWidth == UCPTRIE_VALUE_BITS_16) {
1618 if (((indexLength ^ dataLength) & 1) != 0) {
1619 // padding
1620 data[dataLength++] = errorValue;
1621 }
1622 if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1623 data[dataLength++] = highValue;
1624 data[dataLength++] = errorValue;
1625 }
1626 length += dataLength * 2;
1627 } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
1628 // 32-bit data words never need padding to a multiple of 4 bytes.
1629 if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1630 if (data[dataLength - 1] != highValue) {
1631 data[dataLength++] = highValue;
1632 }
1633 data[dataLength++] = errorValue;
1634 }
1635 length += dataLength * 4;
1636 } else {
1637 int32_t and3 = (length + dataLength) & 3;
1638 if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
1639 // all set
1640 } else if(and3 == 3 && data[dataLength - 1] == highValue) {
1641 data[dataLength++] = errorValue;
1642 } else {
1643 while (and3 != 2) {
1644 data[dataLength++] = highValue;
1645 and3 = (and3 + 1) & 3;
1646 }
1647 data[dataLength++] = highValue;
1648 data[dataLength++] = errorValue;
1649 }
1650 length += dataLength;
1651 }
1652
1653 // Calculate the total length of the UCPTrie as a single memory block.
1654 length += sizeof(UCPTrie);
1655 U_ASSERT((length & 3) == 0);
1656
1657 uint8_t *bytes = (uint8_t *)uprv_malloc(length);
1658 if (bytes == nullptr) {
1659 errorCode = U_MEMORY_ALLOCATION_ERROR;
1660 clear();
1661 return nullptr;
1662 }
1663 UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);
1664 uprv_memset(trie, 0, sizeof(UCPTrie));
1665 trie->indexLength = indexLength;
1666 trie->dataLength = dataLength;
1667
1668 trie->highStart = highStart;
1669 // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
1670 // Runtime code needs to then test for the real highStart as well.
1671 trie->shifted12HighStart = (highStart + 0xfff) >> 12;
1672 trie->type = type;
1673 trie->valueWidth = valueWidth;
1674
1675 trie->index3NullOffset = index3NullOffset;
1676 trie->dataNullOffset = dataNullOffset;
1677 trie->nullValue = initialValue;
1678
1679 bytes += sizeof(UCPTrie);
1680
1681 // Fill the index and data arrays.
1682 uint16_t *dest16 = (uint16_t *)bytes;
1683 trie->index = dest16;
1684
1685 if (highStart <= fastLimit) {
1686 // Condense only the fast index from the mutable-trie index.
1687 for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
1688 *dest16++ = (uint16_t)index[i]; // dest16[j]
1689 }
1690 } else {
1691 uprv_memcpy(dest16, index16, indexLength * 2);
1692 dest16 += indexLength;
1693 }
1694 bytes += indexLength * 2;
1695
1696 // Write the data array.
1697 const uint32_t *p = data;
1698 switch (valueWidth) {
1699 case UCPTRIE_VALUE_BITS_16:
1700 // Write 16-bit data values.
1701 trie->data.ptr16 = dest16;
1702 for (int32_t i = dataLength; i > 0; --i) {
1703 *dest16++ = (uint16_t)*p++;
1704 }
1705 break;
1706 case UCPTRIE_VALUE_BITS_32:
1707 // Write 32-bit data values.
1708 trie->data.ptr32 = (uint32_t *)bytes;
1709 uprv_memcpy(bytes, p, (size_t)dataLength * 4);
1710 break;
1711 case UCPTRIE_VALUE_BITS_8:
1712 // Write 8-bit data values.
1713 trie->data.ptr8 = bytes;
1714 for (int32_t i = dataLength; i > 0; --i) {
1715 *bytes++ = (uint8_t)*p++;
1716 }
1717 break;
1718 default:
1719 // Will not occur, valueWidth checked at the beginning.
1720 break;
1721 }
1722
1723 #ifdef UCPTRIE_DEBUG
1724 trie->name = name;
1725
1726 ucptrie_printLengths(trie, "");
1727 #endif
1728
1729 clear();
1730 return trie;
1731 }
1732
1733 } // namespace
1734
1735 U_NAMESPACE_END
1736
1737 U_NAMESPACE_USE
1738
1739 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_open(uint32_t initialValue,uint32_t errorValue,UErrorCode * pErrorCode)1740 umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
1741 if (U_FAILURE(*pErrorCode)) {
1742 return nullptr;
1743 }
1744 LocalPointer<MutableCodePointTrie> trie(
1745 new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);
1746 if (U_FAILURE(*pErrorCode)) {
1747 return nullptr;
1748 }
1749 return reinterpret_cast<UMutableCPTrie *>(trie.orphan());
1750 }
1751
1752 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_clone(const UMutableCPTrie * other,UErrorCode * pErrorCode)1753 umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
1754 if (U_FAILURE(*pErrorCode)) {
1755 return nullptr;
1756 }
1757 if (other == nullptr) {
1758 return nullptr;
1759 }
1760 LocalPointer<MutableCodePointTrie> clone(
1761 new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);
1762 if (U_FAILURE(*pErrorCode)) {
1763 return nullptr;
1764 }
1765 return reinterpret_cast<UMutableCPTrie *>(clone.orphan());
1766 }
1767
1768 U_CAPI void U_EXPORT2
umutablecptrie_close(UMutableCPTrie * trie)1769 umutablecptrie_close(UMutableCPTrie *trie) {
1770 delete reinterpret_cast<MutableCodePointTrie *>(trie);
1771 }
1772
1773 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_fromUCPMap(const UCPMap * map,UErrorCode * pErrorCode)1774 umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
1775 if (U_FAILURE(*pErrorCode)) {
1776 return nullptr;
1777 }
1778 if (map == nullptr) {
1779 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1780 return nullptr;
1781 }
1782 return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
1783 }
1784
1785 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_fromUCPTrie(const UCPTrie * trie,UErrorCode * pErrorCode)1786 umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
1787 if (U_FAILURE(*pErrorCode)) {
1788 return nullptr;
1789 }
1790 if (trie == nullptr) {
1791 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1792 return nullptr;
1793 }
1794 return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
1795 }
1796
1797 U_CAPI uint32_t U_EXPORT2
umutablecptrie_get(const UMutableCPTrie * trie,UChar32 c)1798 umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
1799 return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);
1800 }
1801
1802 namespace {
1803
getRange(const void * trie,UChar32 start,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)1804 UChar32 getRange(const void *trie, UChar32 start,
1805 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1806 return reinterpret_cast<const MutableCodePointTrie *>(trie)->
1807 getRange(start, filter, context, pValue);
1808 }
1809
1810 } // namespace
1811
1812 U_CAPI UChar32 U_EXPORT2
umutablecptrie_getRange(const UMutableCPTrie * trie,UChar32 start,UCPMapRangeOption option,uint32_t surrogateValue,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)1813 umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
1814 UCPMapRangeOption option, uint32_t surrogateValue,
1815 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1816 return ucptrie_internalGetRange(getRange, trie, start,
1817 option, surrogateValue,
1818 filter, context, pValue);
1819 }
1820
1821 U_CAPI void U_EXPORT2
umutablecptrie_set(UMutableCPTrie * trie,UChar32 c,uint32_t value,UErrorCode * pErrorCode)1822 umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
1823 if (U_FAILURE(*pErrorCode)) {
1824 return;
1825 }
1826 reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
1827 }
1828
1829 U_CAPI void U_EXPORT2
umutablecptrie_setRange(UMutableCPTrie * trie,UChar32 start,UChar32 end,uint32_t value,UErrorCode * pErrorCode)1830 umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
1831 uint32_t value, UErrorCode *pErrorCode) {
1832 if (U_FAILURE(*pErrorCode)) {
1833 return;
1834 }
1835 reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
1836 }
1837
1838 /* Compact and internally serialize the trie. */
1839 U_CAPI UCPTrie * U_EXPORT2
umutablecptrie_buildImmutable(UMutableCPTrie * trie,UCPTrieType type,UCPTrieValueWidth valueWidth,UErrorCode * pErrorCode)1840 umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
1841 UErrorCode *pErrorCode) {
1842 if (U_FAILURE(*pErrorCode)) {
1843 return nullptr;
1844 }
1845 return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
1846 }
1847
1848 #ifdef UCPTRIE_DEBUG
umutablecptrie_setName(UMutableCPTrie * trie,const char * name)1849 U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) {
1850 reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;
1851 }
1852 #endif
1853