1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // umutablecptrie.cpp (inspired by utrie2_builder.cpp)
5 // created: 2017dec29 Markus W. Scherer
6
7 // #define UCPTRIE_DEBUG
8 #ifdef UCPTRIE_DEBUG
9 # include <stdio.h>
10 #endif
11
12 #include "unicode/utypes.h"
13 #include "unicode/ucptrie.h"
14 #include "unicode/umutablecptrie.h"
15 #include "unicode/uobject.h"
16 #include "unicode/utf16.h"
17 #include "cmemory.h"
18 #include "uassert.h"
19 #include "ucptrie_impl.h"
20
21 U_NAMESPACE_BEGIN
22
23 namespace {
24
25 constexpr int32_t MAX_UNICODE = 0x10ffff;
26
27 constexpr int32_t UNICODE_LIMIT = 0x110000;
28 constexpr int32_t BMP_LIMIT = 0x10000;
29 constexpr int32_t ASCII_LIMIT = 0x80;
30
31 constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
32 constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
33 constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
34
35 constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
36
37 // Flag values for data blocks.
38 constexpr uint8_t ALL_SAME = 0;
39 constexpr uint8_t MIXED = 1;
40 constexpr uint8_t SAME_AS = 2;
41
42 /** Start with allocation of 16k data entries. */
43 constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);
44
45 /** Grow about 8x each time. */
46 constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);
47
48 /**
49 * Maximum length of the build-time data array.
50 * One entry per 0x110000 code points.
51 */
52 constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
53
54 // Flag values for index-3 blocks while compacting/building.
55 constexpr uint8_t I3_NULL = 0;
56 constexpr uint8_t I3_BMP = 1;
57 constexpr uint8_t I3_16 = 2;
58 constexpr uint8_t I3_18 = 3;
59
60 constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
61
62 class AllSameBlocks;
63 class MixedBlocks;
64
65 class MutableCodePointTrie : public UMemory {
66 public:
67 MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
68 MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
69 MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
70 ~MutableCodePointTrie();
71
72 MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
73
74 static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
75 static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
76
77 uint32_t get(UChar32 c) const;
78 int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
79 uint32_t *pValue) const;
80
81 void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
82 void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
83
84 UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
85
86 private:
87 void clear();
88
89 bool ensureHighStart(UChar32 c);
90 int32_t allocDataBlock(int32_t blockLength);
91 int32_t getDataBlock(int32_t i);
92
93 void maskValues(uint32_t mask);
94 UChar32 findHighStart() const;
95 int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
96 int32_t compactData(
97 int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
98 int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
99 int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
100 int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
101
102 uint32_t *index = nullptr;
103 int32_t indexCapacity = 0;
104 int32_t index3NullOffset = -1;
105 uint32_t *data = nullptr;
106 int32_t dataCapacity = 0;
107 int32_t dataLength = 0;
108 int32_t dataNullOffset = -1;
109
110 uint32_t origInitialValue;
111 uint32_t initialValue;
112 uint32_t errorValue;
113 UChar32 highStart;
114 uint32_t highValue;
115 #ifdef UCPTRIE_DEBUG
116 public:
117 const char *name;
118 #endif
119 private:
120 /** Temporary array while building the final data. */
121 uint16_t *index16 = nullptr;
122 uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
123 };
124
MutableCodePointTrie(uint32_t iniValue,uint32_t errValue,UErrorCode & errorCode)125 MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
126 origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
127 highStart(0), highValue(initialValue)
128 #ifdef UCPTRIE_DEBUG
129 , name("open")
130 #endif
131 {
132 if (U_FAILURE(errorCode)) { return; }
133 index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);
134 data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);
135 if (index == nullptr || data == nullptr) {
136 errorCode = U_MEMORY_ALLOCATION_ERROR;
137 return;
138 }
139 indexCapacity = BMP_I_LIMIT;
140 dataCapacity = INITIAL_DATA_LENGTH;
141 }
142
MutableCodePointTrie(const MutableCodePointTrie & other,UErrorCode & errorCode)143 MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
144 index3NullOffset(other.index3NullOffset),
145 dataNullOffset(other.dataNullOffset),
146 origInitialValue(other.origInitialValue), initialValue(other.initialValue),
147 errorValue(other.errorValue),
148 highStart(other.highStart), highValue(other.highValue)
149 #ifdef UCPTRIE_DEBUG
150 , name("mutable clone")
151 #endif
152 {
153 if (U_FAILURE(errorCode)) { return; }
154 int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
155 index = (uint32_t *)uprv_malloc(iCapacity * 4);
156 data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);
157 if (index == nullptr || data == nullptr) {
158 errorCode = U_MEMORY_ALLOCATION_ERROR;
159 return;
160 }
161 indexCapacity = iCapacity;
162 dataCapacity = other.dataCapacity;
163
164 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
165 uprv_memcpy(flags, other.flags, iLimit);
166 uprv_memcpy(index, other.index, iLimit * 4);
167 uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
168 dataLength = other.dataLength;
169 U_ASSERT(other.index16 == nullptr);
170 }
171
~MutableCodePointTrie()172 MutableCodePointTrie::~MutableCodePointTrie() {
173 uprv_free(index);
174 uprv_free(data);
175 uprv_free(index16);
176 }
177
fromUCPMap(const UCPMap * map,UErrorCode & errorCode)178 MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
179 // Use the highValue as the initialValue to reduce the highStart.
180 uint32_t errorValue = ucpmap_get(map, -1);
181 uint32_t initialValue = ucpmap_get(map, 0x10ffff);
182 LocalPointer<MutableCodePointTrie> mutableTrie(
183 new MutableCodePointTrie(initialValue, errorValue, errorCode),
184 errorCode);
185 if (U_FAILURE(errorCode)) {
186 return nullptr;
187 }
188 UChar32 start = 0, end;
189 uint32_t value;
190 while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
191 nullptr, nullptr, &value)) >= 0) {
192 if (value != initialValue) {
193 if (start == end) {
194 mutableTrie->set(start, value, errorCode);
195 } else {
196 mutableTrie->setRange(start, end, value, errorCode);
197 }
198 }
199 start = end + 1;
200 }
201 if (U_SUCCESS(errorCode)) {
202 return mutableTrie.orphan();
203 } else {
204 return nullptr;
205 }
206 }
207
fromUCPTrie(const UCPTrie * trie,UErrorCode & errorCode)208 MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
209 // Use the highValue as the initialValue to reduce the highStart.
210 uint32_t errorValue;
211 uint32_t initialValue;
212 switch (trie->valueWidth) {
213 case UCPTRIE_VALUE_BITS_16:
214 errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
215 initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
216 break;
217 case UCPTRIE_VALUE_BITS_32:
218 errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
219 initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
220 break;
221 case UCPTRIE_VALUE_BITS_8:
222 errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
223 initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
224 break;
225 default:
226 // Unreachable if the trie is properly initialized.
227 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
228 return nullptr;
229 }
230 LocalPointer<MutableCodePointTrie> mutableTrie(
231 new MutableCodePointTrie(initialValue, errorValue, errorCode),
232 errorCode);
233 if (U_FAILURE(errorCode)) {
234 return nullptr;
235 }
236 UChar32 start = 0, end;
237 uint32_t value;
238 while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
239 nullptr, nullptr, &value)) >= 0) {
240 if (value != initialValue) {
241 if (start == end) {
242 mutableTrie->set(start, value, errorCode);
243 } else {
244 mutableTrie->setRange(start, end, value, errorCode);
245 }
246 }
247 start = end + 1;
248 }
249 if (U_SUCCESS(errorCode)) {
250 return mutableTrie.orphan();
251 } else {
252 return nullptr;
253 }
254 }
255
clear()256 void MutableCodePointTrie::clear() {
257 index3NullOffset = dataNullOffset = -1;
258 dataLength = 0;
259 highValue = initialValue = origInitialValue;
260 highStart = 0;
261 uprv_free(index16);
262 index16 = nullptr;
263 }
264
get(UChar32 c) const265 uint32_t MutableCodePointTrie::get(UChar32 c) const {
266 if ((uint32_t)c > MAX_UNICODE) {
267 return errorValue;
268 }
269 if (c >= highStart) {
270 return highValue;
271 }
272 int32_t i = c >> UCPTRIE_SHIFT_3;
273 if (flags[i] == ALL_SAME) {
274 return index[i];
275 } else {
276 return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];
277 }
278 }
279
maybeFilterValue(uint32_t value,uint32_t initialValue,uint32_t nullValue,UCPMapValueFilter * filter,const void * context)280 inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
281 UCPMapValueFilter *filter, const void *context) {
282 if (value == initialValue) {
283 value = nullValue;
284 } else if (filter != nullptr) {
285 value = filter(context, value);
286 }
287 return value;
288 }
289
getRange(UChar32 start,UCPMapValueFilter * filter,const void * context,uint32_t * pValue) const290 UChar32 MutableCodePointTrie::getRange(
291 UChar32 start, UCPMapValueFilter *filter, const void *context,
292 uint32_t *pValue) const {
293 if ((uint32_t)start > MAX_UNICODE) {
294 return U_SENTINEL;
295 }
296 if (start >= highStart) {
297 if (pValue != nullptr) {
298 uint32_t value = highValue;
299 if (filter != nullptr) { value = filter(context, value); }
300 *pValue = value;
301 }
302 return MAX_UNICODE;
303 }
304 uint32_t nullValue = initialValue;
305 if (filter != nullptr) { nullValue = filter(context, nullValue); }
306 UChar32 c = start;
307 uint32_t trieValue, value;
308 bool haveValue = false;
309 int32_t i = c >> UCPTRIE_SHIFT_3;
310 do {
311 if (flags[i] == ALL_SAME) {
312 uint32_t trieValue2 = index[i];
313 if (haveValue) {
314 if (trieValue2 != trieValue) {
315 if (filter == nullptr ||
316 maybeFilterValue(trieValue2, initialValue, nullValue,
317 filter, context) != value) {
318 return c - 1;
319 }
320 trieValue = trieValue2; // may or may not help
321 }
322 } else {
323 trieValue = trieValue2;
324 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
325 if (pValue != nullptr) { *pValue = value; }
326 haveValue = true;
327 }
328 c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
329 } else /* MIXED */ {
330 int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
331 uint32_t trieValue2 = data[di];
332 if (haveValue) {
333 if (trieValue2 != trieValue) {
334 if (filter == nullptr ||
335 maybeFilterValue(trieValue2, initialValue, nullValue,
336 filter, context) != value) {
337 return c - 1;
338 }
339 trieValue = trieValue2; // may or may not help
340 }
341 } else {
342 trieValue = trieValue2;
343 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
344 if (pValue != nullptr) { *pValue = value; }
345 haveValue = true;
346 }
347 while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
348 trieValue2 = data[++di];
349 if (trieValue2 != trieValue) {
350 if (filter == nullptr ||
351 maybeFilterValue(trieValue2, initialValue, nullValue,
352 filter, context) != value) {
353 return c - 1;
354 }
355 }
356 trieValue = trieValue2; // may or may not help
357 }
358 }
359 ++i;
360 } while (c < highStart);
361 U_ASSERT(haveValue);
362 if (maybeFilterValue(highValue, initialValue, nullValue,
363 filter, context) != value) {
364 return c - 1;
365 } else {
366 return MAX_UNICODE;
367 }
368 }
369
370 void
writeBlock(uint32_t * block,uint32_t value)371 writeBlock(uint32_t *block, uint32_t value) {
372 uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
373 while (block < limit) {
374 *block++ = value;
375 }
376 }
377
ensureHighStart(UChar32 c)378 bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
379 if (c >= highStart) {
380 // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
381 c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
382 int32_t i = highStart >> UCPTRIE_SHIFT_3;
383 int32_t iLimit = c >> UCPTRIE_SHIFT_3;
384 if (iLimit > indexCapacity) {
385 uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);
386 if (newIndex == nullptr) { return false; }
387 uprv_memcpy(newIndex, index, i * 4);
388 uprv_free(index);
389 index = newIndex;
390 indexCapacity = I_LIMIT;
391 }
392 do {
393 flags[i] = ALL_SAME;
394 index[i] = initialValue;
395 } while(++i < iLimit);
396 highStart = c;
397 }
398 return true;
399 }
400
allocDataBlock(int32_t blockLength)401 int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
402 int32_t newBlock = dataLength;
403 int32_t newTop = newBlock + blockLength;
404 if (newTop > dataCapacity) {
405 int32_t capacity;
406 if (dataCapacity < MEDIUM_DATA_LENGTH) {
407 capacity = MEDIUM_DATA_LENGTH;
408 } else if (dataCapacity < MAX_DATA_LENGTH) {
409 capacity = MAX_DATA_LENGTH;
410 } else {
411 // Should never occur.
412 // Either MAX_DATA_LENGTH is incorrect,
413 // or the code writes more values than should be possible.
414 return -1;
415 }
416 uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4);
417 if (newData == nullptr) {
418 return -1;
419 }
420 uprv_memcpy(newData, data, (size_t)dataLength * 4);
421 uprv_free(data);
422 data = newData;
423 dataCapacity = capacity;
424 }
425 dataLength = newTop;
426 return newBlock;
427 }
428
429 /**
430 * No error checking for illegal arguments.
431 *
432 * @return -1 if no new data block available (out of memory in data array)
433 * @internal
434 */
getDataBlock(int32_t i)435 int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
436 if (flags[i] == MIXED) {
437 return index[i];
438 }
439 if (i < BMP_I_LIMIT) {
440 int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
441 if (newBlock < 0) { return newBlock; }
442 int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
443 int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
444 do {
445 U_ASSERT(flags[iStart] == ALL_SAME);
446 writeBlock(data + newBlock, index[iStart]);
447 flags[iStart] = MIXED;
448 index[iStart++] = newBlock;
449 newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
450 } while (iStart < iLimit);
451 return index[i];
452 } else {
453 int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
454 if (newBlock < 0) { return newBlock; }
455 writeBlock(data + newBlock, index[i]);
456 flags[i] = MIXED;
457 index[i] = newBlock;
458 return newBlock;
459 }
460 }
461
set(UChar32 c,uint32_t value,UErrorCode & errorCode)462 void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
463 if (U_FAILURE(errorCode)) {
464 return;
465 }
466 if ((uint32_t)c > MAX_UNICODE) {
467 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
468 return;
469 }
470
471 int32_t block;
472 if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
473 errorCode = U_MEMORY_ALLOCATION_ERROR;
474 return;
475 }
476
477 data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
478 }
479
480 void
fillBlock(uint32_t * block,UChar32 start,UChar32 limit,uint32_t value)481 fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
482 uint32_t *pLimit = block + limit;
483 block += start;
484 while (block < pLimit) {
485 *block++ = value;
486 }
487 }
488
setRange(UChar32 start,UChar32 end,uint32_t value,UErrorCode & errorCode)489 void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
490 if (U_FAILURE(errorCode)) {
491 return;
492 }
493 if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
494 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
495 return;
496 }
497 if (!ensureHighStart(end)) {
498 errorCode = U_MEMORY_ALLOCATION_ERROR;
499 return;
500 }
501
502 UChar32 limit = end + 1;
503 if (start & UCPTRIE_SMALL_DATA_MASK) {
504 // Set partial block at [start..following block boundary[.
505 int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
506 if (block < 0) {
507 errorCode = U_MEMORY_ALLOCATION_ERROR;
508 return;
509 }
510
511 UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
512 if (nextStart <= limit) {
513 fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
514 value);
515 start = nextStart;
516 } else {
517 fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
518 value);
519 return;
520 }
521 }
522
523 // Number of positions in the last, partial block.
524 int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
525
526 // Round down limit to a block boundary.
527 limit &= ~UCPTRIE_SMALL_DATA_MASK;
528
529 // Iterate over all-value blocks.
530 while (start < limit) {
531 int32_t i = start >> UCPTRIE_SHIFT_3;
532 if (flags[i] == ALL_SAME) {
533 index[i] = value;
534 } else /* MIXED */ {
535 fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
536 }
537 start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
538 }
539
540 if (rest > 0) {
541 // Set partial block at [last block boundary..limit[.
542 int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
543 if (block < 0) {
544 errorCode = U_MEMORY_ALLOCATION_ERROR;
545 return;
546 }
547
548 fillBlock(data + block, 0, rest, value);
549 }
550 }
551
552 /* compaction --------------------------------------------------------------- */
553
maskValues(uint32_t mask)554 void MutableCodePointTrie::maskValues(uint32_t mask) {
555 initialValue &= mask;
556 errorValue &= mask;
557 highValue &= mask;
558 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
559 for (int32_t i = 0; i < iLimit; ++i) {
560 if (flags[i] == ALL_SAME) {
561 index[i] &= mask;
562 }
563 }
564 for (int32_t i = 0; i < dataLength; ++i) {
565 data[i] &= mask;
566 }
567 }
568
569 template<typename UIntA, typename UIntB>
equalBlocks(const UIntA * s,const UIntB * t,int32_t length)570 bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
571 while (length > 0 && *s == *t) {
572 ++s;
573 ++t;
574 --length;
575 }
576 return length == 0;
577 }
578
allValuesSameAs(const uint32_t * p,int32_t length,uint32_t value)579 bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
580 const uint32_t *pLimit = p + length;
581 while (p < pLimit && *p == value) { ++p; }
582 return p == pLimit;
583 }
584
585 /** Search for an identical block. */
findSameBlock(const uint16_t * p,int32_t pStart,int32_t length,const uint16_t * q,int32_t qStart,int32_t blockLength)586 int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
587 const uint16_t *q, int32_t qStart, int32_t blockLength) {
588 // Ensure that we do not even partially get past length.
589 length -= blockLength;
590
591 q += qStart;
592 while (pStart <= length) {
593 if (equalBlocks(p + pStart, q, blockLength)) {
594 return pStart;
595 }
596 ++pStart;
597 }
598 return -1;
599 }
600
findAllSameBlock(const uint32_t * p,int32_t start,int32_t limit,uint32_t value,int32_t blockLength)601 int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
602 uint32_t value, int32_t blockLength) {
603 // Ensure that we do not even partially get past limit.
604 limit -= blockLength;
605
606 for (int32_t block = start; block <= limit; ++block) {
607 if (p[block] == value) {
608 for (int32_t i = 1;; ++i) {
609 if (i == blockLength) {
610 return block;
611 }
612 if (p[block + i] != value) {
613 block += i;
614 break;
615 }
616 }
617 }
618 }
619 return -1;
620 }
621
622 /**
623 * Look for maximum overlap of the beginning of the other block
624 * with the previous, adjacent block.
625 */
626 template<typename UIntA, typename UIntB>
getOverlap(const UIntA * p,int32_t length,const UIntB * q,int32_t qStart,int32_t blockLength)627 int32_t getOverlap(const UIntA *p, int32_t length,
628 const UIntB *q, int32_t qStart, int32_t blockLength) {
629 int32_t overlap = blockLength - 1;
630 U_ASSERT(overlap <= length);
631 q += qStart;
632 while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
633 --overlap;
634 }
635 return overlap;
636 }
637
getAllSameOverlap(const uint32_t * p,int32_t length,uint32_t value,int32_t blockLength)638 int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
639 int32_t blockLength) {
640 int32_t min = length - (blockLength - 1);
641 int32_t i = length;
642 while (min < i && p[i - 1] == value) { --i; }
643 return length - i;
644 }
645
isStartOfSomeFastBlock(uint32_t dataOffset,const uint32_t index[],int32_t fastILimit)646 bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
647 for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
648 if (index[i] == dataOffset) {
649 return true;
650 }
651 }
652 return false;
653 }
654
655 /**
656 * Finds the start of the last range in the trie by enumerating backward.
657 * Indexes for code points higher than this will be omitted.
658 */
findHighStart() const659 UChar32 MutableCodePointTrie::findHighStart() const {
660 int32_t i = highStart >> UCPTRIE_SHIFT_3;
661 while (i > 0) {
662 bool match;
663 if (flags[--i] == ALL_SAME) {
664 match = index[i] == highValue;
665 } else /* MIXED */ {
666 const uint32_t *p = data + index[i];
667 for (int32_t j = 0;; ++j) {
668 if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) {
669 match = true;
670 break;
671 }
672 if (p[j] != highValue) {
673 match = false;
674 break;
675 }
676 }
677 }
678 if (!match) {
679 return (i + 1) << UCPTRIE_SHIFT_3;
680 }
681 }
682 return 0;
683 }
684
685 class AllSameBlocks {
686 public:
687 static constexpr int32_t NEW_UNIQUE = -1;
688 static constexpr int32_t OVERFLOW = -2;
689
AllSameBlocks()690 AllSameBlocks() : length(0), mostRecent(-1) {}
691
findOrAdd(int32_t index,int32_t count,uint32_t value)692 int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
693 if (mostRecent >= 0 && values[mostRecent] == value) {
694 refCounts[mostRecent] += count;
695 return indexes[mostRecent];
696 }
697 for (int32_t i = 0; i < length; ++i) {
698 if (values[i] == value) {
699 mostRecent = i;
700 refCounts[i] += count;
701 return indexes[i];
702 }
703 }
704 if (length == CAPACITY) {
705 return OVERFLOW;
706 }
707 mostRecent = length;
708 indexes[length] = index;
709 values[length] = value;
710 refCounts[length++] = count;
711 return NEW_UNIQUE;
712 }
713
714 /** Replaces the block which has the lowest reference count. */
add(int32_t index,int32_t count,uint32_t value)715 void add(int32_t index, int32_t count, uint32_t value) {
716 U_ASSERT(length == CAPACITY);
717 int32_t least = -1;
718 int32_t leastCount = I_LIMIT;
719 for (int32_t i = 0; i < length; ++i) {
720 U_ASSERT(values[i] != value);
721 if (refCounts[i] < leastCount) {
722 least = i;
723 leastCount = refCounts[i];
724 }
725 }
726 U_ASSERT(least >= 0);
727 mostRecent = least;
728 indexes[least] = index;
729 values[least] = value;
730 refCounts[least] = count;
731 }
732
findMostUsed() const733 int32_t findMostUsed() const {
734 if (length == 0) { return -1; }
735 int32_t max = -1;
736 int32_t maxCount = 0;
737 for (int32_t i = 0; i < length; ++i) {
738 if (refCounts[i] > maxCount) {
739 max = i;
740 maxCount = refCounts[i];
741 }
742 }
743 return indexes[max];
744 }
745
746 private:
747 static constexpr int32_t CAPACITY = 32;
748
749 int32_t length;
750 int32_t mostRecent;
751
752 int32_t indexes[CAPACITY];
753 uint32_t values[CAPACITY];
754 int32_t refCounts[CAPACITY];
755 };
756
757 // Custom hash table for mixed-value blocks to be found anywhere in the
758 // compacted data or index so far.
759 class MixedBlocks {
760 public:
MixedBlocks()761 MixedBlocks() {}
~MixedBlocks()762 ~MixedBlocks() {
763 uprv_free(table);
764 }
765
init(int32_t maxLength,int32_t newBlockLength)766 bool init(int32_t maxLength, int32_t newBlockLength) {
767 // We store actual data indexes + 1 to reserve 0 for empty entries.
768 int32_t maxDataIndex = maxLength - newBlockLength + 1;
769 int32_t newLength;
770 if (maxDataIndex <= 0xfff) { // 4k
771 newLength = 6007;
772 shift = 12;
773 mask = 0xfff;
774 } else if (maxDataIndex <= 0x7fff) { // 32k
775 newLength = 50021;
776 shift = 15;
777 mask = 0x7fff;
778 } else if (maxDataIndex <= 0x1ffff) { // 128k
779 newLength = 200003;
780 shift = 17;
781 mask = 0x1ffff;
782 } else {
783 // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
784 newLength = 1500007;
785 shift = 21;
786 mask = 0x1fffff;
787 }
788 if (newLength > capacity) {
789 uprv_free(table);
790 table = (uint32_t *)uprv_malloc(newLength * 4);
791 if (table == nullptr) {
792 return false;
793 }
794 capacity = newLength;
795 }
796 length = newLength;
797 uprv_memset(table, 0, length * 4);
798
799 blockLength = newBlockLength;
800 return true;
801 }
802
803 template<typename UInt>
extend(const UInt * data,int32_t minStart,int32_t prevDataLength,int32_t newDataLength)804 void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
805 int32_t start = prevDataLength - blockLength;
806 if (start >= minStart) {
807 ++start; // Skip the last block that we added last time.
808 } else {
809 start = minStart; // Begin with the first full block.
810 }
811 for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
812 uint32_t hashCode = makeHashCode(data, start);
813 addEntry(data, start, hashCode, start);
814 }
815 }
816
817 template<typename UIntA, typename UIntB>
findBlock(const UIntA * data,const UIntB * blockData,int32_t blockStart) const818 int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
819 uint32_t hashCode = makeHashCode(blockData, blockStart);
820 int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
821 if (entryIndex >= 0) {
822 return (table[entryIndex] & mask) - 1;
823 } else {
824 return -1;
825 }
826 }
827
findAllSameBlock(const uint32_t * data,uint32_t blockValue) const828 int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
829 uint32_t hashCode = makeHashCode(blockValue);
830 int32_t entryIndex = findEntry(data, blockValue, hashCode);
831 if (entryIndex >= 0) {
832 return (table[entryIndex] & mask) - 1;
833 } else {
834 return -1;
835 }
836 }
837
838 private:
839 template<typename UInt>
makeHashCode(const UInt * blockData,int32_t blockStart) const840 uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
841 int32_t blockLimit = blockStart + blockLength;
842 uint32_t hashCode = blockData[blockStart++];
843 do {
844 hashCode = 37 * hashCode + blockData[blockStart++];
845 } while (blockStart < blockLimit);
846 return hashCode;
847 }
848
makeHashCode(uint32_t blockValue) const849 uint32_t makeHashCode(uint32_t blockValue) const {
850 uint32_t hashCode = blockValue;
851 for (int32_t i = 1; i < blockLength; ++i) {
852 hashCode = 37 * hashCode + blockValue;
853 }
854 return hashCode;
855 }
856
857 template<typename UInt>
addEntry(const UInt * data,int32_t blockStart,uint32_t hashCode,int32_t dataIndex)858 void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
859 U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
860 int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
861 if (entryIndex < 0) {
862 table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
863 }
864 }
865
866 template<typename UIntA, typename UIntB>
findEntry(const UIntA * data,const UIntB * blockData,int32_t blockStart,uint32_t hashCode) const867 int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
868 uint32_t hashCode) const {
869 uint32_t shiftedHashCode = hashCode << shift;
870 int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
871 for (int32_t entryIndex = initialEntryIndex;;) {
872 uint32_t entry = table[entryIndex];
873 if (entry == 0) {
874 return ~entryIndex;
875 }
876 if ((entry & ~mask) == shiftedHashCode) {
877 int32_t dataIndex = (entry & mask) - 1;
878 if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
879 return entryIndex;
880 }
881 }
882 entryIndex = nextIndex(initialEntryIndex, entryIndex);
883 }
884 }
885
findEntry(const uint32_t * data,uint32_t blockValue,uint32_t hashCode) const886 int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
887 uint32_t shiftedHashCode = hashCode << shift;
888 int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
889 for (int32_t entryIndex = initialEntryIndex;;) {
890 uint32_t entry = table[entryIndex];
891 if (entry == 0) {
892 return ~entryIndex;
893 }
894 if ((entry & ~mask) == shiftedHashCode) {
895 int32_t dataIndex = (entry & mask) - 1;
896 if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
897 return entryIndex;
898 }
899 }
900 entryIndex = nextIndex(initialEntryIndex, entryIndex);
901 }
902 }
903
nextIndex(int32_t initialEntryIndex,int32_t entryIndex) const904 inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
905 // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
906 return (entryIndex + initialEntryIndex) % length;
907 }
908
909 // Hash table.
910 // The length is a prime number, larger than the maximum data length.
911 // The "shift" lower bits store a data index + 1.
912 // The remaining upper bits store a partial hashCode of the block data values.
913 uint32_t *table = nullptr;
914 int32_t capacity = 0;
915 int32_t length = 0;
916 int32_t shift = 0;
917 uint32_t mask = 0;
918
919 int32_t blockLength = 0;
920 };
921
compactWholeDataBlocks(int32_t fastILimit,AllSameBlocks & allSameBlocks)922 int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
923 #ifdef UCPTRIE_DEBUG
924 bool overflow = false;
925 #endif
926
927 // ASCII data will be stored as a linear table, even if the following code
928 // does not yet count it that way.
929 int32_t newDataCapacity = ASCII_LIMIT;
930 // Add room for a small data null block in case it would match the start of
931 // a fast data block where dataNullOffset must not be set in that case.
932 newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
933 // Add room for special values (errorValue, highValue) and padding.
934 newDataCapacity += 4;
935 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
936 int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
937 int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
938 for (int32_t i = 0; i < iLimit; i += inc) {
939 if (i == fastILimit) {
940 blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
941 inc = 1;
942 }
943 uint32_t value = index[i];
944 if (flags[i] == MIXED) {
945 // Really mixed?
946 const uint32_t *p = data + value;
947 value = *p;
948 if (allValuesSameAs(p + 1, blockLength - 1, value)) {
949 flags[i] = ALL_SAME;
950 index[i] = value;
951 // Fall through to ALL_SAME handling.
952 } else {
953 newDataCapacity += blockLength;
954 continue;
955 }
956 } else {
957 U_ASSERT(flags[i] == ALL_SAME);
958 if (inc > 1) {
959 // Do all of the fast-range data block's ALL_SAME parts have the same value?
960 bool allSame = true;
961 int32_t next_i = i + inc;
962 for (int32_t j = i + 1; j < next_i; ++j) {
963 U_ASSERT(flags[j] == ALL_SAME);
964 if (index[j] != value) {
965 allSame = false;
966 break;
967 }
968 }
969 if (!allSame) {
970 // Turn it into a MIXED block.
971 if (getDataBlock(i) < 0) {
972 return -1;
973 }
974 newDataCapacity += blockLength;
975 continue;
976 }
977 }
978 }
979 // Is there another ALL_SAME block with the same value?
980 int32_t other = allSameBlocks.findOrAdd(i, inc, value);
981 if (other == AllSameBlocks::OVERFLOW) {
982 // The fixed-size array overflowed. Slow check for a duplicate block.
983 #ifdef UCPTRIE_DEBUG
984 if (!overflow) {
985 puts("UCPTrie AllSameBlocks overflow");
986 overflow = true;
987 }
988 #endif
989 int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
990 for (int32_t j = 0;; j += jInc) {
991 if (j == i) {
992 allSameBlocks.add(i, inc, value);
993 break;
994 }
995 if (j == fastILimit) {
996 jInc = 1;
997 }
998 if (flags[j] == ALL_SAME && index[j] == value) {
999 allSameBlocks.add(j, jInc + inc, value);
1000 other = j;
1001 break;
1002 // We could keep counting blocks with the same value
1003 // before we add the first one, which may improve compaction in rare cases,
1004 // but it would make it slower.
1005 }
1006 }
1007 }
1008 if (other >= 0) {
1009 flags[i] = SAME_AS;
1010 index[i] = other;
1011 } else {
1012 // New unique same-value block.
1013 newDataCapacity += blockLength;
1014 }
1015 }
1016 return newDataCapacity;
1017 }
1018
1019 #ifdef UCPTRIE_DEBUG
1020 # define DEBUG_DO(expr) expr
1021 #else
1022 # define DEBUG_DO(expr)
1023 #endif
1024
1025 #ifdef UCPTRIE_DEBUG
1026 // Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
appendValue(char s[],int32_t length,uint32_t value)1027 int32_t appendValue(char s[], int32_t length, uint32_t value) {
1028 value ^= value >> 16;
1029 value ^= value >> 8;
1030 s[length] = 0xE2;
1031 s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
1032 s[length + 2] = (char)(0x80 + (value & 0x3F));
1033 return length + 3;
1034 }
1035
printBlock(const uint32_t * block,int32_t blockLength,uint32_t value,UChar32 start,int32_t overlap,uint32_t initialValue)1036 void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
1037 UChar32 start, int32_t overlap, uint32_t initialValue) {
1038 char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];
1039 int32_t length = 0;
1040 int32_t i;
1041 for (i = 0; i < overlap; ++i) {
1042 length = appendValue(s, length, 0); // Braille blank
1043 }
1044 s[length++] = '|';
1045 for (; i < blockLength; ++i) {
1046 if (block != nullptr) {
1047 value = block[i];
1048 }
1049 if (value == initialValue) {
1050 value = 0x40; // Braille lower left dot
1051 }
1052 length = appendValue(s, length, value);
1053 }
1054 s[length] = 0;
1055 start += overlap;
1056 if (start <= 0xffff) {
1057 printf(" %04lX %s|\n", (long)start, s);
1058 } else if (start <= 0xfffff) {
1059 printf(" %5lX %s|\n", (long)start, s);
1060 } else {
1061 printf(" %6lX %s|\n", (long)start, s);
1062 }
1063 }
1064 #endif
1065
1066 /**
1067 * Compacts a build-time trie.
1068 *
1069 * The compaction
1070 * - removes blocks that are identical with earlier ones
1071 * - overlaps each new non-duplicate block as much as possible with the previously-written one
1072 * - works with fast-range data blocks whose length is a multiple of that of
1073 * higher-code-point data blocks
1074 *
1075 * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
1076 */
compactData(int32_t fastILimit,uint32_t * newData,int32_t newDataCapacity,int32_t dataNullIndex,MixedBlocks & mixedBlocks,UErrorCode & errorCode)1077 int32_t MutableCodePointTrie::compactData(
1078 int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
1079 int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
1080 #ifdef UCPTRIE_DEBUG
1081 int32_t countSame=0, sumOverlaps=0;
1082 bool printData = dataLength == 29088 /* line.brk */ ||
1083 // dataLength == 30048 /* CanonIterData */ ||
1084 dataLength == 50400 /* zh.txt~stroke */;
1085 #endif
1086
1087 // The linear ASCII data has been copied into newData already.
1088 int32_t newDataLength = 0;
1089 for (int32_t i = 0; newDataLength < ASCII_LIMIT;
1090 newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
1091 index[i] = newDataLength;
1092 #ifdef UCPTRIE_DEBUG
1093 if (printData) {
1094 printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
1095 }
1096 #endif
1097 }
1098
1099 int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
1100 if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1101 errorCode = U_MEMORY_ALLOCATION_ERROR;
1102 return 0;
1103 }
1104 mixedBlocks.extend(newData, 0, 0, newDataLength);
1105
1106 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1107 int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1108 int32_t fastLength = 0;
1109 for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
1110 if (i == fastILimit) {
1111 blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1112 inc = 1;
1113 fastLength = newDataLength;
1114 if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1115 errorCode = U_MEMORY_ALLOCATION_ERROR;
1116 return 0;
1117 }
1118 mixedBlocks.extend(newData, 0, 0, newDataLength);
1119 }
1120 if (flags[i] == ALL_SAME) {
1121 uint32_t value = index[i];
1122 // Find an earlier part of the data array of length blockLength
1123 // that is filled with this value.
1124 int32_t n = mixedBlocks.findAllSameBlock(newData, value);
1125 // If we find a match, and the current block is the data null block,
1126 // and it is not a fast block but matches the start of a fast block,
1127 // then we need to continue looking.
1128 // This is because this small block is shorter than the fast block,
1129 // and not all of the rest of the fast block is filled with this value.
1130 // Otherwise trie.getRange() would detect that the fast block starts at
1131 // dataNullOffset and assume incorrectly that it is filled with the null value.
1132 while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
1133 isStartOfSomeFastBlock(n, index, fastILimit)) {
1134 n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
1135 }
1136 if (n >= 0) {
1137 DEBUG_DO(++countSame);
1138 index[i] = n;
1139 } else {
1140 n = getAllSameOverlap(newData, newDataLength, value, blockLength);
1141 DEBUG_DO(sumOverlaps += n);
1142 #ifdef UCPTRIE_DEBUG
1143 if (printData) {
1144 printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
1145 }
1146 #endif
1147 index[i] = newDataLength - n;
1148 int32_t prevDataLength = newDataLength;
1149 while (n < blockLength) {
1150 newData[newDataLength++] = value;
1151 ++n;
1152 }
1153 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1154 }
1155 } else if (flags[i] == MIXED) {
1156 const uint32_t *block = data + index[i];
1157 int32_t n = mixedBlocks.findBlock(newData, block, 0);
1158 if (n >= 0) {
1159 DEBUG_DO(++countSame);
1160 index[i] = n;
1161 } else {
1162 n = getOverlap(newData, newDataLength, block, 0, blockLength);
1163 DEBUG_DO(sumOverlaps += n);
1164 #ifdef UCPTRIE_DEBUG
1165 if (printData) {
1166 printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
1167 }
1168 #endif
1169 index[i] = newDataLength - n;
1170 int32_t prevDataLength = newDataLength;
1171 while (n < blockLength) {
1172 newData[newDataLength++] = block[n++];
1173 }
1174 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1175 }
1176 } else /* SAME_AS */ {
1177 uint32_t j = index[i];
1178 index[i] = index[j];
1179 }
1180 }
1181
1182 #ifdef UCPTRIE_DEBUG
1183 /* we saved some space */
1184 printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
1185 (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
1186 #endif
1187 return newDataLength;
1188 }
1189
compactIndex(int32_t fastILimit,MixedBlocks & mixedBlocks,UErrorCode & errorCode)1190 int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
1191 UErrorCode &errorCode) {
1192 int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
1193 if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
1194 // Only the linear fast index, no multi-stage index tables.
1195 index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1196 return fastIndexLength;
1197 }
1198
1199 // Condense the fast index table.
1200 // Also, does it contain an index-3 block with all dataNullOffset?
1201 uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength
1202 int32_t i3FirstNull = -1;
1203 for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
1204 uint32_t i3 = index[i];
1205 fastIndex[j] = (uint16_t)i3;
1206 if (i3 == (uint32_t)dataNullOffset) {
1207 if (i3FirstNull < 0) {
1208 i3FirstNull = j;
1209 } else if (index3NullOffset < 0 &&
1210 (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1211 index3NullOffset = i3FirstNull;
1212 }
1213 } else {
1214 i3FirstNull = -1;
1215 }
1216 // Set the index entries that compactData() skipped.
1217 // Needed when the multi-stage index covers the fast index range as well.
1218 int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1219 while (++i < iNext) {
1220 i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1221 index[i] = i3;
1222 }
1223 }
1224
1225 if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1226 errorCode = U_MEMORY_ALLOCATION_ERROR;
1227 return 0;
1228 }
1229 mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
1230
1231 // Examine index-3 blocks. For each determine one of:
1232 // - same as the index-3 null block
1233 // - same as a fast-index block
1234 // - 16-bit indexes
1235 // - 18-bit indexes
1236 // We store this in the first flags entry for the index-3 block.
1237 //
1238 // Also determine an upper limit for the index-3 table length.
1239 int32_t index3Capacity = 0;
1240 i3FirstNull = index3NullOffset;
1241 bool hasLongI3Blocks = false;
1242 // If the fast index covers the whole BMP, then
1243 // the multi-stage index is only for supplementary code points.
1244 // Otherwise, the multi-stage index covers all of Unicode.
1245 int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
1246 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1247 for (int32_t i = iStart; i < iLimit;) {
1248 int32_t j = i;
1249 int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1250 uint32_t oredI3 = 0;
1251 bool isNull = true;
1252 do {
1253 uint32_t i3 = index[j];
1254 oredI3 |= i3;
1255 if (i3 != (uint32_t)dataNullOffset) {
1256 isNull = false;
1257 }
1258 } while (++j < jLimit);
1259 if (isNull) {
1260 flags[i] = I3_NULL;
1261 if (i3FirstNull < 0) {
1262 if (oredI3 <= 0xffff) {
1263 index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1264 } else {
1265 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1266 hasLongI3Blocks = true;
1267 }
1268 i3FirstNull = 0;
1269 }
1270 } else {
1271 if (oredI3 <= 0xffff) {
1272 int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
1273 if (n >= 0) {
1274 flags[i] = I3_BMP;
1275 index[i] = n;
1276 } else {
1277 flags[i] = I3_16;
1278 index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1279 }
1280 } else {
1281 flags[i] = I3_18;
1282 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1283 hasLongI3Blocks = true;
1284 }
1285 }
1286 i = j;
1287 }
1288
1289 int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
1290
1291 // Length of the index-1 table, rounded up.
1292 int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
1293
1294 // Index table: Fast index, index-1, index-3, index-2.
1295 // +1 for possible index table padding.
1296 int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
1297 index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);
1298 if (index16 == nullptr) {
1299 errorCode = U_MEMORY_ALLOCATION_ERROR;
1300 return 0;
1301 }
1302 uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
1303
1304 if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1305 errorCode = U_MEMORY_ALLOCATION_ERROR;
1306 return 0;
1307 }
1308 MixedBlocks longI3Blocks;
1309 if (hasLongI3Blocks) {
1310 if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
1311 errorCode = U_MEMORY_ALLOCATION_ERROR;
1312 return 0;
1313 }
1314 }
1315
1316 // Compact the index-3 table and write an uncompacted version of the index-2 table.
1317 uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity
1318 int32_t i2Length = 0;
1319 i3FirstNull = index3NullOffset;
1320 int32_t index3Start = fastIndexLength + index1Length;
1321 int32_t indexLength = index3Start;
1322 for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1323 int32_t i3;
1324 uint8_t f = flags[i];
1325 if (f == I3_NULL && i3FirstNull < 0) {
1326 // First index-3 null block. Write & overlap it like a normal block, then remember it.
1327 f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
1328 i3FirstNull = 0;
1329 }
1330 if (f == I3_NULL) {
1331 i3 = index3NullOffset;
1332 } else if (f == I3_BMP) {
1333 i3 = index[i];
1334 } else if (f == I3_16) {
1335 int32_t n = mixedBlocks.findBlock(index16, index, i);
1336 if (n >= 0) {
1337 i3 = n;
1338 } else {
1339 if (indexLength == index3Start) {
1340 // No overlap at the boundary between the index-1 and index-3 tables.
1341 n = 0;
1342 } else {
1343 n = getOverlap(index16, indexLength,
1344 index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
1345 }
1346 i3 = indexLength - n;
1347 int32_t prevIndexLength = indexLength;
1348 while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1349 index16[indexLength++] = index[i + n++];
1350 }
1351 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1352 if (hasLongI3Blocks) {
1353 longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1354 }
1355 }
1356 } else {
1357 U_ASSERT(f == I3_18);
1358 U_ASSERT(hasLongI3Blocks);
1359 // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
1360 int32_t j = i;
1361 int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1362 int32_t k = indexLength;
1363 do {
1364 ++k;
1365 uint32_t v = index[j++];
1366 uint32_t upperBits = (v & 0x30000) >> 2;
1367 index16[k++] = v;
1368 v = index[j++];
1369 upperBits |= (v & 0x30000) >> 4;
1370 index16[k++] = v;
1371 v = index[j++];
1372 upperBits |= (v & 0x30000) >> 6;
1373 index16[k++] = v;
1374 v = index[j++];
1375 upperBits |= (v & 0x30000) >> 8;
1376 index16[k++] = v;
1377 v = index[j++];
1378 upperBits |= (v & 0x30000) >> 10;
1379 index16[k++] = v;
1380 v = index[j++];
1381 upperBits |= (v & 0x30000) >> 12;
1382 index16[k++] = v;
1383 v = index[j++];
1384 upperBits |= (v & 0x30000) >> 14;
1385 index16[k++] = v;
1386 v = index[j++];
1387 upperBits |= (v & 0x30000) >> 16;
1388 index16[k++] = v;
1389 index16[k - 9] = upperBits;
1390 } while (j < jLimit);
1391 int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
1392 if (n >= 0) {
1393 i3 = n | 0x8000;
1394 } else {
1395 if (indexLength == index3Start) {
1396 // No overlap at the boundary between the index-1 and index-3 tables.
1397 n = 0;
1398 } else {
1399 n = getOverlap(index16, indexLength,
1400 index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
1401 }
1402 i3 = (indexLength - n) | 0x8000;
1403 int32_t prevIndexLength = indexLength;
1404 if (n > 0) {
1405 int32_t start = indexLength;
1406 while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
1407 index16[indexLength++] = index16[start + n++];
1408 }
1409 } else {
1410 indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
1411 }
1412 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1413 if (hasLongI3Blocks) {
1414 longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1415 }
1416 }
1417 }
1418 if (index3NullOffset < 0 && i3FirstNull >= 0) {
1419 index3NullOffset = i3;
1420 }
1421 // Set the index-2 table entry.
1422 index2[i2Length++] = i3;
1423 }
1424 U_ASSERT(i2Length == index2Capacity);
1425 U_ASSERT(indexLength <= index3Start + index3Capacity);
1426
1427 if (index3NullOffset < 0) {
1428 index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1429 }
1430 if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1431 // The index-3 offsets exceed 15 bits, or
1432 // the last one cannot be distinguished from the no-null-block value.
1433 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1434 return 0;
1435 }
1436
1437 // Compact the index-2 table and write the index-1 table.
1438 static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
1439 "must re-init mixedBlocks");
1440 int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
1441 int32_t i1 = fastIndexLength;
1442 for (int32_t i = 0; i < i2Length; i += blockLength) {
1443 int32_t n;
1444 if ((i2Length - i) >= blockLength) {
1445 // normal block
1446 U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
1447 n = mixedBlocks.findBlock(index16, index2, i);
1448 } else {
1449 // highStart is inside the last index-2 block. Shorten it.
1450 blockLength = i2Length - i;
1451 n = findSameBlock(index16, index3Start, indexLength,
1452 index2, i, blockLength);
1453 }
1454 int32_t i2;
1455 if (n >= 0) {
1456 i2 = n;
1457 } else {
1458 if (indexLength == index3Start) {
1459 // No overlap at the boundary between the index-1 and index-3/2 tables.
1460 n = 0;
1461 } else {
1462 n = getOverlap(index16, indexLength, index2, i, blockLength);
1463 }
1464 i2 = indexLength - n;
1465 int32_t prevIndexLength = indexLength;
1466 while (n < blockLength) {
1467 index16[indexLength++] = index2[i + n++];
1468 }
1469 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1470 }
1471 // Set the index-1 table entry.
1472 index16[i1++] = i2;
1473 }
1474 U_ASSERT(i1 == index3Start);
1475 U_ASSERT(indexLength <= index16Capacity);
1476
1477 #ifdef UCPTRIE_DEBUG
1478 /* we saved some space */
1479 printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
1480 (long)iLimit, (long)indexLength);
1481 #endif
1482
1483 return indexLength;
1484 }
1485
compactTrie(int32_t fastILimit,UErrorCode & errorCode)1486 int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
1487 // Find the real highStart and round it up.
1488 U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
1489 highValue = get(MAX_UNICODE);
1490 int32_t realHighStart = findHighStart();
1491 realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
1492 ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
1493 if (realHighStart == UNICODE_LIMIT) {
1494 highValue = initialValue;
1495 }
1496
1497 #ifdef UCPTRIE_DEBUG
1498 printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
1499 (long)realHighStart, (long)highValue, (long)initialValue);
1500 #endif
1501
1502 // We always store indexes and data values for the fast range.
1503 // Pin highStart to the top of that range while building.
1504 UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
1505 if (realHighStart < fastLimit) {
1506 for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
1507 flags[i] = ALL_SAME;
1508 index[i] = highValue;
1509 }
1510 highStart = fastLimit;
1511 } else {
1512 highStart = realHighStart;
1513 }
1514
1515 uint32_t asciiData[ASCII_LIMIT];
1516 for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
1517 asciiData[i] = get(i);
1518 }
1519
1520 // First we look for which data blocks have the same value repeated over the whole block,
1521 // deduplicate such blocks, find a good null data block (for faster enumeration),
1522 // and get an upper bound for the necessary data array length.
1523 AllSameBlocks allSameBlocks;
1524 int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
1525 if (newDataCapacity < 0) {
1526 errorCode = U_MEMORY_ALLOCATION_ERROR;
1527 return 0;
1528 }
1529 uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);
1530 if (newData == nullptr) {
1531 errorCode = U_MEMORY_ALLOCATION_ERROR;
1532 return 0;
1533 }
1534 uprv_memcpy(newData, asciiData, sizeof(asciiData));
1535
1536 int32_t dataNullIndex = allSameBlocks.findMostUsed();
1537
1538 MixedBlocks mixedBlocks;
1539 int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
1540 dataNullIndex, mixedBlocks, errorCode);
1541 if (U_FAILURE(errorCode)) { return 0; }
1542 U_ASSERT(newDataLength <= newDataCapacity);
1543 uprv_free(data);
1544 data = newData;
1545 dataCapacity = newDataCapacity;
1546 dataLength = newDataLength;
1547 if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
1548 // The offset of the last data block is too high to be stored in the index table.
1549 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1550 return 0;
1551 }
1552
1553 if (dataNullIndex >= 0) {
1554 dataNullOffset = index[dataNullIndex];
1555 #ifdef UCPTRIE_DEBUG
1556 if (data[dataNullOffset] != initialValue) {
1557 printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
1558 (long)initialValue, (long)data[dataNullOffset]);
1559 }
1560 #endif
1561 initialValue = data[dataNullOffset];
1562 } else {
1563 dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
1564 }
1565
1566 int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
1567 highStart = realHighStart;
1568 return indexLength;
1569 }
1570
build(UCPTrieType type,UCPTrieValueWidth valueWidth,UErrorCode & errorCode)1571 UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
1572 if (U_FAILURE(errorCode)) {
1573 return nullptr;
1574 }
1575 if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
1576 valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
1577 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1578 return nullptr;
1579 }
1580
1581 // The mutable trie always stores 32-bit values.
1582 // When we build a UCPTrie for a smaller value width, we first mask off unused bits
1583 // before compacting the data.
1584 switch (valueWidth) {
1585 case UCPTRIE_VALUE_BITS_32:
1586 break;
1587 case UCPTRIE_VALUE_BITS_16:
1588 maskValues(0xffff);
1589 break;
1590 case UCPTRIE_VALUE_BITS_8:
1591 maskValues(0xff);
1592 break;
1593 default:
1594 break;
1595 }
1596
1597 UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
1598 int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);
1599 if (U_FAILURE(errorCode)) {
1600 clear();
1601 return nullptr;
1602 }
1603
1604 // Ensure data table alignment: The index length must be even for uint32_t data.
1605 if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
1606 index16[indexLength++] = 0xffee; // arbitrary value
1607 }
1608
1609 // Make the total trie structure length a multiple of 4 bytes by padding the data table,
1610 // and store special values as the last two data values.
1611 int32_t length = indexLength * 2;
1612 if (valueWidth == UCPTRIE_VALUE_BITS_16) {
1613 if (((indexLength ^ dataLength) & 1) != 0) {
1614 // padding
1615 data[dataLength++] = errorValue;
1616 }
1617 if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1618 data[dataLength++] = highValue;
1619 data[dataLength++] = errorValue;
1620 }
1621 length += dataLength * 2;
1622 } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
1623 // 32-bit data words never need padding to a multiple of 4 bytes.
1624 if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1625 if (data[dataLength - 1] != highValue) {
1626 data[dataLength++] = highValue;
1627 }
1628 data[dataLength++] = errorValue;
1629 }
1630 length += dataLength * 4;
1631 } else {
1632 int32_t and3 = (length + dataLength) & 3;
1633 if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
1634 // all set
1635 } else if(and3 == 3 && data[dataLength - 1] == highValue) {
1636 data[dataLength++] = errorValue;
1637 } else {
1638 while (and3 != 2) {
1639 data[dataLength++] = highValue;
1640 and3 = (and3 + 1) & 3;
1641 }
1642 data[dataLength++] = highValue;
1643 data[dataLength++] = errorValue;
1644 }
1645 length += dataLength;
1646 }
1647
1648 // Calculate the total length of the UCPTrie as a single memory block.
1649 length += sizeof(UCPTrie);
1650 U_ASSERT((length & 3) == 0);
1651
1652 uint8_t *bytes = (uint8_t *)uprv_malloc(length);
1653 if (bytes == nullptr) {
1654 errorCode = U_MEMORY_ALLOCATION_ERROR;
1655 clear();
1656 return nullptr;
1657 }
1658 UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);
1659 uprv_memset(trie, 0, sizeof(UCPTrie));
1660 trie->indexLength = indexLength;
1661 trie->dataLength = dataLength;
1662
1663 trie->highStart = highStart;
1664 // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
1665 // Runtime code needs to then test for the real highStart as well.
1666 trie->shifted12HighStart = (highStart + 0xfff) >> 12;
1667 trie->type = type;
1668 trie->valueWidth = valueWidth;
1669
1670 trie->index3NullOffset = index3NullOffset;
1671 trie->dataNullOffset = dataNullOffset;
1672 trie->nullValue = initialValue;
1673
1674 bytes += sizeof(UCPTrie);
1675
1676 // Fill the index and data arrays.
1677 uint16_t *dest16 = (uint16_t *)bytes;
1678 trie->index = dest16;
1679
1680 if (highStart <= fastLimit) {
1681 // Condense only the fast index from the mutable-trie index.
1682 for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
1683 *dest16++ = (uint16_t)index[i]; // dest16[j]
1684 }
1685 } else {
1686 uprv_memcpy(dest16, index16, indexLength * 2);
1687 dest16 += indexLength;
1688 }
1689 bytes += indexLength * 2;
1690
1691 // Write the data array.
1692 const uint32_t *p = data;
1693 switch (valueWidth) {
1694 case UCPTRIE_VALUE_BITS_16:
1695 // Write 16-bit data values.
1696 trie->data.ptr16 = dest16;
1697 for (int32_t i = dataLength; i > 0; --i) {
1698 *dest16++ = (uint16_t)*p++;
1699 }
1700 break;
1701 case UCPTRIE_VALUE_BITS_32:
1702 // Write 32-bit data values.
1703 trie->data.ptr32 = (uint32_t *)bytes;
1704 uprv_memcpy(bytes, p, (size_t)dataLength * 4);
1705 break;
1706 case UCPTRIE_VALUE_BITS_8:
1707 // Write 8-bit data values.
1708 trie->data.ptr8 = bytes;
1709 for (int32_t i = dataLength; i > 0; --i) {
1710 *bytes++ = (uint8_t)*p++;
1711 }
1712 break;
1713 default:
1714 // Will not occur, valueWidth checked at the beginning.
1715 break;
1716 }
1717
1718 #ifdef UCPTRIE_DEBUG
1719 trie->name = name;
1720
1721 ucptrie_printLengths(trie, "");
1722 #endif
1723
1724 clear();
1725 return trie;
1726 }
1727
1728 } // namespace
1729
1730 U_NAMESPACE_END
1731
1732 U_NAMESPACE_USE
1733
1734 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_open(uint32_t initialValue,uint32_t errorValue,UErrorCode * pErrorCode)1735 umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
1736 if (U_FAILURE(*pErrorCode)) {
1737 return nullptr;
1738 }
1739 LocalPointer<MutableCodePointTrie> trie(
1740 new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);
1741 if (U_FAILURE(*pErrorCode)) {
1742 return nullptr;
1743 }
1744 return reinterpret_cast<UMutableCPTrie *>(trie.orphan());
1745 }
1746
1747 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_clone(const UMutableCPTrie * other,UErrorCode * pErrorCode)1748 umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
1749 if (U_FAILURE(*pErrorCode)) {
1750 return nullptr;
1751 }
1752 if (other == nullptr) {
1753 return nullptr;
1754 }
1755 LocalPointer<MutableCodePointTrie> clone(
1756 new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);
1757 if (U_FAILURE(*pErrorCode)) {
1758 return nullptr;
1759 }
1760 return reinterpret_cast<UMutableCPTrie *>(clone.orphan());
1761 }
1762
1763 U_CAPI void U_EXPORT2
umutablecptrie_close(UMutableCPTrie * trie)1764 umutablecptrie_close(UMutableCPTrie *trie) {
1765 delete reinterpret_cast<MutableCodePointTrie *>(trie);
1766 }
1767
1768 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_fromUCPMap(const UCPMap * map,UErrorCode * pErrorCode)1769 umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
1770 if (U_FAILURE(*pErrorCode)) {
1771 return nullptr;
1772 }
1773 if (map == nullptr) {
1774 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1775 return nullptr;
1776 }
1777 return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
1778 }
1779
1780 U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_fromUCPTrie(const UCPTrie * trie,UErrorCode * pErrorCode)1781 umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
1782 if (U_FAILURE(*pErrorCode)) {
1783 return nullptr;
1784 }
1785 if (trie == nullptr) {
1786 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1787 return nullptr;
1788 }
1789 return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
1790 }
1791
1792 U_CAPI uint32_t U_EXPORT2
umutablecptrie_get(const UMutableCPTrie * trie,UChar32 c)1793 umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
1794 return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);
1795 }
1796
1797 namespace {
1798
getRange(const void * trie,UChar32 start,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)1799 UChar32 getRange(const void *trie, UChar32 start,
1800 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1801 return reinterpret_cast<const MutableCodePointTrie *>(trie)->
1802 getRange(start, filter, context, pValue);
1803 }
1804
1805 } // namespace
1806
1807 U_CAPI UChar32 U_EXPORT2
umutablecptrie_getRange(const UMutableCPTrie * trie,UChar32 start,UCPMapRangeOption option,uint32_t surrogateValue,UCPMapValueFilter * filter,const void * context,uint32_t * pValue)1808 umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
1809 UCPMapRangeOption option, uint32_t surrogateValue,
1810 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1811 return ucptrie_internalGetRange(getRange, trie, start,
1812 option, surrogateValue,
1813 filter, context, pValue);
1814 }
1815
1816 U_CAPI void U_EXPORT2
umutablecptrie_set(UMutableCPTrie * trie,UChar32 c,uint32_t value,UErrorCode * pErrorCode)1817 umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
1818 if (U_FAILURE(*pErrorCode)) {
1819 return;
1820 }
1821 reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
1822 }
1823
1824 U_CAPI void U_EXPORT2
umutablecptrie_setRange(UMutableCPTrie * trie,UChar32 start,UChar32 end,uint32_t value,UErrorCode * pErrorCode)1825 umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
1826 uint32_t value, UErrorCode *pErrorCode) {
1827 if (U_FAILURE(*pErrorCode)) {
1828 return;
1829 }
1830 reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
1831 }
1832
1833 /* Compact and internally serialize the trie. */
1834 U_CAPI UCPTrie * U_EXPORT2
umutablecptrie_buildImmutable(UMutableCPTrie * trie,UCPTrieType type,UCPTrieValueWidth valueWidth,UErrorCode * pErrorCode)1835 umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
1836 UErrorCode *pErrorCode) {
1837 if (U_FAILURE(*pErrorCode)) {
1838 return nullptr;
1839 }
1840 return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
1841 }
1842
1843 #ifdef UCPTRIE_DEBUG
umutablecptrie_setName(UMutableCPTrie * trie,const char * name)1844 U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) {
1845 reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;
1846 }
1847 #endif
1848