// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2001-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  utrie2.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2008aug16 (starting from a copy of utrie.c)
*   created by: Markus W. Scherer
*
*   This is a common implementation of a Unicode trie.
*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
*   Unicode code points (0..0x10ffff).
*   This is the second common version of a Unicode trie (hence the name UTrie2).
*   See utrie2.h for a comparison.
*
*   This file contains only the runtime and enumeration code, for read-only access.
*   See utrie2_builder.c for the builder code.
*/
27 #ifdef UTRIE2_DEBUG
28 # include <stdio.h>
29 #endif
30
31 #include "unicode/utypes.h"
32 #include "unicode/utf.h"
33 #include "unicode/utf8.h"
34 #include "unicode/utf16.h"
35 #include "cmemory.h"
36 #include "utrie2.h"
37 #include "utrie2_impl.h"
38 #include "uassert.h"
39
/* Public UTrie2 API implementation ----------------------------------------- */
41
42 static uint32_t
get32(const UNewTrie2 * trie,UChar32 c,UBool fromLSCP)43 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
44 int32_t i2, block;
45
46 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
47 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
48 }
49
50 if(U_IS_LEAD(c) && fromLSCP) {
51 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
52 (c>>UTRIE2_SHIFT_2);
53 } else {
54 i2=trie->index1[c>>UTRIE2_SHIFT_1]+
55 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
56 }
57 block=trie->index2[i2];
58 return trie->data[block+(c&UTRIE2_DATA_MASK)];
59 }
60
61 U_CAPI uint32_t U_EXPORT2
utrie2_get32(const UTrie2 * trie,UChar32 c)62 utrie2_get32(const UTrie2 *trie, UChar32 c) {
63 if(trie->data16!=NULL) {
64 return UTRIE2_GET16(trie, c);
65 } else if(trie->data32!=NULL) {
66 return UTRIE2_GET32(trie, c);
67 } else if((uint32_t)c>0x10ffff) {
68 return trie->errorValue;
69 } else {
70 return get32(trie->newTrie, c, TRUE);
71 }
72 }
73
74 U_CAPI uint32_t U_EXPORT2
utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 * trie,UChar32 c)75 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
76 if(!U_IS_LEAD(c)) {
77 return trie->errorValue;
78 }
79 if(trie->data16!=NULL) {
80 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
81 } else if(trie->data32!=NULL) {
82 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
83 } else {
84 return get32(trie->newTrie, c, FALSE);
85 }
86 }
87
88 static inline int32_t
u8Index(const UTrie2 * trie,UChar32 c,int32_t i)89 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
90 int32_t idx=
91 _UTRIE2_INDEX_FROM_CP(
92 trie,
93 trie->data32==NULL ? trie->indexLength : 0,
94 c);
95 return (idx<<3)|i;
96 }
97
98 U_CAPI int32_t U_EXPORT2
utrie2_internalU8NextIndex(const UTrie2 * trie,UChar32 c,const uint8_t * src,const uint8_t * limit)99 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
100 const uint8_t *src, const uint8_t *limit) {
101 int32_t i, length;
102 i=0;
103 /* support 64-bit pointers by avoiding cast of arbitrary difference */
104 if((limit-src)<=7) {
105 length=(int32_t)(limit-src);
106 } else {
107 length=7;
108 }
109 c=utf8_nextCharSafeBody(src, &i, length, c, -1);
110 return u8Index(trie, c, i);
111 }
112
113 U_CAPI int32_t U_EXPORT2
utrie2_internalU8PrevIndex(const UTrie2 * trie,UChar32 c,const uint8_t * start,const uint8_t * src)114 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
115 const uint8_t *start, const uint8_t *src) {
116 int32_t i, length;
117 /* support 64-bit pointers by avoiding cast of arbitrary difference */
118 if((src-start)<=7) {
119 i=length=(int32_t)(src-start);
120 } else {
121 i=length=7;
122 start=src-7;
123 }
124 c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
125 i=length-i; /* number of bytes read backward from src */
126 return u8Index(trie, c, i);
127 }
128
129 U_CAPI UTrie2 * U_EXPORT2
utrie2_openFromSerialized(UTrie2ValueBits valueBits,const void * data,int32_t length,int32_t * pActualLength,UErrorCode * pErrorCode)130 utrie2_openFromSerialized(UTrie2ValueBits valueBits,
131 const void *data, int32_t length, int32_t *pActualLength,
132 UErrorCode *pErrorCode) {
133 const UTrie2Header *header;
134 const uint16_t *p16;
135 int32_t actualLength;
136
137 UTrie2 tempTrie;
138 UTrie2 *trie;
139
140 if(U_FAILURE(*pErrorCode)) {
141 return 0;
142 }
143
144 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
145 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
146 ) {
147 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
148 return 0;
149 }
150
151 /* enough data for a trie header? */
152 if(length<(int32_t)sizeof(UTrie2Header)) {
153 *pErrorCode=U_INVALID_FORMAT_ERROR;
154 return 0;
155 }
156
157 /* check the signature */
158 header=(const UTrie2Header *)data;
159 if(header->signature!=UTRIE2_SIG) {
160 *pErrorCode=U_INVALID_FORMAT_ERROR;
161 return 0;
162 }
163
164 /* get the options */
165 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
166 *pErrorCode=U_INVALID_FORMAT_ERROR;
167 return 0;
168 }
169
170 /* get the length values and offsets */
171 uprv_memset(&tempTrie, 0, sizeof(tempTrie));
172 tempTrie.indexLength=header->indexLength;
173 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
174 tempTrie.index2NullOffset=header->index2NullOffset;
175 tempTrie.dataNullOffset=header->dataNullOffset;
176
177 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
178 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
179 if(valueBits==UTRIE2_16_VALUE_BITS) {
180 tempTrie.highValueIndex+=tempTrie.indexLength;
181 }
182
183 /* calculate the actual length */
184 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
185 if(valueBits==UTRIE2_16_VALUE_BITS) {
186 actualLength+=tempTrie.dataLength*2;
187 } else {
188 actualLength+=tempTrie.dataLength*4;
189 }
190 if(length<actualLength) {
191 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */
192 return 0;
193 }
194
195 /* allocate the trie */
196 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
197 if(trie==NULL) {
198 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
199 return 0;
200 }
201 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
202 trie->memory=(uint32_t *)data;
203 trie->length=actualLength;
204 trie->isMemoryOwned=FALSE;
205
206 /* set the pointers to its index and data arrays */
207 p16=(const uint16_t *)(header+1);
208 trie->index=p16;
209 p16+=trie->indexLength;
210
211 /* get the data */
212 switch(valueBits) {
213 case UTRIE2_16_VALUE_BITS:
214 trie->data16=p16;
215 trie->data32=NULL;
216 trie->initialValue=trie->index[trie->dataNullOffset];
217 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
218 break;
219 case UTRIE2_32_VALUE_BITS:
220 trie->data16=NULL;
221 trie->data32=(const uint32_t *)p16;
222 trie->initialValue=trie->data32[trie->dataNullOffset];
223 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
224 break;
225 default:
226 *pErrorCode=U_INVALID_FORMAT_ERROR;
227 return 0;
228 }
229
230 if(pActualLength!=NULL) {
231 *pActualLength=actualLength;
232 }
233 return trie;
234 }
235
236 U_CAPI UTrie2 * U_EXPORT2
utrie2_openDummy(UTrie2ValueBits valueBits,uint32_t initialValue,uint32_t errorValue,UErrorCode * pErrorCode)237 utrie2_openDummy(UTrie2ValueBits valueBits,
238 uint32_t initialValue, uint32_t errorValue,
239 UErrorCode *pErrorCode) {
240 UTrie2 *trie;
241 UTrie2Header *header;
242 uint32_t *p;
243 uint16_t *dest16;
244 int32_t indexLength, dataLength, length, i;
245 int32_t dataMove; /* >0 if the data is moved to the end of the index array */
246
247 if(U_FAILURE(*pErrorCode)) {
248 return 0;
249 }
250
251 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
252 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
253 return 0;
254 }
255
256 /* calculate the total length of the dummy trie data */
257 indexLength=UTRIE2_INDEX_1_OFFSET;
258 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
259 length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
260 if(valueBits==UTRIE2_16_VALUE_BITS) {
261 length+=dataLength*2;
262 } else {
263 length+=dataLength*4;
264 }
265
266 /* allocate the trie */
267 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
268 if(trie==NULL) {
269 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
270 return 0;
271 }
272 uprv_memset(trie, 0, sizeof(UTrie2));
273 trie->memory=uprv_malloc(length);
274 if(trie->memory==NULL) {
275 uprv_free(trie);
276 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
277 return 0;
278 }
279 trie->length=length;
280 trie->isMemoryOwned=TRUE;
281
282 /* set the UTrie2 fields */
283 if(valueBits==UTRIE2_16_VALUE_BITS) {
284 dataMove=indexLength;
285 } else {
286 dataMove=0;
287 }
288
289 trie->indexLength=indexLength;
290 trie->dataLength=dataLength;
291 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
292 trie->dataNullOffset=(uint16_t)dataMove;
293 trie->initialValue=initialValue;
294 trie->errorValue=errorValue;
295 trie->highStart=0;
296 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
297
298 /* set the header fields */
299 header=(UTrie2Header *)trie->memory;
300
301 header->signature=UTRIE2_SIG; /* "Tri2" */
302 header->options=(uint16_t)valueBits;
303
304 header->indexLength=(uint16_t)indexLength;
305 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
306 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
307 header->dataNullOffset=(uint16_t)dataMove;
308 header->shiftedHighStart=0;
309
310 /* fill the index and data arrays */
311 dest16=(uint16_t *)(header+1);
312 trie->index=dest16;
313
314 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
315 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
316 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */
317 }
318
319 /* write UTF-8 2-byte index-2 values, not right-shifted */
320 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
321 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
322 }
323 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
324 *dest16++=(uint16_t)dataMove;
325 }
326
327 /* write the 16/32-bit data array */
328 switch(valueBits) {
329 case UTRIE2_16_VALUE_BITS:
330 /* write 16-bit data values */
331 trie->data16=dest16;
332 trie->data32=NULL;
333 for(i=0; i<0x80; ++i) {
334 *dest16++=(uint16_t)initialValue;
335 }
336 for(; i<0xc0; ++i) {
337 *dest16++=(uint16_t)errorValue;
338 }
339 /* highValue and reserved values */
340 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
341 *dest16++=(uint16_t)initialValue;
342 }
343 break;
344 case UTRIE2_32_VALUE_BITS:
345 /* write 32-bit data values */
346 p=(uint32_t *)dest16;
347 trie->data16=NULL;
348 trie->data32=p;
349 for(i=0; i<0x80; ++i) {
350 *p++=initialValue;
351 }
352 for(; i<0xc0; ++i) {
353 *p++=errorValue;
354 }
355 /* highValue and reserved values */
356 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
357 *p++=initialValue;
358 }
359 break;
360 default:
361 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
362 return 0;
363 }
364
365 return trie;
366 }
367
368 U_CAPI void U_EXPORT2
utrie2_close(UTrie2 * trie)369 utrie2_close(UTrie2 *trie) {
370 if(trie!=NULL) {
371 if(trie->isMemoryOwned) {
372 uprv_free(trie->memory);
373 }
374 if(trie->newTrie!=NULL) {
375 uprv_free(trie->newTrie->data);
376 uprv_free(trie->newTrie);
377 }
378 uprv_free(trie);
379 }
380 }
381
382 U_CAPI int32_t U_EXPORT2
utrie2_getVersion(const void * data,int32_t length,UBool anyEndianOk)383 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
384 uint32_t signature;
385 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) {
386 return 0;
387 }
388 signature=*(const uint32_t *)data;
389 if(signature==UTRIE2_SIG) {
390 return 2;
391 }
392 if(anyEndianOk && signature==UTRIE2_OE_SIG) {
393 return 2;
394 }
395 if(signature==UTRIE_SIG) {
396 return 1;
397 }
398 if(anyEndianOk && signature==UTRIE_OE_SIG) {
399 return 1;
400 }
401 return 0;
402 }
403
404 U_CAPI UBool U_EXPORT2
utrie2_isFrozen(const UTrie2 * trie)405 utrie2_isFrozen(const UTrie2 *trie) {
406 return (UBool)(trie->newTrie==NULL);
407 }
408
409 U_CAPI int32_t U_EXPORT2
utrie2_serialize(const UTrie2 * trie,void * data,int32_t capacity,UErrorCode * pErrorCode)410 utrie2_serialize(const UTrie2 *trie,
411 void *data, int32_t capacity,
412 UErrorCode *pErrorCode) {
413 /* argument check */
414 if(U_FAILURE(*pErrorCode)) {
415 return 0;
416 }
417
418 if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
419 capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
420 ) {
421 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
422 return 0;
423 }
424
425 if(capacity>=trie->length) {
426 uprv_memcpy(data, trie->memory, trie->length);
427 } else {
428 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
429 }
430 return trie->length;
431 }
432
433 U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)434 utrie2_swap(const UDataSwapper *ds,
435 const void *inData, int32_t length, void *outData,
436 UErrorCode *pErrorCode) {
437 const UTrie2Header *inTrie;
438 UTrie2Header trie;
439 int32_t dataLength, size;
440 UTrie2ValueBits valueBits;
441
442 if(U_FAILURE(*pErrorCode)) {
443 return 0;
444 }
445 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
446 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
447 return 0;
448 }
449
450 /* setup and swapping */
451 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
452 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
453 return 0;
454 }
455
456 inTrie=(const UTrie2Header *)inData;
457 trie.signature=ds->readUInt32(inTrie->signature);
458 trie.options=ds->readUInt16(inTrie->options);
459 trie.indexLength=ds->readUInt16(inTrie->indexLength);
460 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
461
462 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
463 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
464
465 if( trie.signature!=UTRIE2_SIG ||
466 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
467 trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
468 dataLength<UTRIE2_DATA_START_OFFSET
469 ) {
470 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
471 return 0;
472 }
473
474 size=sizeof(UTrie2Header)+trie.indexLength*2;
475 switch(valueBits) {
476 case UTRIE2_16_VALUE_BITS:
477 size+=dataLength*2;
478 break;
479 case UTRIE2_32_VALUE_BITS:
480 size+=dataLength*4;
481 break;
482 default:
483 *pErrorCode=U_INVALID_FORMAT_ERROR;
484 return 0;
485 }
486
487 if(length>=0) {
488 UTrie2Header *outTrie;
489
490 if(length<size) {
491 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
492 return 0;
493 }
494
495 outTrie=(UTrie2Header *)outData;
496
497 /* swap the header */
498 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
499 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
500
501 /* swap the index and the data */
502 switch(valueBits) {
503 case UTRIE2_16_VALUE_BITS:
504 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
505 break;
506 case UTRIE2_32_VALUE_BITS:
507 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
508 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
509 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
510 break;
511 default:
512 *pErrorCode=U_INVALID_FORMAT_ERROR;
513 return 0;
514 }
515 }
516
517 return size;
518 }
519
// utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
// to avoid a dependency from utrie2.cpp on utrie.c.

/* enumeration -------------------------------------------------------------- */
524
/* Smaller of two values; b is returned when they compare equal. Each argument appears twice in the expansion. */
#define MIN_VALUE(a, b) (((a)<(b)) ? (a) : (b))
526
527 /* default UTrie2EnumValue() returns the input value itself */
528 static uint32_t U_CALLCONV
enumSameValue(const void *,uint32_t value)529 enumSameValue(const void * /*context*/, uint32_t value) {
530 return value;
531 }
532
533 /**
534 * Enumerate all ranges of code points with the same relevant values.
535 * The values are transformed from the raw trie entries by the enumValue function.
536 *
537 * Currently requires start<limit and both start and limit must be multiples
538 * of UTRIE2_DATA_BLOCK_LENGTH.
539 *
540 * Optimizations:
541 * - Skip a whole block if we know that it is filled with a single value,
542 * and it is the same as we visited just before.
543 * - Handle the null block specially because we know a priori that it is filled
544 * with a single value.
545 */
546 static void
enumEitherTrie(const UTrie2 * trie,UChar32 start,UChar32 limit,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)547 enumEitherTrie(const UTrie2 *trie,
548 UChar32 start, UChar32 limit,
549 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
550 const uint32_t *data32;
551 const uint16_t *idx;
552
553 uint32_t value, prevValue, initialValue;
554 UChar32 c, prev, highStart;
555 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
556
557 if(enumRange==NULL) {
558 return;
559 }
560 if(enumValue==NULL) {
561 enumValue=enumSameValue;
562 }
563
564 if(trie->newTrie==NULL) {
565 /* frozen trie */
566 idx=trie->index;
567 U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
568 data32=trie->data32;
569
570 index2NullOffset=trie->index2NullOffset;
571 nullBlock=trie->dataNullOffset;
572 } else {
573 /* unfrozen, mutable trie */
574 idx=NULL;
575 data32=trie->newTrie->data;
576 U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
577
578 index2NullOffset=trie->newTrie->index2NullOffset;
579 nullBlock=trie->newTrie->dataNullOffset;
580 }
581
582 highStart=trie->highStart;
583
584 /* get the enumeration value that corresponds to an initial-value trie data entry */
585 initialValue=enumValue(context, trie->initialValue);
586
587 /* set variables for previous range */
588 prevI2Block=-1;
589 prevBlock=-1;
590 prev=start;
591 prevValue=0;
592
593 /* enumerate index-2 blocks */
594 for(c=start; c<limit && c<highStart;) {
595 /* Code point limit for iterating inside this i2Block. */
596 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
597 if(limit<tempLimit) {
598 tempLimit=limit;
599 }
600 if(c<=0xffff) {
601 if(!U_IS_SURROGATE(c)) {
602 i2Block=c>>UTRIE2_SHIFT_2;
603 } else if(U_IS_SURROGATE_LEAD(c)) {
604 /*
605 * Enumerate values for lead surrogate code points, not code units:
606 * This special block has half the normal length.
607 */
608 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
609 tempLimit=MIN_VALUE(0xdc00, limit);
610 } else {
611 /*
612 * Switch back to the normal part of the index-2 table.
613 * Enumerate the second half of the surrogates block.
614 */
615 i2Block=0xd800>>UTRIE2_SHIFT_2;
616 tempLimit=MIN_VALUE(0xe000, limit);
617 }
618 } else {
619 /* supplementary code points */
620 if(idx!=NULL) {
621 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
622 (c>>UTRIE2_SHIFT_1)];
623 } else {
624 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
625 }
626 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
627 /*
628 * The index-2 block is the same as the previous one, and filled with prevValue.
629 * Only possible for supplementary code points because the linear-BMP index-2
630 * table creates unique i2Block values.
631 */
632 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
633 continue;
634 }
635 }
636 prevI2Block=i2Block;
637 if(i2Block==index2NullOffset) {
638 /* this is the null index-2 block */
639 if(prevValue!=initialValue) {
640 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
641 return;
642 }
643 prevBlock=nullBlock;
644 prev=c;
645 prevValue=initialValue;
646 }
647 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
648 } else {
649 /* enumerate data blocks for one index-2 block */
650 int32_t i2, i2Limit;
651 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
652 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
653 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
654 } else {
655 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
656 }
657 for(; i2<i2Limit; ++i2) {
658 if(idx!=NULL) {
659 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
660 } else {
661 block=trie->newTrie->index2[i2Block+i2];
662 }
663 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
664 /* the block is the same as the previous one, and filled with prevValue */
665 c+=UTRIE2_DATA_BLOCK_LENGTH;
666 continue;
667 }
668 prevBlock=block;
669 if(block==nullBlock) {
670 /* this is the null data block */
671 if(prevValue!=initialValue) {
672 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
673 return;
674 }
675 prev=c;
676 prevValue=initialValue;
677 }
678 c+=UTRIE2_DATA_BLOCK_LENGTH;
679 } else {
680 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
681 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
682 if(value!=prevValue) {
683 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
684 return;
685 }
686 prev=c;
687 prevValue=value;
688 }
689 ++c;
690 }
691 }
692 }
693 }
694 }
695
696 if(c>limit) {
697 c=limit; /* could be higher if in the index2NullOffset */
698 } else if(c<limit) {
699 /* c==highStart<limit */
700 uint32_t highValue;
701 if(idx!=NULL) {
702 highValue=
703 data32!=NULL ?
704 data32[trie->highValueIndex] :
705 idx[trie->highValueIndex];
706 } else {
707 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
708 }
709 value=enumValue(context, highValue);
710 if(value!=prevValue) {
711 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
712 return;
713 }
714 prev=c;
715 prevValue=value;
716 }
717 c=limit;
718 }
719
720 /* deliver last range */
721 enumRange(context, prev, c-1, prevValue);
722 }
723
724 U_CAPI void U_EXPORT2
utrie2_enum(const UTrie2 * trie,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)725 utrie2_enum(const UTrie2 *trie,
726 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
727 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
728 }
729
730 U_CAPI void U_EXPORT2
utrie2_enumForLeadSurrogate(const UTrie2 * trie,UChar32 lead,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)731 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
732 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
733 const void *context) {
734 if(!U16_IS_LEAD(lead)) {
735 return;
736 }
737 lead=(lead-0xd7c0)<<10; /* start code point */
738 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
739 }
740
/* C++ convenience wrappers ------------------------------------------------- */
742
743 U_NAMESPACE_BEGIN
744
previous16()745 uint16_t BackwardUTrie2StringIterator::previous16() {
746 codePointLimit=codePointStart;
747 if(start>=codePointStart) {
748 codePoint=U_SENTINEL;
749 return 0;
750 }
751 uint16_t result;
752 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
753 return result;
754 }
755
next16()756 uint16_t ForwardUTrie2StringIterator::next16() {
757 codePointStart=codePointLimit;
758 if(codePointLimit==limit) {
759 codePoint=U_SENTINEL;
760 return 0;
761 }
762 uint16_t result;
763 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
764 return result;
765 }
766
767 U_NAMESPACE_END
768