1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 2001-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: utrie2.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2008aug16 (starting from a copy of utrie.c)
14 * created by: Markus W. Scherer
15 *
16 * This is a common implementation of a Unicode trie.
17 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
18 * Unicode code points (0..0x10ffff).
19 * This is the second common version of a Unicode trie (hence the name UTrie2).
20 * See utrie2.h for a comparison.
21 *
22 * This file contains only the runtime and enumeration code, for read-only access.
23 * See utrie2_builder.c for the builder code.
24 */
25 #ifdef UTRIE2_DEBUG
26 # include <stdio.h>
27 #endif
28
29 #include "unicode/utypes.h"
30 #include "cmemory.h"
31 #include "utrie2.h"
32 #include "utrie2_impl.h"
33
34 /* Public UTrie2 API implementation ----------------------------------------- */
35
36 static uint32_t
get32(const UNewTrie2 * trie,UChar32 c,UBool fromLSCP)37 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
38 int32_t i2, block;
39
40 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
41 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
42 }
43
44 if(U_IS_LEAD(c) && fromLSCP) {
45 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
46 (c>>UTRIE2_SHIFT_2);
47 } else {
48 i2=trie->index1[c>>UTRIE2_SHIFT_1]+
49 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
50 }
51 block=trie->index2[i2];
52 return trie->data[block+(c&UTRIE2_DATA_MASK)];
53 }
54
55 U_CAPI uint32_t U_EXPORT2
utrie2_get32(const UTrie2 * trie,UChar32 c)56 utrie2_get32(const UTrie2 *trie, UChar32 c) {
57 if(trie->data16!=NULL) {
58 return UTRIE2_GET16(trie, c);
59 } else if(trie->data32!=NULL) {
60 return UTRIE2_GET32(trie, c);
61 } else if((uint32_t)c>0x10ffff) {
62 return trie->errorValue;
63 } else {
64 return get32(trie->newTrie, c, TRUE);
65 }
66 }
67
68 U_CAPI uint32_t U_EXPORT2
utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 * trie,UChar32 c)69 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
70 if(!U_IS_LEAD(c)) {
71 return trie->errorValue;
72 }
73 if(trie->data16!=NULL) {
74 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
75 } else if(trie->data32!=NULL) {
76 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
77 } else {
78 return get32(trie->newTrie, c, FALSE);
79 }
80 }
81
82 static U_INLINE int32_t
u8Index(const UTrie2 * trie,UChar32 c,int32_t i)83 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
84 int32_t idx=
85 _UTRIE2_INDEX_FROM_CP(
86 trie,
87 trie->data32==NULL ? trie->indexLength : 0,
88 c);
89 return (idx<<3)|i;
90 }
91
92 U_CAPI int32_t U_EXPORT2
utrie2_internalU8NextIndex(const UTrie2 * trie,UChar32 c,const uint8_t * src,const uint8_t * limit)93 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
94 const uint8_t *src, const uint8_t *limit) {
95 int32_t i, length;
96 i=0;
97 /* support 64-bit pointers by avoiding cast of arbitrary difference */
98 if((limit-src)<=7) {
99 length=(int32_t)(limit-src);
100 } else {
101 length=7;
102 }
103 c=utf8_nextCharSafeBody(src, &i, length, c, -1);
104 return u8Index(trie, c, i);
105 }
106
107 U_CAPI int32_t U_EXPORT2
utrie2_internalU8PrevIndex(const UTrie2 * trie,UChar32 c,const uint8_t * start,const uint8_t * src)108 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
109 const uint8_t *start, const uint8_t *src) {
110 int32_t i, length;
111 /* support 64-bit pointers by avoiding cast of arbitrary difference */
112 if((src-start)<=7) {
113 i=length=(int32_t)(src-start);
114 } else {
115 i=length=7;
116 start=src-7;
117 }
118 c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
119 i=length-i; /* number of bytes read backward from src */
120 return u8Index(trie, c, i);
121 }
122
123 U_CAPI UTrie2 * U_EXPORT2
utrie2_openFromSerialized(UTrie2ValueBits valueBits,const void * data,int32_t length,int32_t * pActualLength,UErrorCode * pErrorCode)124 utrie2_openFromSerialized(UTrie2ValueBits valueBits,
125 const void *data, int32_t length, int32_t *pActualLength,
126 UErrorCode *pErrorCode) {
127 const UTrie2Header *header;
128 const uint16_t *p16;
129 int32_t actualLength;
130
131 UTrie2 tempTrie;
132 UTrie2 *trie;
133
134 if(U_FAILURE(*pErrorCode)) {
135 return 0;
136 }
137
138 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
139 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
140 ) {
141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 return 0;
143 }
144
145 /* enough data for a trie header? */
146 if(length<(int32_t)sizeof(UTrie2Header)) {
147 *pErrorCode=U_INVALID_FORMAT_ERROR;
148 return 0;
149 }
150
151 /* check the signature */
152 header=(const UTrie2Header *)data;
153 if(header->signature!=UTRIE2_SIG) {
154 *pErrorCode=U_INVALID_FORMAT_ERROR;
155 return 0;
156 }
157
158 /* get the options */
159 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
160 *pErrorCode=U_INVALID_FORMAT_ERROR;
161 return 0;
162 }
163
164 /* get the length values and offsets */
165 uprv_memset(&tempTrie, 0, sizeof(tempTrie));
166 tempTrie.indexLength=header->indexLength;
167 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
168 tempTrie.index2NullOffset=header->index2NullOffset;
169 tempTrie.dataNullOffset=header->dataNullOffset;
170
171 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
172 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
173 if(valueBits==UTRIE2_16_VALUE_BITS) {
174 tempTrie.highValueIndex+=tempTrie.indexLength;
175 }
176
177 /* calculate the actual length */
178 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
179 if(valueBits==UTRIE2_16_VALUE_BITS) {
180 actualLength+=tempTrie.dataLength*2;
181 } else {
182 actualLength+=tempTrie.dataLength*4;
183 }
184 if(length<actualLength) {
185 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */
186 return 0;
187 }
188
189 /* allocate the trie */
190 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
191 if(trie==NULL) {
192 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
193 return 0;
194 }
195 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
196 trie->memory=(uint32_t *)data;
197 trie->length=actualLength;
198 trie->isMemoryOwned=FALSE;
199
200 /* set the pointers to its index and data arrays */
201 p16=(const uint16_t *)(header+1);
202 trie->index=p16;
203 p16+=trie->indexLength;
204
205 /* get the data */
206 switch(valueBits) {
207 case UTRIE2_16_VALUE_BITS:
208 trie->data16=p16;
209 trie->data32=NULL;
210 trie->initialValue=trie->index[trie->dataNullOffset];
211 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
212 break;
213 case UTRIE2_32_VALUE_BITS:
214 trie->data16=NULL;
215 trie->data32=(const uint32_t *)p16;
216 trie->initialValue=trie->data32[trie->dataNullOffset];
217 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
218 break;
219 default:
220 *pErrorCode=U_INVALID_FORMAT_ERROR;
221 return 0;
222 }
223
224 if(pActualLength!=NULL) {
225 *pActualLength=actualLength;
226 }
227 return trie;
228 }
229
230 U_CAPI UTrie2 * U_EXPORT2
utrie2_openDummy(UTrie2ValueBits valueBits,uint32_t initialValue,uint32_t errorValue,UErrorCode * pErrorCode)231 utrie2_openDummy(UTrie2ValueBits valueBits,
232 uint32_t initialValue, uint32_t errorValue,
233 UErrorCode *pErrorCode) {
234 UTrie2 *trie;
235 UTrie2Header *header;
236 uint32_t *p;
237 uint16_t *dest16;
238 int32_t indexLength, dataLength, length, i;
239 int32_t dataMove; /* >0 if the data is moved to the end of the index array */
240
241 if(U_FAILURE(*pErrorCode)) {
242 return 0;
243 }
244
245 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
246 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
247 return 0;
248 }
249
250 /* calculate the total length of the dummy trie data */
251 indexLength=UTRIE2_INDEX_1_OFFSET;
252 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
253 length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
254 if(valueBits==UTRIE2_16_VALUE_BITS) {
255 length+=dataLength*2;
256 } else {
257 length+=dataLength*4;
258 }
259
260 /* allocate the trie */
261 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
262 if(trie==NULL) {
263 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
264 return 0;
265 }
266 uprv_memset(trie, 0, sizeof(UTrie2));
267 trie->memory=uprv_malloc(length);
268 if(trie->memory==NULL) {
269 uprv_free(trie);
270 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
271 return 0;
272 }
273 trie->length=length;
274 trie->isMemoryOwned=TRUE;
275
276 /* set the UTrie2 fields */
277 if(valueBits==UTRIE2_16_VALUE_BITS) {
278 dataMove=indexLength;
279 } else {
280 dataMove=0;
281 }
282
283 trie->indexLength=indexLength;
284 trie->dataLength=dataLength;
285 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
286 trie->dataNullOffset=(uint16_t)dataMove;
287 trie->initialValue=initialValue;
288 trie->errorValue=errorValue;
289 trie->highStart=0;
290 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
291
292 /* set the header fields */
293 header=(UTrie2Header *)trie->memory;
294
295 header->signature=UTRIE2_SIG; /* "Tri2" */
296 header->options=(uint16_t)valueBits;
297
298 header->indexLength=(uint16_t)indexLength;
299 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
300 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
301 header->dataNullOffset=(uint16_t)dataMove;
302 header->shiftedHighStart=0;
303
304 /* fill the index and data arrays */
305 dest16=(uint16_t *)(header+1);
306 trie->index=dest16;
307
308 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
309 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
310 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */
311 }
312
313 /* write UTF-8 2-byte index-2 values, not right-shifted */
314 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
315 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
316 }
317 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
318 *dest16++=(uint16_t)dataMove;
319 }
320
321 /* write the 16/32-bit data array */
322 switch(valueBits) {
323 case UTRIE2_16_VALUE_BITS:
324 /* write 16-bit data values */
325 trie->data16=dest16;
326 trie->data32=NULL;
327 for(i=0; i<0x80; ++i) {
328 *dest16++=(uint16_t)initialValue;
329 }
330 for(; i<0xc0; ++i) {
331 *dest16++=(uint16_t)errorValue;
332 }
333 /* highValue and reserved values */
334 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
335 *dest16++=(uint16_t)initialValue;
336 }
337 break;
338 case UTRIE2_32_VALUE_BITS:
339 /* write 32-bit data values */
340 p=(uint32_t *)dest16;
341 trie->data16=NULL;
342 trie->data32=p;
343 for(i=0; i<0x80; ++i) {
344 *p++=initialValue;
345 }
346 for(; i<0xc0; ++i) {
347 *p++=errorValue;
348 }
349 /* highValue and reserved values */
350 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
351 *p++=initialValue;
352 }
353 break;
354 default:
355 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
356 return 0;
357 }
358
359 return trie;
360 }
361
362 U_CAPI void U_EXPORT2
utrie2_close(UTrie2 * trie)363 utrie2_close(UTrie2 *trie) {
364 if(trie!=NULL) {
365 if(trie->isMemoryOwned) {
366 uprv_free(trie->memory);
367 }
368 if(trie->newTrie!=NULL) {
369 uprv_free(trie->newTrie->data);
370 uprv_free(trie->newTrie);
371 }
372 uprv_free(trie);
373 }
374 }
375
376 U_CAPI int32_t U_EXPORT2
utrie2_getVersion(const void * data,int32_t length,UBool anyEndianOk)377 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
378 uint32_t signature;
379 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) {
380 return 0;
381 }
382 signature=*(const uint32_t *)data;
383 if(signature==UTRIE2_SIG) {
384 return 2;
385 }
386 if(anyEndianOk && signature==UTRIE2_OE_SIG) {
387 return 2;
388 }
389 if(signature==UTRIE_SIG) {
390 return 1;
391 }
392 if(anyEndianOk && signature==UTRIE_OE_SIG) {
393 return 1;
394 }
395 return 0;
396 }
397
398 U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)399 utrie2_swap(const UDataSwapper *ds,
400 const void *inData, int32_t length, void *outData,
401 UErrorCode *pErrorCode) {
402 const UTrie2Header *inTrie;
403 UTrie2Header trie;
404 int32_t dataLength, size;
405 UTrie2ValueBits valueBits;
406
407 if(U_FAILURE(*pErrorCode)) {
408 return 0;
409 }
410 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
411 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
412 return 0;
413 }
414
415 /* setup and swapping */
416 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
417 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
418 return 0;
419 }
420
421 inTrie=(const UTrie2Header *)inData;
422 trie.signature=ds->readUInt32(inTrie->signature);
423 trie.options=ds->readUInt16(inTrie->options);
424 trie.indexLength=ds->readUInt16(inTrie->indexLength);
425 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
426
427 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
428 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
429
430 if( trie.signature!=UTRIE2_SIG ||
431 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
432 trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
433 dataLength<UTRIE2_DATA_START_OFFSET
434 ) {
435 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
436 return 0;
437 }
438
439 size=sizeof(UTrie2Header)+trie.indexLength*2;
440 switch(valueBits) {
441 case UTRIE2_16_VALUE_BITS:
442 size+=dataLength*2;
443 break;
444 case UTRIE2_32_VALUE_BITS:
445 size+=dataLength*4;
446 break;
447 default:
448 *pErrorCode=U_INVALID_FORMAT_ERROR;
449 return 0;
450 }
451
452 if(length>=0) {
453 UTrie2Header *outTrie;
454
455 if(length<size) {
456 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
457 return 0;
458 }
459
460 outTrie=(UTrie2Header *)outData;
461
462 /* swap the header */
463 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
464 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
465
466 /* swap the index and the data */
467 switch(valueBits) {
468 case UTRIE2_16_VALUE_BITS:
469 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
470 break;
471 case UTRIE2_32_VALUE_BITS:
472 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
473 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
474 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
475 break;
476 default:
477 *pErrorCode=U_INVALID_FORMAT_ERROR;
478 return 0;
479 }
480 }
481
482 return size;
483 }
484
485 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
486 // to avoid a dependency from utrie2.cpp on utrie.c.
487
488 /* enumeration -------------------------------------------------------------- */
489
490 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
491
492 /* default UTrie2EnumValue() returns the input value itself */
493 static uint32_t U_CALLCONV
enumSameValue(const void *,uint32_t value)494 enumSameValue(const void * /*context*/, uint32_t value) {
495 return value;
496 }
497
498 /**
499 * Enumerate all ranges of code points with the same relevant values.
500 * The values are transformed from the raw trie entries by the enumValue function.
501 *
502 * Currently requires start<limit and both start and limit must be multiples
503 * of UTRIE2_DATA_BLOCK_LENGTH.
504 *
505 * Optimizations:
506 * - Skip a whole block if we know that it is filled with a single value,
507 * and it is the same as we visited just before.
508 * - Handle the null block specially because we know a priori that it is filled
509 * with a single value.
510 */
511 static void
enumEitherTrie(const UTrie2 * trie,UChar32 start,UChar32 limit,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)512 enumEitherTrie(const UTrie2 *trie,
513 UChar32 start, UChar32 limit,
514 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
515 const uint32_t *data32;
516 const uint16_t *idx;
517
518 uint32_t value, prevValue, initialValue;
519 UChar32 c, prev, highStart;
520 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
521
522 if(enumRange==NULL) {
523 return;
524 }
525 if(enumValue==NULL) {
526 enumValue=enumSameValue;
527 }
528
529 if(trie->newTrie==NULL) {
530 /* frozen trie */
531 idx=trie->index;
532 data32=trie->data32;
533
534 index2NullOffset=trie->index2NullOffset;
535 nullBlock=trie->dataNullOffset;
536 } else {
537 /* unfrozen, mutable trie */
538 idx=NULL;
539 data32=trie->newTrie->data;
540
541 index2NullOffset=trie->newTrie->index2NullOffset;
542 nullBlock=trie->newTrie->dataNullOffset;
543 }
544
545 highStart=trie->highStart;
546
547 /* get the enumeration value that corresponds to an initial-value trie data entry */
548 initialValue=enumValue(context, trie->initialValue);
549
550 /* set variables for previous range */
551 prevI2Block=-1;
552 prevBlock=-1;
553 prev=start;
554 prevValue=0;
555
556 /* enumerate index-2 blocks */
557 for(c=start; c<limit && c<highStart;) {
558 /* Code point limit for iterating inside this i2Block. */
559 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
560 if(limit<tempLimit) {
561 tempLimit=limit;
562 }
563 if(c<=0xffff) {
564 if(!U_IS_SURROGATE(c)) {
565 i2Block=c>>UTRIE2_SHIFT_2;
566 } else if(U_IS_SURROGATE_LEAD(c)) {
567 /*
568 * Enumerate values for lead surrogate code points, not code units:
569 * This special block has half the normal length.
570 */
571 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
572 tempLimit=MIN_VALUE(0xdc00, limit);
573 } else {
574 /*
575 * Switch back to the normal part of the index-2 table.
576 * Enumerate the second half of the surrogates block.
577 */
578 i2Block=0xd800>>UTRIE2_SHIFT_2;
579 tempLimit=MIN_VALUE(0xe000, limit);
580 }
581 } else {
582 /* supplementary code points */
583 if(idx!=NULL) {
584 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
585 (c>>UTRIE2_SHIFT_1)];
586 } else {
587 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
588 }
589 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
590 /*
591 * The index-2 block is the same as the previous one, and filled with prevValue.
592 * Only possible for supplementary code points because the linear-BMP index-2
593 * table creates unique i2Block values.
594 */
595 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
596 continue;
597 }
598 }
599 prevI2Block=i2Block;
600 if(i2Block==index2NullOffset) {
601 /* this is the null index-2 block */
602 if(prevValue!=initialValue) {
603 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
604 return;
605 }
606 prevBlock=nullBlock;
607 prev=c;
608 prevValue=initialValue;
609 }
610 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
611 } else {
612 /* enumerate data blocks for one index-2 block */
613 int32_t i2, i2Limit;
614 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
615 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
616 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
617 } else {
618 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
619 }
620 for(; i2<i2Limit; ++i2) {
621 if(idx!=NULL) {
622 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
623 } else {
624 block=trie->newTrie->index2[i2Block+i2];
625 }
626 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
627 /* the block is the same as the previous one, and filled with prevValue */
628 c+=UTRIE2_DATA_BLOCK_LENGTH;
629 continue;
630 }
631 prevBlock=block;
632 if(block==nullBlock) {
633 /* this is the null data block */
634 if(prevValue!=initialValue) {
635 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
636 return;
637 }
638 prev=c;
639 prevValue=initialValue;
640 }
641 c+=UTRIE2_DATA_BLOCK_LENGTH;
642 } else {
643 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
644 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
645 if(value!=prevValue) {
646 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
647 return;
648 }
649 prev=c;
650 prevValue=value;
651 }
652 ++c;
653 }
654 }
655 }
656 }
657 }
658
659 if(c>limit) {
660 c=limit; /* could be higher if in the index2NullOffset */
661 } else if(c<limit) {
662 /* c==highStart<limit */
663 uint32_t highValue;
664 if(idx!=NULL) {
665 highValue=
666 data32!=NULL ?
667 data32[trie->highValueIndex] :
668 idx[trie->highValueIndex];
669 } else {
670 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
671 }
672 value=enumValue(context, highValue);
673 if(value!=prevValue) {
674 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
675 return;
676 }
677 prev=c;
678 prevValue=value;
679 }
680 c=limit;
681 }
682
683 /* deliver last range */
684 enumRange(context, prev, c-1, prevValue);
685 }
686
687 U_CAPI void U_EXPORT2
utrie2_enum(const UTrie2 * trie,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)688 utrie2_enum(const UTrie2 *trie,
689 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
690 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
691 }
692
693 U_CAPI void U_EXPORT2
utrie2_enumForLeadSurrogate(const UTrie2 * trie,UChar32 lead,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)694 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
695 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
696 const void *context) {
697 if(!U16_IS_LEAD(lead)) {
698 return;
699 }
700 lead=(lead-0xd7c0)<<10; /* start code point */
701 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
702 }
703
704 /* C++ convenience wrappers ------------------------------------------------- */
705
706 U_NAMESPACE_BEGIN
707
previous16()708 uint16_t BackwardUTrie2StringIterator::previous16() {
709 codePointLimit=codePointStart;
710 if(start>=codePointStart) {
711 codePoint=U_SENTINEL;
712 return 0;
713 }
714 uint16_t result;
715 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
716 return result;
717 }
718
next16()719 uint16_t ForwardUTrie2StringIterator::next16() {
720 codePointStart=codePointLimit;
721 if(codePointLimit==limit) {
722 codePoint=U_SENTINEL;
723 return 0;
724 }
725 uint16_t result;
726 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
727 return result;
728 }
729
getInstance(InstantiatorFn * instantiator,const void * context,UErrorCode & errorCode)730 UTrie2 *UTrie2Singleton::getInstance(InstantiatorFn *instantiator, const void *context,
731 UErrorCode &errorCode) {
732 void *duplicate;
733 UTrie2 *instance=(UTrie2 *)singleton.getInstance(instantiator, context, duplicate, errorCode);
734 utrie2_close((UTrie2 *)duplicate);
735 return instance;
736 }
737
738 U_NAMESPACE_END
739