1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2011, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * There are functions to efficiently serialize a USet into an array of uint16_t
19 * and functions to use such a serialized form efficiently without
20 * instantiating a new USet.
21 */
22
23 #include "unicode/utypes.h"
24 #include "unicode/char16ptr.h"
25 #include "unicode/uobject.h"
26 #include "unicode/uset.h"
27 #include "unicode/uniset.h"
28 #include "cmemory.h"
29 #include "unicode/ustring.h"
30 #include "unicode/parsepos.h"
31
32 U_NAMESPACE_USE
33
34 U_CAPI USet* U_EXPORT2
uset_openEmpty()35 uset_openEmpty() {
36 return (USet*) new UnicodeSet();
37 }
38
39 U_CAPI USet* U_EXPORT2
uset_open(UChar32 start,UChar32 end)40 uset_open(UChar32 start, UChar32 end) {
41 return (USet*) new UnicodeSet(start, end);
42 }
43
44 U_CAPI void U_EXPORT2
uset_close(USet * set)45 uset_close(USet* set) {
46 delete (UnicodeSet*) set;
47 }
48
49 U_CAPI USet * U_EXPORT2
uset_clone(const USet * set)50 uset_clone(const USet *set) {
51 return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());
52 }
53
54 U_CAPI UBool U_EXPORT2
uset_isFrozen(const USet * set)55 uset_isFrozen(const USet *set) {
56 return ((UnicodeSet*) set)->UnicodeSet::isFrozen();
57 }
58
59 U_CAPI void U_EXPORT2
uset_freeze(USet * set)60 uset_freeze(USet *set) {
61 ((UnicodeSet*) set)->UnicodeSet::freeze();
62 }
63
64 U_CAPI USet * U_EXPORT2
uset_cloneAsThawed(const USet * set)65 uset_cloneAsThawed(const USet *set) {
66 return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
67 }
68
69 U_CAPI void U_EXPORT2
uset_set(USet * set,UChar32 start,UChar32 end)70 uset_set(USet* set,
71 UChar32 start, UChar32 end) {
72 ((UnicodeSet*) set)->UnicodeSet::set(start, end);
73 }
74
75 U_CAPI void U_EXPORT2
uset_addAll(USet * set,const USet * additionalSet)76 uset_addAll(USet* set, const USet *additionalSet) {
77 ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));
78 }
79
80 U_CAPI void U_EXPORT2
uset_add(USet * set,UChar32 c)81 uset_add(USet* set, UChar32 c) {
82 ((UnicodeSet*) set)->UnicodeSet::add(c);
83 }
84
85 U_CAPI void U_EXPORT2
uset_addRange(USet * set,UChar32 start,UChar32 end)86 uset_addRange(USet* set, UChar32 start, UChar32 end) {
87 ((UnicodeSet*) set)->UnicodeSet::add(start, end);
88 }
89
90 U_CAPI void U_EXPORT2
uset_addString(USet * set,const char16_t * str,int32_t strLen)91 uset_addString(USet* set, const char16_t* str, int32_t strLen) {
92 // UnicodeString handles -1 for strLen
93 UnicodeString s(strLen<0, str, strLen);
94 ((UnicodeSet*) set)->UnicodeSet::add(s);
95 }
96
97 U_CAPI void U_EXPORT2
uset_addAllCodePoints(USet * set,const char16_t * str,int32_t strLen)98 uset_addAllCodePoints(USet* set, const char16_t *str, int32_t strLen) {
99 // UnicodeString handles -1 for strLen
100 UnicodeString s(str, strLen);
101 ((UnicodeSet*) set)->UnicodeSet::addAll(s);
102 }
103
104 U_CAPI void U_EXPORT2
uset_remove(USet * set,UChar32 c)105 uset_remove(USet* set, UChar32 c) {
106 ((UnicodeSet*) set)->UnicodeSet::remove(c);
107 }
108
109 U_CAPI void U_EXPORT2
uset_removeRange(USet * set,UChar32 start,UChar32 end)110 uset_removeRange(USet* set, UChar32 start, UChar32 end) {
111 ((UnicodeSet*) set)->UnicodeSet::remove(start, end);
112 }
113
114 U_CAPI void U_EXPORT2
uset_removeString(USet * set,const char16_t * str,int32_t strLen)115 uset_removeString(USet* set, const char16_t* str, int32_t strLen) {
116 UnicodeString s(strLen==-1, str, strLen);
117 ((UnicodeSet*) set)->UnicodeSet::remove(s);
118 }
119
120 U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet * set,const char16_t * str,int32_t length)121 uset_removeAllCodePoints(USet *set, const char16_t *str, int32_t length) {
122 UnicodeString s(length==-1, str, length);
123 ((UnicodeSet*) set)->UnicodeSet::removeAll(s);
124 }
125
126 U_CAPI void U_EXPORT2
uset_removeAll(USet * set,const USet * remove)127 uset_removeAll(USet* set, const USet* remove) {
128 ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
129 }
130
131 U_CAPI void U_EXPORT2
uset_retain(USet * set,UChar32 start,UChar32 end)132 uset_retain(USet* set, UChar32 start, UChar32 end) {
133 ((UnicodeSet*) set)->UnicodeSet::retain(start, end);
134 }
135
136 U_CAPI void U_EXPORT2
uset_retainString(USet * set,const char16_t * str,int32_t length)137 uset_retainString(USet *set, const char16_t *str, int32_t length) {
138 UnicodeString s(length==-1, str, length);
139 ((UnicodeSet*) set)->UnicodeSet::retain(s);
140 }
141
142 U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet * set,const char16_t * str,int32_t length)143 uset_retainAllCodePoints(USet *set, const char16_t *str, int32_t length) {
144 UnicodeString s(length==-1, str, length);
145 ((UnicodeSet*) set)->UnicodeSet::retainAll(s);
146 }
147
148 U_CAPI void U_EXPORT2
uset_retainAll(USet * set,const USet * retain)149 uset_retainAll(USet* set, const USet* retain) {
150 ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
151 }
152
153 U_CAPI void U_EXPORT2
uset_compact(USet * set)154 uset_compact(USet* set) {
155 ((UnicodeSet*) set)->UnicodeSet::compact();
156 }
157
158 U_CAPI void U_EXPORT2
uset_complement(USet * set)159 uset_complement(USet* set) {
160 ((UnicodeSet*) set)->UnicodeSet::complement();
161 }
162
163 U_CAPI void U_EXPORT2
uset_complementRange(USet * set,UChar32 start,UChar32 end)164 uset_complementRange(USet *set, UChar32 start, UChar32 end) {
165 ((UnicodeSet*) set)->UnicodeSet::complement(start, end);
166 }
167
168 U_CAPI void U_EXPORT2
uset_complementString(USet * set,const char16_t * str,int32_t length)169 uset_complementString(USet *set, const char16_t *str, int32_t length) {
170 UnicodeString s(length==-1, str, length);
171 ((UnicodeSet*) set)->UnicodeSet::complement(s);
172 }
173
174 U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet * set,const char16_t * str,int32_t length)175 uset_complementAllCodePoints(USet *set, const char16_t *str, int32_t length) {
176 UnicodeString s(length==-1, str, length);
177 ((UnicodeSet*) set)->UnicodeSet::complementAll(s);
178 }
179
180 U_CAPI void U_EXPORT2
uset_complementAll(USet * set,const USet * complement)181 uset_complementAll(USet* set, const USet* complement) {
182 ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
183 }
184
185 U_CAPI void U_EXPORT2
uset_clear(USet * set)186 uset_clear(USet* set) {
187 ((UnicodeSet*) set)->UnicodeSet::clear();
188 }
189
190 U_CAPI void U_EXPORT2
uset_removeAllStrings(USet * set)191 uset_removeAllStrings(USet* set) {
192 ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
193 }
194
195 U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet * set)196 uset_isEmpty(const USet* set) {
197 return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
198 }
199
200 U_CAPI UBool U_EXPORT2
uset_hasStrings(const USet * set)201 uset_hasStrings(const USet* set) {
202 return ((const UnicodeSet*) set)->UnicodeSet::hasStrings();
203 }
204
205 U_CAPI UBool U_EXPORT2
uset_contains(const USet * set,UChar32 c)206 uset_contains(const USet* set, UChar32 c) {
207 return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
208 }
209
210 U_CAPI UBool U_EXPORT2
uset_containsRange(const USet * set,UChar32 start,UChar32 end)211 uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
212 return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
213 }
214
215 U_CAPI UBool U_EXPORT2
uset_containsString(const USet * set,const char16_t * str,int32_t strLen)216 uset_containsString(const USet* set, const char16_t* str, int32_t strLen) {
217 UnicodeString s(strLen==-1, str, strLen);
218 return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
219 }
220
221 U_CAPI UBool U_EXPORT2
uset_containsAll(const USet * set1,const USet * set2)222 uset_containsAll(const USet* set1, const USet* set2) {
223 return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);
224 }
225
226 U_CAPI UBool U_EXPORT2
uset_containsAllCodePoints(const USet * set,const char16_t * str,int32_t strLen)227 uset_containsAllCodePoints(const USet* set, const char16_t *str, int32_t strLen) {
228 // Create a string alias, since nothing is being added to the set.
229 UnicodeString s(strLen==-1, str, strLen);
230 return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
231 }
232
233 U_CAPI UBool U_EXPORT2
uset_containsNone(const USet * set1,const USet * set2)234 uset_containsNone(const USet* set1, const USet* set2) {
235 return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);
236 }
237
238 U_CAPI UBool U_EXPORT2
uset_containsSome(const USet * set1,const USet * set2)239 uset_containsSome(const USet* set1, const USet* set2) {
240 return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
241 }
242
243 U_CAPI int32_t U_EXPORT2
uset_span(const USet * set,const char16_t * s,int32_t length,USetSpanCondition spanCondition)244 uset_span(const USet *set, const char16_t *s, int32_t length, USetSpanCondition spanCondition) {
245 return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
246 }
247
248 U_CAPI int32_t U_EXPORT2
uset_spanBack(const USet * set,const char16_t * s,int32_t length,USetSpanCondition spanCondition)249 uset_spanBack(const USet *set, const char16_t *s, int32_t length, USetSpanCondition spanCondition) {
250 return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
251 }
252
253 U_CAPI int32_t U_EXPORT2
uset_spanUTF8(const USet * set,const char * s,int32_t length,USetSpanCondition spanCondition)254 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
255 return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
256 }
257
258 U_CAPI int32_t U_EXPORT2
uset_spanBackUTF8(const USet * set,const char * s,int32_t length,USetSpanCondition spanCondition)259 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
260 return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
261 }
262
263 U_CAPI UBool U_EXPORT2
uset_equals(const USet * set1,const USet * set2)264 uset_equals(const USet* set1, const USet* set2) {
265 return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
266 }
267
268 U_CAPI int32_t U_EXPORT2
uset_indexOf(const USet * set,UChar32 c)269 uset_indexOf(const USet* set, UChar32 c) {
270 return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);
271 }
272
273 U_CAPI UChar32 U_EXPORT2
uset_charAt(const USet * set,int32_t index)274 uset_charAt(const USet* set, int32_t index) {
275 return ((UnicodeSet*) set)->UnicodeSet::charAt(index);
276 }
277
278 U_CAPI int32_t U_EXPORT2
uset_size(const USet * set)279 uset_size(const USet* set) {
280 return ((const UnicodeSet*) set)->UnicodeSet::size();
281 }
282
283 U_NAMESPACE_BEGIN
284 /**
285 * This class only exists to provide access to the UnicodeSet private
286 * USet support API. Declaring a class a friend is more portable than
287 * trying to declare extern "C" functions as friends.
288 */
289 class USetAccess /* not : public UObject because all methods are static */ {
290 public:
291 /* Try to have the compiler inline these*/
getStringCount(const UnicodeSet & set)292 inline static int32_t getStringCount(const UnicodeSet& set) {
293 return set.stringsSize();
294 }
getString(const UnicodeSet & set,int32_t i)295 inline static const UnicodeString* getString(const UnicodeSet& set,
296 int32_t i) {
297 return set.getString(i);
298 }
299 private:
300 /* do not instantiate*/
301 USetAccess();
302 };
303 U_NAMESPACE_END
304
305 U_CAPI int32_t U_EXPORT2
uset_getRangeCount(const USet * set)306 uset_getRangeCount(const USet *set) {
307 return ((const UnicodeSet *)set)->UnicodeSet::getRangeCount();
308 }
309
310 U_CAPI int32_t U_EXPORT2
uset_getStringCount(const USet * uset)311 uset_getStringCount(const USet *uset) {
312 const UnicodeSet &set = *(const UnicodeSet *)uset;
313 return USetAccess::getStringCount(set);
314 }
315
316 U_CAPI int32_t U_EXPORT2
uset_getItemCount(const USet * uset)317 uset_getItemCount(const USet* uset) {
318 const UnicodeSet& set = *(const UnicodeSet*)uset;
319 return set.getRangeCount() + USetAccess::getStringCount(set);
320 }
321
322 U_CAPI const UChar* U_EXPORT2
uset_getString(const USet * uset,int32_t index,int32_t * pLength)323 uset_getString(const USet *uset, int32_t index, int32_t *pLength) {
324 if (pLength == nullptr) { return nullptr; }
325 const UnicodeSet &set = *(const UnicodeSet *)uset;
326 int32_t count = USetAccess::getStringCount(set);
327 if (index < 0 || count <= index) {
328 *pLength = 0;
329 return nullptr;
330 }
331 const UnicodeString *s = USetAccess::getString(set, index);
332 *pLength = s->length();
333 return toUCharPtr(s->getBuffer());
334 }
335
336 U_CAPI int32_t U_EXPORT2
uset_getItem(const USet * uset,int32_t itemIndex,UChar32 * start,UChar32 * end,char16_t * str,int32_t strCapacity,UErrorCode * ec)337 uset_getItem(const USet* uset, int32_t itemIndex,
338 UChar32* start, UChar32* end,
339 char16_t* str, int32_t strCapacity,
340 UErrorCode* ec) {
341 if (U_FAILURE(*ec)) return 0;
342 const UnicodeSet& set = *(const UnicodeSet*)uset;
343 int32_t rangeCount;
344
345 if (itemIndex < 0) {
346 *ec = U_ILLEGAL_ARGUMENT_ERROR;
347 return -1;
348 } else if (itemIndex < (rangeCount = set.getRangeCount())) {
349 *start = set.getRangeStart(itemIndex);
350 *end = set.getRangeEnd(itemIndex);
351 return 0;
352 } else {
353 itemIndex -= rangeCount;
354 if (itemIndex < USetAccess::getStringCount(set)) {
355 const UnicodeString* s = USetAccess::getString(set, itemIndex);
356 return s->extract(str, strCapacity, *ec);
357 } else {
358 *ec = U_INDEX_OUTOFBOUNDS_ERROR;
359 return -1;
360 }
361 }
362 }
363
364 //U_CAPI UBool U_EXPORT2
365 //uset_getRange(const USet* set, int32_t rangeIndex,
366 // UChar32* pStart, UChar32* pEnd) {
367 // if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
368 // return false;
369 // }
370 // const UnicodeSet* us = (const UnicodeSet*) set;
371 // *pStart = us->getRangeStart(rangeIndex);
372 // *pEnd = us->getRangeEnd(rangeIndex);
373 // return true;
374 //}
375
376 /*
377 * Serialize a USet into 16-bit units.
378 * Store BMP code points as themselves with one 16-bit unit each.
379 *
380 * Important: the code points in the array are in ascending order,
381 * therefore all BMP code points precede all supplementary code points.
382 *
383 * Store each supplementary code point in 2 16-bit units,
384 * simply with higher-then-lower 16-bit halves.
385 *
386 * Precede the entire list with the length.
387 * If there are supplementary code points, then set bit 15 in the length
388 * and add the bmpLength between it and the array.
389 *
390 * In other words:
391 * - all BMP: (length=bmpLength) BMP, .., BMP
392 * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
393 */
394 U_CAPI int32_t U_EXPORT2
uset_serialize(const USet * set,uint16_t * dest,int32_t destCapacity,UErrorCode * ec)395 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
396 if (ec==nullptr || U_FAILURE(*ec)) {
397 return 0;
398 }
399
400 return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
401 }
402
403 U_CAPI UBool U_EXPORT2
uset_getSerializedSet(USerializedSet * fillSet,const uint16_t * src,int32_t srcLength)404 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
405 int32_t length;
406
407 if(fillSet==nullptr) {
408 return false;
409 }
410 if(src==nullptr || srcLength<=0) {
411 fillSet->length=fillSet->bmpLength=0;
412 return false;
413 }
414
415 length=*src++;
416 if(length&0x8000) {
417 /* there are supplementary values */
418 length&=0x7fff;
419 if(srcLength<(2+length)) {
420 fillSet->length=fillSet->bmpLength=0;
421 return false;
422 }
423 fillSet->bmpLength=*src++;
424 } else {
425 /* only BMP values */
426 if(srcLength<(1+length)) {
427 fillSet->length=fillSet->bmpLength=0;
428 return false;
429 }
430 fillSet->bmpLength=length;
431 }
432 fillSet->array=src;
433 fillSet->length=length;
434 return true;
435 }
436
437 U_CAPI void U_EXPORT2
uset_setSerializedToOne(USerializedSet * fillSet,UChar32 c)438 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
439 if(fillSet==nullptr || (uint32_t)c>0x10ffff) {
440 return;
441 }
442
443 fillSet->array=fillSet->staticArray;
444 if(c<0xffff) {
445 fillSet->bmpLength=fillSet->length=2;
446 fillSet->staticArray[0]=(uint16_t)c;
447 fillSet->staticArray[1]=(uint16_t)c+1;
448 } else if(c==0xffff) {
449 fillSet->bmpLength=1;
450 fillSet->length=3;
451 fillSet->staticArray[0]=0xffff;
452 fillSet->staticArray[1]=1;
453 fillSet->staticArray[2]=0;
454 } else if(c<0x10ffff) {
455 fillSet->bmpLength=0;
456 fillSet->length=4;
457 fillSet->staticArray[0]=(uint16_t)(c>>16);
458 fillSet->staticArray[1]=(uint16_t)c;
459 ++c;
460 fillSet->staticArray[2]=(uint16_t)(c>>16);
461 fillSet->staticArray[3]=(uint16_t)c;
462 } else /* c==0x10ffff */ {
463 fillSet->bmpLength=0;
464 fillSet->length=2;
465 fillSet->staticArray[0]=0x10;
466 fillSet->staticArray[1]=0xffff;
467 }
468 }
469
470 U_CAPI UBool U_EXPORT2
uset_serializedContains(const USerializedSet * set,UChar32 c)471 uset_serializedContains(const USerializedSet* set, UChar32 c) {
472 const uint16_t* array;
473
474 if(set==nullptr || (uint32_t)c>0x10ffff) {
475 return false;
476 }
477
478 array=set->array;
479 if(c<=0xffff) {
480 /* find c in the BMP part */
481 int32_t lo = 0;
482 int32_t hi = set->bmpLength-1;
483 if (c < array[0]) {
484 hi = 0;
485 } else if (c < array[hi]) {
486 for(;;) {
487 int32_t i = (lo + hi) >> 1;
488 if (i == lo) {
489 break; // Done!
490 } else if (c < array[i]) {
491 hi = i;
492 } else {
493 lo = i;
494 }
495 }
496 } else {
497 hi += 1;
498 }
499 return hi&1;
500 } else {
501 /* find c in the supplementary part */
502 uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
503 int32_t base = set->bmpLength;
504 int32_t lo = 0;
505 int32_t hi = set->length - 2 - base;
506 if (high < array[base] || (high==array[base] && low<array[base+1])) {
507 hi = 0;
508 } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
509 for (;;) {
510 int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result
511 int32_t iabs = i + base;
512 if (i == lo) {
513 break; // Done!
514 } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
515 hi = i;
516 } else {
517 lo = i;
518 }
519 }
520 } else {
521 hi += 2;
522 }
523 /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
524 return ((hi+(base<<1))&2)!=0;
525 }
526 }
527
528 U_CAPI int32_t U_EXPORT2
uset_getSerializedRangeCount(const USerializedSet * set)529 uset_getSerializedRangeCount(const USerializedSet* set) {
530 if(set==nullptr) {
531 return 0;
532 }
533
534 return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;
535 }
536
537 U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet * set,int32_t rangeIndex,UChar32 * pStart,UChar32 * pEnd)538 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
539 UChar32* pStart, UChar32* pEnd) {
540 const uint16_t* array;
541 int32_t bmpLength, length;
542
543 if(set==nullptr || rangeIndex<0 || pStart==nullptr || pEnd==nullptr) {
544 return false;
545 }
546
547 array=set->array;
548 length=set->length;
549 bmpLength=set->bmpLength;
550
551 rangeIndex*=2; /* address start/limit pairs */
552 if(rangeIndex<bmpLength) {
553 *pStart=array[rangeIndex++];
554 if(rangeIndex<bmpLength) {
555 *pEnd=array[rangeIndex]-1;
556 } else if(rangeIndex<length) {
557 *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
558 } else {
559 *pEnd=0x10ffff;
560 }
561 return true;
562 } else {
563 rangeIndex-=bmpLength;
564 rangeIndex*=2; /* address pairs of pairs of units */
565 length-=bmpLength;
566 if(rangeIndex<length) {
567 array+=bmpLength;
568 *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
569 rangeIndex+=2;
570 if(rangeIndex<length) {
571 *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
572 } else {
573 *pEnd=0x10ffff;
574 }
575 return true;
576 } else {
577 return false;
578 }
579 }
580 }
581
582 // TODO The old, internal uset.c had an efficient uset_containsOne function.
583 // Returned the one and only code point, or else -1 or something.
584 // Consider adding such a function to both C and C++ UnicodeSet/uset.
585 // See tools/gennorm/store.c for usage, now usetContainsOne there.
586
587 // TODO Investigate incorporating this code into UnicodeSet to improve
588 // efficiency.
589 // ---
590 // #define USET_GROW_DELTA 20
591 //
592 // static int32_t
593 // findChar(const UChar32* array, int32_t length, UChar32 c) {
594 // int32_t i;
595 //
596 // /* check the last range limit first for more efficient appending */
597 // if(length>0) {
598 // if(c>=array[length-1]) {
599 // return length;
600 // }
601 //
602 // /* do not check the last range limit again in the loop below */
603 // --length;
604 // }
605 //
606 // for(i=0; i<length && c>=array[i]; ++i) {}
607 // return i;
608 // }
609 //
610 // static UBool
611 // addRemove(USet* set, UChar32 c, int32_t doRemove) {
612 // int32_t i, length, more;
613 //
614 // if(set==nullptr || (uint32_t)c>0x10ffff) {
615 // return false;
616 // }
617 //
618 // length=set->length;
619 // i=findChar(set->array, length, c);
620 // if((i&1)^doRemove) {
621 // /* c is already in the set */
622 // return true;
623 // }
624 //
625 // /* how many more array items do we need? */
626 // if(i<length && (c+1)==set->array[i]) {
627 // /* c is just before the following range, extend that in-place by one */
628 // set->array[i]=c;
629 // if(i>0) {
630 // --i;
631 // if(c==set->array[i]) {
632 // /* the previous range collapsed, remove it */
633 // set->length=length-=2;
634 // if(i<length) {
635 // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
636 // }
637 // }
638 // }
639 // return true;
640 // } else if(i>0 && c==set->array[i-1]) {
641 // /* c is just after the previous range, extend that in-place by one */
642 // if(++c<=0x10ffff) {
643 // set->array[i-1]=c;
644 // if(i<length && c==set->array[i]) {
645 // /* the following range collapsed, remove it */
646 // --i;
647 // set->length=length-=2;
648 // if(i<length) {
649 // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
650 // }
651 // }
652 // } else {
653 // /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
654 // set->length=i-1;
655 // }
656 // return true;
657 // } else if(i==length && c==0x10ffff) {
658 // /* insert one range limit c */
659 // more=1;
660 // } else {
661 // /* insert two range limits c, c+1 */
662 // more=2;
663 // }
664 //
665 // /* insert <more> range limits */
666 // if(length+more>set->capacity) {
667 // /* reallocate */
668 // int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
669 // UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
670 // if(newArray==nullptr) {
671 // return false;
672 // }
673 // set->capacity=newCapacity;
674 // uprv_memcpy(newArray, set->array, length*4);
675 //
676 // if(set->array!=set->staticBuffer) {
677 // uprv_free(set->array);
678 // }
679 // set->array=newArray;
680 // }
681 //
682 // if(i<length) {
683 // uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
684 // }
685 // set->array[i]=c;
686 // if(more==2) {
687 // set->array[i+1]=c+1;
688 // }
689 // set->length+=more;
690 //
691 // return true;
692 // }
693 //
694 // U_CAPI UBool U_EXPORT2
695 // uset_add(USet* set, UChar32 c) {
696 // return addRemove(set, c, 0);
697 // }
698 //
699 // U_CAPI void U_EXPORT2
700 // uset_remove(USet* set, UChar32 c) {
701 // addRemove(set, c, 1);
702 // }
703