• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 2007, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  bmpset.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2007jan29
14 *   created by: Markus W. Scherer
15 */
16 
17 #include "unicode/utypes.h"
18 #include "unicode/uniset.h"
19 #include "cmemory.h"
20 #include "bmpset.h"
21 
22 U_NAMESPACE_BEGIN
23 
BMPSet(const int32_t * parentList,int32_t parentListLength)24 BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
25         list(parentList), listLength(parentListLength) {
26     uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
27     uprv_memset(table7FF, 0, sizeof(table7FF));
28     uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
29 
30     /*
31      * Set the list indexes for binary searches for
32      * U+0800, U+1000, U+2000, .., U+F000, U+10000.
33      * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
34      * looked up in the bit tables.
35      * The last pair of indexes is for finding supplementary code points.
36      */
37     list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
38     int32_t i;
39     for(i=1; i<=0x10; ++i) {
40         list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
41     }
42     list4kStarts[0x11]=listLength-1;
43 
44     initBits();
45     overrideIllegal();
46 }
47 
BMPSet(const BMPSet & otherBMPSet,const int32_t * newParentList,int32_t newParentListLength)48 BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
49         list(newParentList), listLength(newParentListLength) {
50     uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
51     uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
52     uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
53     uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
54 }
55 
~BMPSet()56 BMPSet::~BMPSet() {
57 }
58 
59 /*
60  * Set bits in a bit rectangle in "vertical" bit organization.
61  * start<limit<=0x800
62  */
set32x64Bits(uint32_t table[64],int32_t start,int32_t limit)63 static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
64     int32_t lead=start>>6;
65     int32_t trail=start&0x3f;
66 
67     // Set one bit indicating an all-one block.
68     uint32_t bits=(uint32_t)1<<lead;
69     if((start+1)==limit) {  // Single-character shortcut.
70         table[trail]|=bits;
71         return;
72     }
73 
74     int32_t limitLead=limit>>6;
75     int32_t limitTrail=limit&0x3f;
76 
77     if(lead==limitLead) {
78         // Partial vertical bit column.
79         while(trail<limitTrail) {
80             table[trail++]|=bits;
81         }
82     } else {
83         // Partial vertical bit column,
84         // followed by a bit rectangle,
85         // followed by another partial vertical bit column.
86         if(trail>0) {
87             do {
88                 table[trail++]|=bits;
89             } while(trail<64);
90             ++lead;
91         }
92         if(lead<limitLead) {
93             bits=~((1<<lead)-1);
94             if(limitLead<0x20) {
95                 bits&=(1<<limitLead)-1;
96             }
97             for(trail=0; trail<64; ++trail) {
98                 table[trail]|=bits;
99             }
100         }
101         bits=1<<limitLead;
102         for(trail=0; trail<limitTrail; ++trail) {
103             table[trail]|=bits;
104         }
105     }
106 }
107 
initBits()108 void BMPSet::initBits() {
109     UChar32 start, limit;
110     int32_t listIndex=0;
111 
112     // Set asciiBytes[].
113     do {
114         start=list[listIndex++];
115         if(listIndex<listLength) {
116             limit=list[listIndex++];
117         } else {
118             limit=0x110000;
119         }
120         if(start>=0x80) {
121             break;
122         }
123         do {
124             asciiBytes[start++]=1;
125         } while(start<limit && start<0x80);
126     } while(limit<=0x80);
127 
128     // Set table7FF[].
129     while(start<0x800) {
130         set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
131         if(limit>0x800) {
132             start=0x800;
133             break;
134         }
135 
136         start=list[listIndex++];
137         if(listIndex<listLength) {
138             limit=list[listIndex++];
139         } else {
140             limit=0x110000;
141         }
142     }
143 
144     // Set bmpBlockBits[].
145     int32_t minStart=0x800;
146     while(start<0x10000) {
147         if(limit>0x10000) {
148             limit=0x10000;
149         }
150 
151         if(start<minStart) {
152             start=minStart;
153         }
154         if(start<limit) {  // Else: Another range entirely in a known mixed-value block.
155             if(start&0x3f) {
156                 // Mixed-value block of 64 code points.
157                 start>>=6;
158                 bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
159                 start=(start+1)<<6;  // Round up to the next block boundary.
160                 minStart=start;      // Ignore further ranges in this block.
161             }
162             if(start<limit) {
163                 if(start<(limit&~0x3f)) {
164                     // Multiple all-ones blocks of 64 code points each.
165                     set32x64Bits(bmpBlockBits, start>>6, limit>>6);
166                 }
167 
168                 if(limit&0x3f) {
169                     // Mixed-value block of 64 code points.
170                     limit>>=6;
171                     bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
172                     limit=(limit+1)<<6;  // Round up to the next block boundary.
173                     minStart=limit;      // Ignore further ranges in this block.
174                 }
175             }
176         }
177 
178         if(limit==0x10000) {
179             break;
180         }
181 
182         start=list[listIndex++];
183         if(listIndex<listLength) {
184             limit=list[listIndex++];
185         } else {
186             limit=0x110000;
187         }
188     }
189 }
190 
191 /*
192  * Override some bits and bytes to the result of contains(FFFD)
193  * for faster validity checking at runtime.
194  * No need to set 0 values where they were reset to 0 in the constructor
195  * and not modified by initBits().
196  * (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
197  * Need to set 0 values for surrogates D800..DFFF.
198  */
overrideIllegal()199 void BMPSet::overrideIllegal() {
200     uint32_t bits, mask;
201     int32_t i;
202 
203     if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
204         // contains(FFFD)==TRUE
205         for(i=0x80; i<0xc0; ++i) {
206             asciiBytes[i]=1;
207         }
208 
209         bits=3;                 // Lead bytes 0xC0 and 0xC1.
210         for(i=0; i<64; ++i) {
211             table7FF[i]|=bits;
212         }
213 
214         bits=1;                 // Lead byte 0xE0.
215         for(i=0; i<32; ++i) {   // First half of 4k block.
216             bmpBlockBits[i]|=bits;
217         }
218 
219         mask=~(0x10001<<0xd);   // Lead byte 0xED.
220         bits=1<<0xd;
221         for(i=32; i<64; ++i) {  // Second half of 4k block.
222             bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
223         }
224     } else {
225         // contains(FFFD)==FALSE
226         mask=~(0x10001<<0xd);   // Lead byte 0xED.
227         for(i=32; i<64; ++i) {  // Second half of 4k block.
228             bmpBlockBits[i]&=mask;
229         }
230     }
231 }
232 
findCodePoint(UChar32 c,int32_t lo,int32_t hi) const233 int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
234     /* Examples:
235                                        findCodePoint(c)
236        set              list[]         c=0 1 3 4 7 8
237        ===              ==============   ===========
238        []               [110000]         0 0 0 0 0 0
239        [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
240        [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
241        [:Any:]          [0, 110000]      1 1 1 1 1 1
242      */
243 
244     // Return the smallest i such that c < list[i].  Assume
245     // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
246     if (c < list[lo])
247         return lo;
248     // High runner test.  c is often after the last range, so an
249     // initial check for this condition pays off.
250     if (lo >= hi || c >= list[hi-1])
251         return hi;
252     // invariant: c >= list[lo]
253     // invariant: c < list[hi]
254     for (;;) {
255         int32_t i = (lo + hi) >> 1;
256         if (i == lo) {
257             break; // Found!
258         } else if (c < list[i]) {
259             hi = i;
260         } else {
261             lo = i;
262         }
263     }
264     return hi;
265 }
266 
267 UBool
contains(UChar32 c) const268 BMPSet::contains(UChar32 c) const {
269     if((uint32_t)c<=0x7f) {
270         return (UBool)asciiBytes[c];
271     } else if((uint32_t)c<=0x7ff) {
272         return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
273     } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
274         int lead=c>>12;
275         uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
276         if(twoBits<=1) {
277             // All 64 code points with the same bits 15..6
278             // are either in the set or not.
279             return (UBool)twoBits;
280         } else {
281             // Look up the code point in its 4k block of code points.
282             return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
283         }
284     } else if((uint32_t)c<=0x10ffff) {
285         // surrogate or supplementary code point
286         return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
287     } else {
288         // Out-of-range code points get FALSE, consistent with long-standing
289         // behavior of UnicodeSet::contains(c).
290         return FALSE;
291     }
292 }
293 
294 /*
295  * Check for sufficient length for trail unit for each surrogate pair.
296  * Handle single surrogates as surrogate code points as usual in ICU.
297  */
298 const UChar *
span(const UChar * s,const UChar * limit,USetSpanCondition spanCondition) const299 BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
300     UChar c, c2;
301 
302     if(spanCondition) {
303         // span
304         do {
305             c=*s;
306             if(c<=0x7f) {
307                 if(!asciiBytes[c]) {
308                     break;
309                 }
310             } else if(c<=0x7ff) {
311                 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
312                     break;
313                 }
314             } else if(c<0xd800 || c>=0xe000) {
315                 int lead=c>>12;
316                 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
317                 if(twoBits<=1) {
318                     // All 64 code points with the same bits 15..6
319                     // are either in the set or not.
320                     if(twoBits==0) {
321                         break;
322                     }
323                 } else {
324                     // Look up the code point in its 4k block of code points.
325                     if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
326                         break;
327                     }
328                 }
329             } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
330                 // surrogate code point
331                 if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
332                     break;
333                 }
334             } else {
335                 // surrogate pair
336                 if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
337                     break;
338                 }
339                 ++s;
340             }
341         } while(++s<limit);
342     } else {
343         // span not
344         do {
345             c=*s;
346             if(c<=0x7f) {
347                 if(asciiBytes[c]) {
348                     break;
349                 }
350             } else if(c<=0x7ff) {
351                 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
352                     break;
353                 }
354             } else if(c<0xd800 || c>=0xe000) {
355                 int lead=c>>12;
356                 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
357                 if(twoBits<=1) {
358                     // All 64 code points with the same bits 15..6
359                     // are either in the set or not.
360                     if(twoBits!=0) {
361                         break;
362                     }
363                 } else {
364                     // Look up the code point in its 4k block of code points.
365                     if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
366                         break;
367                     }
368                 }
369             } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
370                 // surrogate code point
371                 if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
372                     break;
373                 }
374             } else {
375                 // surrogate pair
376                 if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
377                     break;
378                 }
379                 ++s;
380             }
381         } while(++s<limit);
382     }
383     return s;
384 }
385 
386 /* Symmetrical with span(). */
387 const UChar *
spanBack(const UChar * s,const UChar * limit,USetSpanCondition spanCondition) const388 BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
389     UChar c, c2;
390 
391     if(spanCondition) {
392         // span
393         for(;;) {
394             c=*(--limit);
395             if(c<=0x7f) {
396                 if(!asciiBytes[c]) {
397                     break;
398                 }
399             } else if(c<=0x7ff) {
400                 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
401                     break;
402                 }
403             } else if(c<0xd800 || c>=0xe000) {
404                 int lead=c>>12;
405                 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
406                 if(twoBits<=1) {
407                     // All 64 code points with the same bits 15..6
408                     // are either in the set or not.
409                     if(twoBits==0) {
410                         break;
411                     }
412                 } else {
413                     // Look up the code point in its 4k block of code points.
414                     if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
415                         break;
416                     }
417                 }
418             } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
419                 // surrogate code point
420                 if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
421                     break;
422                 }
423             } else {
424                 // surrogate pair
425                 if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
426                     break;
427                 }
428                 --limit;
429             }
430             if(s==limit) {
431                 return s;
432             }
433         }
434     } else {
435         // span not
436         for(;;) {
437             c=*(--limit);
438             if(c<=0x7f) {
439                 if(asciiBytes[c]) {
440                     break;
441                 }
442             } else if(c<=0x7ff) {
443                 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
444                     break;
445                 }
446             } else if(c<0xd800 || c>=0xe000) {
447                 int lead=c>>12;
448                 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
449                 if(twoBits<=1) {
450                     // All 64 code points with the same bits 15..6
451                     // are either in the set or not.
452                     if(twoBits!=0) {
453                         break;
454                     }
455                 } else {
456                     // Look up the code point in its 4k block of code points.
457                     if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
458                         break;
459                     }
460                 }
461             } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
462                 // surrogate code point
463                 if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
464                     break;
465                 }
466             } else {
467                 // surrogate pair
468                 if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
469                     break;
470                 }
471                 --limit;
472             }
473             if(s==limit) {
474                 return s;
475             }
476         }
477     }
478     return limit+1;
479 }
480 
481 /*
482  * Precheck for sufficient trail bytes at end of string only once per span.
483  * Check validity.
484  */
485 const uint8_t *
spanUTF8(const uint8_t * s,int32_t length,USetSpanCondition spanCondition) const486 BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
487     const uint8_t *limit=s+length;
488     uint8_t b=*s;
489     if((int8_t)b>=0) {
490         // Initial all-ASCII span.
491         if(spanCondition) {
492             do {
493                 if(!asciiBytes[b] || ++s==limit) {
494                     return s;
495                 }
496                 b=*s;
497             } while((int8_t)b>=0);
498         } else {
499             do {
500                 if(asciiBytes[b] || ++s==limit) {
501                     return s;
502                 }
503                 b=*s;
504             } while((int8_t)b>=0);
505         }
506         length=(int32_t)(limit-s);
507     }
508 
509     if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
510         spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
511     }
512 
513     const uint8_t *limit0=limit;
514 
515     /*
516      * Make sure that the last 1/2/3/4-byte sequence before limit is complete
517      * or runs into a lead byte.
518      * In the span loop compare s with limit only once
519      * per multi-byte character.
520      *
521      * Give a trailing illegal sequence the same value as the result of contains(FFFD),
522      * including it if that is part of the span, otherwise set limit0 to before
523      * the truncated sequence.
524      */
525     b=*(limit-1);
526     if((int8_t)b<0) {
527         // b>=0x80: lead or trail byte
528         if(b<0xc0) {
529             // single trail byte, check for preceding 3- or 4-byte lead byte
530             if(length>=2 && (b=*(limit-2))>=0xe0) {
531                 limit-=2;
532                 if(asciiBytes[0x80]!=spanCondition) {
533                     limit0=limit;
534                 }
535             } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
536                 // 4-byte lead byte with only two trail bytes
537                 limit-=3;
538                 if(asciiBytes[0x80]!=spanCondition) {
539                     limit0=limit;
540                 }
541             }
542         } else {
543             // lead byte with no trail bytes
544             --limit;
545             if(asciiBytes[0x80]!=spanCondition) {
546                 limit0=limit;
547             }
548         }
549     }
550 
551     uint8_t t1, t2, t3;
552 
553     while(s<limit) {
554         b=*s;
555         if(b<0xc0) {
556             // ASCII; or trail bytes with the result of contains(FFFD).
557             if(spanCondition) {
558                 do {
559                     if(!asciiBytes[b]) {
560                         return s;
561                     } else if(++s==limit) {
562                         return limit0;
563                     }
564                     b=*s;
565                 } while(b<0xc0);
566             } else {
567                 do {
568                     if(asciiBytes[b]) {
569                         return s;
570                     } else if(++s==limit) {
571                         return limit0;
572                     }
573                     b=*s;
574                 } while(b<0xc0);
575             }
576         }
577         ++s;  // Advance past the lead byte.
578         if(b>=0xe0) {
579             if(b<0xf0) {
580                 if( /* handle U+0000..U+FFFF inline */
581                     (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
582                     (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
583                 ) {
584                     b&=0xf;
585                     uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
586                     if(twoBits<=1) {
587                         // All 64 code points with this lead byte and middle trail byte
588                         // are either in the set or not.
589                         if(twoBits!=(uint32_t)spanCondition) {
590                             return s-1;
591                         }
592                     } else {
593                         // Look up the code point in its 4k block of code points.
594                         UChar32 c=(b<<12)|(t1<<6)|t2;
595                         if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
596                             return s-1;
597                         }
598                     }
599                     s+=2;
600                     continue;
601                 }
602             } else if( /* handle U+10000..U+10FFFF inline */
603                 (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
604                 (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
605                 (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
606             ) {
607                 // Give an illegal sequence the same value as the result of contains(FFFD).
608                 UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
609                 if( (   (0x10000<=c && c<=0x10ffff) ?
610                             containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
611                             asciiBytes[0x80]
612                     ) != spanCondition
613                 ) {
614                     return s-1;
615                 }
616                 s+=3;
617                 continue;
618             }
619         } else /* 0xc0<=b<0xe0 */ {
620             if( /* handle U+0000..U+07FF inline */
621                 (t1=(uint8_t)(*s-0x80)) <= 0x3f
622             ) {
623                 if(((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
624                     return s-1;
625                 }
626                 ++s;
627                 continue;
628             }
629         }
630 
631         // Give an illegal sequence the same value as the result of contains(FFFD).
632         // Handle each byte of an illegal sequence separately to simplify the code;
633         // no need to optimize error handling.
634         if(asciiBytes[0x80]!=spanCondition) {
635             return s-1;
636         }
637     }
638 
639     return limit0;
640 }
641 
642 /*
643  * While going backwards through UTF-8 optimize only for ASCII.
644  * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
645  * possible to tell from the last byte in a multi-byte sequence how many
646  * preceding bytes there should be. Therefore, going backwards through UTF-8
647  * is much harder than going forward.
648  */
649 int32_t
spanBackUTF8(const uint8_t * s,int32_t length,USetSpanCondition spanCondition) const650 BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
651     if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
652         spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
653     }
654 
655     uint8_t b;
656 
657     do {
658         b=s[--length];
659         if((int8_t)b>=0) {
660             // ASCII sub-span
661             if(spanCondition) {
662                 do {
663                     if(!asciiBytes[b]) {
664                         return length+1;
665                     } else if(length==0) {
666                         return 0;
667                     }
668                     b=s[--length];
669                 } while((int8_t)b>=0);
670             } else {
671                 do {
672                     if(asciiBytes[b]) {
673                         return length+1;
674                     } else if(length==0) {
675                         return 0;
676                     }
677                     b=s[--length];
678                 } while((int8_t)b>=0);
679             }
680         }
681 
682         int32_t prev=length;
683         UChar32 c;
684         if(b<0xc0) {
685             // trail byte: collect a multi-byte character
686             c=utf8_prevCharSafeBody(s, 0, &length, b, -1);
687             if(c<0) {
688                 c=0xfffd;
689             }
690         } else {
691             // lead byte in last-trail position
692             c=0xfffd;
693         }
694         // c is a valid code point, not ASCII, not a surrogate
695         if(c<=0x7ff) {
696             if(((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
697                 return prev+1;
698             }
699         } else if(c<=0xffff) {
700             int lead=c>>12;
701             uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
702             if(twoBits<=1) {
703                 // All 64 code points with the same bits 15..6
704                 // are either in the set or not.
705                 if(twoBits!=(uint32_t)spanCondition) {
706                     return prev+1;
707                 }
708             } else {
709                 // Look up the code point in its 4k block of code points.
710                 if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
711                     return prev+1;
712                 }
713             }
714         } else {
715             if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
716                 return prev+1;
717             }
718         }
719     } while(length>0);
720     return 0;
721 }
722 
723 U_NAMESPACE_END
724