unisetspan.cpp - OpenGrok cross reference for /external/icu/icu4c/source/common/unisetspan.cpp

Lines Matching +full:string +full:- +full:length
6 *   Copyright (C) 2007-2012, International Business Machines
11 *   encoding:   UTF-8
32  * a code point or a string.
46  * max string length and U16_LENGTH/U8_LENGTH to account for
57 class OffsetList {  // Only ever stack-allocated, does not need to inherit UMemory.
59     OffsetList() : list(staticList), capacity(0), length(0), start(0) {}  in OffsetList()
83         start=length=0;  in clear()
87         return (UBool)(length==0);  in isEmpty()
98             i-=capacity;  in shift()
102             --length;  in shift()
112             i-=capacity;  in addOffset()
115         ++length;  in addOffset()
122             i-=capacity;  in containsOffset()
127     // Find the lowest stored offset from a non-empty list, remove it,
131         // Look for the next offset in list[start+1..capacity-1].  in popMinimum()
136                 --length;  in popMinimum()
137                 result=i-start;  in popMinimum()
146         result=capacity-start;  in popMinimum()
152         --length;  in popMinimum()
160     int32_t length;  member in OffsetList
166 // Get the number of UTF-8 bytes for a UTF-16 (sub)string.
168 getUTF8Length(const char16_t *s, int32_t length) {  in getUTF8Length()  argument
171     u_strToUTF8(nullptr, 0, &length8, s, length, &errorCode);  in getUTF8Length()
175         // The string contains an unpaired surrogate.  in getUTF8Length()
176         // Ignore this string.  in getUTF8Length()
181 // Append the UTF-8 version of the string to t and return the appended UTF-8 length.
183 appendUTF8(const char16_t *s, int32_t length, uint8_t *t, int32_t capacity) {  in appendUTF8()  argument
186     u_strToUTF8((char *)t, capacity, &length8, s, length, &errorCode);  in appendUTF8()
190         // The string contains an unpaired surrogate.  in appendUTF8()
191         // Ignore this string.  in appendUTF8()
220     // If any string is relevant, then all strings need to be used for  in UnicodeSetStringSpan()
223     //   and do not store UTF-8 strings if !thisRelevant and CONTAINED.  in UnicodeSetStringSpan()
224     //   (Only store irrelevant UTF-8 strings for LONGEST_MATCH where they are relevant after all.)  in UnicodeSetStringSpan()
225     // Also count the lengths of the UTF-8 versions of the strings for memory allocation.  in UnicodeSetStringSpan()
231         const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in UnicodeSetStringSpan()  local
232         const char16_t *s16=string.getBuffer();  in UnicodeSetStringSpan()
233         int32_t length16=string.length();  in UnicodeSetStringSpan()
235             continue;  // skip the empty string  in UnicodeSetStringSpan()
239         if(spanLength<length16) {  // Relevant string.  in UnicodeSetStringSpan()
273         // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings.  in UnicodeSetStringSpan()
278             // UTF-8 lengths and UTF-8 strings.  in UnicodeSetStringSpan()
310     // Set the meta data and pSpanNotSet and write the UTF-8 strings.  in UnicodeSetStringSpan()
311     int32_t utf8Count=0;  // Count UTF-8 bytes written so far.  in UnicodeSetStringSpan()
314         const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in UnicodeSetStringSpan()  local
315         const char16_t *s16=string.getBuffer();  in UnicodeSetStringSpan()
316         int32_t length16=string.length();  in UnicodeSetStringSpan()
318         if(spanLength<length16 && length16>0) {  // Relevant string.  in UnicodeSetStringSpan()
325                         spanLength=length16-spanSet.spanBack(s16, length16, USET_SPAN_CONTAINED);  in UnicodeSetStringSpan()
334                 int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count);  in UnicodeSetStringSpan()
336                 if(length8==0) {  // Irrelevant for UTF-8 because not representable in UTF-8.  in UnicodeSetStringSpan()
338                 } else {  // Relevant for UTF-8.  in UnicodeSetStringSpan()
345 …           spanLength=length8-spanSet.spanBackUTF8((const char *)s8, length8, USET_SPAN_CONTAINED);  in UnicodeSetStringSpan()
354                 // Add string start and end code points to the spanNotSet so that  in UnicodeSetStringSpan()
355                 // a span(while not contained) stops before any string.  in UnicodeSetStringSpan()
368         } else {  // Irrelevant string. (Also the empty string.)  in UnicodeSetStringSpan()
372                     int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count);  in UnicodeSetStringSpan()
391         pSpanNotSet->freeze();  in UnicodeSetStringSpan()
406         pSpanNotSet=otherStringSpan.pSpanNotSet->clone();  in UnicodeSetStringSpan()
410     // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings.  in UnicodeSetStringSpan()
449     pSpanNotSet->add(c);  in addToSpanNotSet()
452 // Compare strings without any argument checks. Requires length>0.
454 matches16(const char16_t *s, const char16_t *t, int32_t length) {  in matches16()  argument
459     } while(--length>0);  in matches16()
464 matches8(const uint8_t *s, const uint8_t *t, int32_t length) {  in matches8()  argument
469     } while(--length>0);  in matches8()
473 // Compare 16-bit Unicode strings (which may be malformed UTF-16)
477 matches16CPB(const char16_t *s, int32_t start, int32_t limit, const char16_t *t, int32_t length) {  in matches16CPB()  argument
479     limit-=start;  in matches16CPB()
480     return matches16(s, t, length) &&  in matches16CPB()
481            !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) &&  in matches16CPB()
482            !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length]));  in matches16CPB()
486 // If so, return its length; otherwise return its negative length.
488 spanOne(const UnicodeSet &set, const char16_t *s, int32_t length) {  in spanOne()  argument
490     if(c>=0xd800 && c<=0xdbff && length>=2 && U16_IS_TRAIL(c2=s[1])) {  in spanOne()
491         return set.contains(U16_GET_SUPPLEMENTARY(c, c2)) ? 2 : -2;  in spanOne()
493     return set.contains(c) ? 1 : -1;  in spanOne()
497 spanOneBack(const UnicodeSet &set, const char16_t *s, int32_t length) {  in spanOneBack()  argument
498     char16_t c=s[length-1], c2;  in spanOneBack()
499     if(c>=0xdc00 && c<=0xdfff && length>=2 && U16_IS_LEAD(c2=s[length-2])) {  in spanOneBack()
500         return set.contains(U16_GET_SUPPLEMENTARY(c2, c)) ? 2 : -2;  in spanOneBack()
502     return set.contains(c) ? 1 : -1;  in spanOneBack()
506 spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {  in spanOneUTF8()  argument
509         return set.contains(c) ? 1 : -1;  in spanOneUTF8()
511     // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD().  in spanOneUTF8()
513     U8_NEXT_OR_FFFD(s, i, length, c);  in spanOneUTF8()
514     return set.contains(c) ? i : -i;  in spanOneUTF8()
518 spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {  in spanOneBackUTF8()  argument
519     UChar32 c=s[length-1];  in spanOneBackUTF8()
521         return set.contains(c) ? 1 : -1;  in spanOneBackUTF8()
523     int32_t i=length-1;  in spanOneBackUTF8()
524     c=utf8_prevCharSafeBody(s, 0, &i, c, -3);  in spanOneBackUTF8()
525     length-=i;  in spanOneBackUTF8()
526     return set.contains(c) ? length : -length;  in spanOneBackUTF8()
530  * Note: In span() when spanLength==0 (after a string match, or at the beginning
532  * string matching could use a binary search
533  * because all string matches are done from the same start index.
535  * For UTF-8, this would require a comparison function that returns UTF-16 order.
548  * - Iterate through the string, and at each code point boundary:
550  *   + If a set string matches at the current position, then remember to continue after it.
551  *   + Either recursively span for each code point or string match,
556  *     nor for any set string, then stop and return the longest recursive span length.
561  * A span using a string-less set is extremely fast.)
566  * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED).
567  * - Loop:
568  *   + Try to match each set string at the end of the spanLength.
569  *     ~ Set strings that start with set-contained code points must be matched
572  *     ~ Set strings that entirely consist of set-contained code points
576  *     ~ Rather than recursing, note each end point of a set string match.
577  *   + If no set string matched after spanSet.span(), then return
579  *   + If at least one set string matched after spanSet.span(), then
580  *     pop the shortest string match end point and continue
582  *   + If at least one more set string matched after a previous string match,
583  *     then test if the code point after the previous string match is also
586  *     or a matching set string.
587  *   + If no more set string matched after a previous string match,
591  * By noting each end point of a set string match,
592  * the function visits each string position at most once and finishes
595  * The recursive algorithm may visit the same string position many times
603  * - Iterate through the string, and at each code point boundary:
605  *   + If a set string matches at the current position, then remember to continue after it.
614  * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED).
615  * - Loop:
616  *   + Try to match each set string at the end of the spanLength.
617  *     ~ Set strings that start with set-contained code points must be matched
620  *     ~ Set strings that entirely consist of set-contained code points
621  *       must be matched with a full overlap because the longest-match algorithm
622  *       would hide set string matches that end earlier.
625  *       the set string match anyway.
626  *     ~ Remember the longest set string match (farthest end point) from the earliest
628  *   + If no set string matched after spanSet.span(), then return
630  *   + If at least one set string matched, then continue the loop after the
632  *   + If no more set string matched after a previous string match,
637 int32_t UnicodeSetStringSpan::span(const char16_t *s, int32_t length, USetSpanCondition spanConditi…  in span()  argument
639         return spanNot(s, length);  in span()
641     int32_t spanLength=spanSet.span(s, length, USET_SPAN_CONTAINED);  in span()
642     if(spanLength==length) {  in span()
643         return length;  in span()
652     int32_t pos=spanLength, rest=length-pos;  in span()
659                     continue;  // Irrelevant string. (Also the empty string.)  in span()
661                 const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in span()  local
662                 const char16_t *s16=string.getBuffer();  in span()
663                 int32_t length16=string.length();  in span()
664                 U_ASSERT(length>0);  in span()
666                 // Try to match this string at pos-overlap..pos.  in span()
670                     U16_BACK_1(s16, 0, overlap);  // Length of the string minus the last code point.  in span()
675                 int32_t inc=length16-overlap;  // Keep overlap+inc==length16.  in span()
681 …          if(!offsets.containsOffset(inc) && matches16CPB(s, pos-overlap, length, s16, length16)) {  in span()
683                             return length;  // Reached the end of the string.  in span()
690                     --overlap;  in span()
698                 // For longest match, we do need to try to match even an all-contained string  in span()
701                 const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in span()  local
702                 const char16_t *s16=string.getBuffer();  in span()
703                 int32_t length16=string.length();  in span()
705                     continue;  // skip the empty string  in span()
708                 // Try to match this string at pos-overlap..pos.  in span()
717                 int32_t inc=length16-overlap;  // Keep overlap+inc==length16.  in span()
722                     // Try to match if the string is longer or starts earlier.  in span()
724                         matches16CPB(s, pos-overlap, length, s16, length16)  in span()
730                     --overlap;  in span()
736                 // Longest-match algorithm, and there was a string match.  in span()
739                 rest-=maxInc;  in span()
741                     return length;  // Reached the end of the string.  in span()
743                 spanLength=0;  // Match strings from after a string match.  in span()
751             // not after a string match.  in span()
754             // strings match, and if such a non-initial span fails we stop.  in span()
758             // Match strings from after the next string match.  in span()
760             // The position is after a string match (or a single code point).  in span()
762                 // No more strings matched after a previous string match.  in span()
763                 // Try another code point span from after the last string match.  in span()
765                 if( spanLength==rest || // Reached the end of the string, or  in span()
771                 rest-=spanLength;  in span()
774                 // Try to match only one code point from after a string match if some  in span()
775                 // string matched beyond it, so that we try all possible positions  in span()
780                         return length;  // Reached the end of the string.  in span()
786                     rest-=spanLength;  in span()
791                 // Match strings from after the next string match.  in span()
796         rest-=minOffset;  in span()
797         spanLength=0;  // Match strings from after a string match.  in span()
801 int32_t UnicodeSetStringSpan::spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCon…  in spanBack()  argument
803         return spanNotBack(s, length);  in spanBack()
805     int32_t pos=spanSet.spanBack(s, length, USET_SPAN_CONTAINED);  in spanBack()
809     int32_t spanLength=length-pos;  in spanBack()
827                     continue;  // Irrelevant string. (Also the empty string.)  in spanBack()
829                 const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in spanBack()  local
830                 const char16_t *s16=string.getBuffer();  in spanBack()
831                 int32_t length16=string.length();  in spanBack()
832                 U_ASSERT(length>0);  in spanBack()
834                 // Try to match this string at pos-(length16-overlap)..pos-length16.  in spanBack()
840                     overlap-=len1;  // Length of the string minus the first code point.  in spanBack()
845                 int32_t dec=length16-overlap;  // Keep dec+overlap==length16.  in spanBack()
851 …              if(!offsets.containsOffset(dec) && matches16CPB(s, pos-dec, length, s16, length16)) {  in spanBack()
853                             return 0;  // Reached the start of the string.  in spanBack()
860                     --overlap;  in spanBack()
868                 // For longest match, we do need to try to match even an all-contained string  in spanBack()
871                 const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in spanBack()  local
872                 const char16_t *s16=string.getBuffer();  in spanBack()
873                 int32_t length16=string.length();  in spanBack()
875                     continue;  // skip the empty string  in spanBack()
878                 // Try to match this string at pos-(length16-overlap)..pos-length16.  in spanBack()
887                 int32_t dec=length16-overlap;  // Keep dec+overlap==length16.  in spanBack()
892                     // Try to match if the string is longer or ends later.  in spanBack()
894                         matches16CPB(s, pos-dec, length, s16, length16)  in spanBack()
900                     --overlap;  in spanBack()
906                 // Longest-match algorithm, and there was a string match.  in spanBack()
908                 pos-=maxDec;  in spanBack()
910                     return 0;  // Reached the start of the string.  in spanBack()
912                 spanLength=0;  // Match strings from before a string match.  in spanBack()
918         if(spanLength!=0 || pos==length) {  in spanBack()
920             // not before a string match.  in spanBack()
921             // The only position where spanLength==0 before a span is pos==length.  in spanBack()
923             // strings match, and if such a non-initial span fails we stop.  in spanBack()
927             // Match strings from before the next string match.  in spanBack()
929             // The position is before a string match (or a single code point).  in spanBack()
931                 // No more strings matched before a previous string match.  in spanBack()
932                 // Try another code point span from before the last string match.  in spanBack()
935                 spanLength=oldPos-pos;  in spanBack()
936                 if( pos==0 ||           // Reached the start of the string, or  in spanBack()
943                 // Try to match only one code point from before a string match if some  in spanBack()
944                 // string matched beyond it, so that we try all possible positions  in spanBack()
949                         return 0;  // Reached the start of the string.  in spanBack()
954                     pos-=spanLength;  in spanBack()
959                 // Match strings from before the next string match.  in spanBack()
962         pos-=offsets.popMinimum();  in spanBack()
963         spanLength=0;  // Match strings from before a string match.  in spanBack()
967 int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCond…  in spanUTF8()  argument
969         return spanNotUTF8(s, length);  in spanUTF8()
971     int32_t spanLength=spanSet.spanUTF8((const char *)s, length, USET_SPAN_CONTAINED);  in spanUTF8()
972     if(spanLength==length) {  in spanUTF8()
973         return length;  in spanUTF8()
982     int32_t pos=spanLength, rest=length-pos;  in spanUTF8()
995                     continue;  // String not representable in UTF-8.  in spanUTF8()
1000                     continue;  // Irrelevant string.  in spanUTF8()
1003                 // Try to match this string at pos-overlap..pos.  in spanUTF8()
1007                     U8_BACK_1(s8, 0, overlap);  // Length of the string minus the last code point.  in spanUTF8()
1012                 int32_t inc=length8-overlap;  // Keep overlap+inc==length8.  in spanUTF8()
1018                     // Match at code point boundaries. (The UTF-8 strings were converted  in spanUTF8()
1019                     // from UTF-16 and are guaranteed to be well-formed.)  in spanUTF8()
1020                     if(!U8_IS_TRAIL(s[pos-overlap]) &&  in spanUTF8()
1022                             matches8(s+pos-overlap, s8, length8)) {  in spanUTF8()
1024                             return length;  // Reached the end of the string.  in spanUTF8()
1031                     --overlap;  in spanUTF8()
1041                     continue;  // String not representable in UTF-8.  in spanUTF8()
1044                 // For longest match, we do need to try to match even an all-contained string  in spanUTF8()
1047                 // Try to match this string at pos-overlap..pos.  in spanUTF8()
1056                 int32_t inc=length8-overlap;  // Keep overlap+inc==length8.  in spanUTF8()
1061                     // Try to match if the string is longer or starts earlier.  in spanUTF8()
1062                     // Match at code point boundaries. (The UTF-8 strings were converted  in spanUTF8()
1063                     // from UTF-16 and are guaranteed to be well-formed.)  in spanUTF8()
1064                     if(!U8_IS_TRAIL(s[pos-overlap]) &&  in spanUTF8()
1067                             matches8(s+pos-overlap, s8, length8)) {  in spanUTF8()
1072                     --overlap;  in spanUTF8()
1079                 // Longest-match algorithm, and there was a string match.  in spanUTF8()
1082                 rest-=maxInc;  in spanUTF8()
1084                     return length;  // Reached the end of the string.  in spanUTF8()
1086                 spanLength=0;  // Match strings from after a string match.  in spanUTF8()
1094             // not after a string match.  in spanUTF8()
1097             // strings match, and if such a non-initial span fails we stop.  in spanUTF8()
1101             // Match strings from after the next string match.  in spanUTF8()
1103             // The position is after a string match (or a single code point).  in spanUTF8()
1105                 // No more strings matched after a previous string match.  in spanUTF8()
1106                 // Try another code point span from after the last string match.  in spanUTF8()
1108                 if( spanLength==rest || // Reached the end of the string, or  in spanUTF8()
1114                 rest-=spanLength;  in spanUTF8()
1117                 // Try to match only one code point from after a string match if some  in spanUTF8()
1118                 // string matched beyond it, so that we try all possible positions  in spanUTF8()
1123                         return length;  // Reached the end of the string.  in spanUTF8()
1129                     rest-=spanLength;  in spanUTF8()
1134                 // Match strings from after the next string match.  in spanUTF8()
1139         rest-=minOffset;  in spanUTF8()
1140         spanLength=0;  // Match strings from after a string match.  in spanUTF8()
1144 int32_t UnicodeSetStringSpan::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition span…  in spanBackUTF8()  argument
1146         return spanNotBackUTF8(s, length);  in spanBackUTF8()
1148     int32_t pos=spanSet.spanBackUTF8((const char *)s, length, USET_SPAN_CONTAINED);  in spanBackUTF8()
1152     int32_t spanLength=length-pos;  in spanBackUTF8()
1172                     continue;  // String not representable in UTF-8.  in spanBackUTF8()
1177                     continue;  // Irrelevant string.  in spanBackUTF8()
1180                 // Try to match this string at pos-(length8-overlap)..pos-length8.  in spanBackUTF8()
1186                     overlap-=len1;  // Length of the string minus the first code point.  in spanBackUTF8()
1191                 int32_t dec=length8-overlap;  // Keep dec+overlap==length8.  in spanBackUTF8()
1197                     // Match at code point boundaries. (The UTF-8 strings were converted  in spanBackUTF8()
1198                     // from UTF-16 and are guaranteed to be well-formed.)  in spanBackUTF8()
1199                     if( !U8_IS_TRAIL(s[pos-dec]) &&  in spanBackUTF8()
1201                         matches8(s+pos-dec, s8, length8)  in spanBackUTF8()
1204                             return 0;  // Reached the start of the string.  in spanBackUTF8()
1211                     --overlap;  in spanBackUTF8()
1221                     continue;  // String not representable in UTF-8.  in spanBackUTF8()
1224                 // For longest match, we do need to try to match even an all-contained string  in spanBackUTF8()
1227                 // Try to match this string at pos-(length8-overlap)..pos-length8.  in spanBackUTF8()
1236                 int32_t dec=length8-overlap;  // Keep dec+overlap==length8.  in spanBackUTF8()
1241                     // Try to match if the string is longer or ends later.  in spanBackUTF8()
1242                     // Match at code point boundaries. (The UTF-8 strings were converted  in spanBackUTF8()
1243                     // from UTF-16 and are guaranteed to be well-formed.)  in spanBackUTF8()
1244                     if( !U8_IS_TRAIL(s[pos-dec]) &&  in spanBackUTF8()
1246                         matches8(s+pos-dec, s8, length8)  in spanBackUTF8()
1252                     --overlap;  in spanBackUTF8()
1259                 // Longest-match algorithm, and there was a string match.  in spanBackUTF8()
1261                 pos-=maxDec;  in spanBackUTF8()
1263                     return 0;  // Reached the start of the string.  in spanBackUTF8()
1265                 spanLength=0;  // Match strings from before a string match.  in spanBackUTF8()
1271         if(spanLength!=0 || pos==length) {  in spanBackUTF8()
1273             // not before a string match.  in spanBackUTF8()
1274             // The only position where spanLength==0 before a span is pos==length.  in spanBackUTF8()
1276             // strings match, and if such a non-initial span fails we stop.  in spanBackUTF8()
1280             // Match strings from before the next string match.  in spanBackUTF8()
1282             // The position is before a string match (or a single code point).  in spanBackUTF8()
1284                 // No more strings matched before a previous string match.  in spanBackUTF8()
1285                 // Try another code point span from before the last string match.  in spanBackUTF8()
1288                 spanLength=oldPos-pos;  in spanBackUTF8()
1289                 if( pos==0 ||           // Reached the start of the string, or  in spanBackUTF8()
1296                 // Try to match only one code point from before a string match if some  in spanBackUTF8()
1297                 // string matched beyond it, so that we try all possible positions  in spanBackUTF8()
1302                         return 0;  // Reached the start of the string.  in spanBackUTF8()
1307                     pos-=spanLength;  in spanBackUTF8()
1312                 // Match strings from before the next string match.  in spanBackUTF8()
1315         pos-=offsets.popMinimum();  in spanBackUTF8()
1316         spanLength=0;  // Match strings from before a string match.  in spanBackUTF8()
1324  * - Iterate through the string, and at each code point boundary:
1326  *   + If a set string matches at the current position, then return with the current position.
1334  * For each set string add its initial code point to the spanNotSet.
1337  * - Loop:
1341  *   + If any set string matches at the current position, then
1344  *     nor for any set string, then skip this code point and continue the loop.
1345  *     This happens for set-string-initial code points that were added to spanNotSet
1346  *     when there is not actually a match for such a set string.
1349 int32_t UnicodeSetStringSpan::spanNot(const char16_t *s, int32_t length) const {  in spanNot()
1350     int32_t pos=0, rest=length;  in spanNot()
1354         // or a code point that starts or ends some string.  in spanNot()
1355         i=pSpanNotSet->span(s+pos, rest, USET_SPAN_NOT_CONTAINED);  in spanNot()
1357             return length;  // Reached the end of the string.  in spanNot()
1360         rest-=i;  in spanNot()
1363         // without the string starts and ends.  in spanNot()
1372                 continue;  // Irrelevant string. (Also the empty string.)  in spanNot()
1374             const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in spanNot()  local
1375             const char16_t *s16=string.getBuffer();  in spanNot()
1376             int32_t length16=string.length();  in spanNot()
1377             U_ASSERT(length>0);  in spanNot()
1378             if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {  in spanNot()
1383         // The span(while not contained) ended on a string start/end which is  in spanNot()
1386         pos-=cpLength;  in spanNot()
1389     return length;  // Reached the end of the string.  in spanNot()
1392 int32_t UnicodeSetStringSpan::spanNotBack(const char16_t *s, int32_t length) const {  in spanNotBack()
1393     int32_t pos=length;  in spanNotBack()
1397         // or a code point that starts or ends some string.  in spanNotBack()
1398         pos=pSpanNotSet->spanBack(s, pos, USET_SPAN_NOT_CONTAINED);  in spanNotBack()
1400             return 0;  // Reached the start of the string.  in spanNotBack()
1404         // without the string starts and ends.  in spanNotBack()
1413             // it is easier and we only need to know whether the string is irrelevant  in spanNotBack()
1416                 continue;  // Irrelevant string. (Also the empty string.)  in spanNotBack()
1418             const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);  in spanNotBack()  local
1419             const char16_t *s16=string.getBuffer();  in spanNotBack()
1420             int32_t length16=string.length();  in spanNotBack()
1421             U_ASSERT(length>0);  in spanNotBack()
1422             if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {  in spanNotBack()
1427         // The span(while not contained) ended on a string start/end which is  in spanNotBack()
1432     return 0;  // Reached the start of the string.  in spanNotBack()
1435 int32_t UnicodeSetStringSpan::spanNotUTF8(const uint8_t *s, int32_t length) const {  in spanNotUTF8()
1436     int32_t pos=0, rest=length;  in spanNotUTF8()
1444         // or a code point that starts or ends some string.  in spanNotUTF8()
1445         i=pSpanNotSet->spanUTF8((const char *)s+pos, rest, USET_SPAN_NOT_CONTAINED);  in spanNotUTF8()
1447             return length;  // Reached the end of the string.  in spanNotUTF8()
1450         rest-=i;  in spanNotUTF8()
1453         // without the string starts and ends.  in spanNotUTF8()
1464             // ALL_CP_CONTAINED: Irrelevant string.  in spanNotUTF8()
1471         // The span(while not contained) ended on a string start/end which is  in spanNotUTF8()
1474         pos-=cpLength;  in spanNotUTF8()
1477     return length;  // Reached the end of the string.  in spanNotUTF8()
1480 int32_t UnicodeSetStringSpan::spanNotBackUTF8(const uint8_t *s, int32_t length) const {  in spanNotBackUTF8()
1481     int32_t pos=length;  in spanNotBackUTF8()
1489         // or a code point that starts or ends some string.  in spanNotBackUTF8()
1490         pos=pSpanNotSet->spanBackUTF8((const char *)s, pos, USET_SPAN_NOT_CONTAINED);  in spanNotBackUTF8()
1492             return 0;  // Reached the start of the string.  in spanNotBackUTF8()
1496         // without the string starts and ends.  in spanNotBackUTF8()
1507             // ALL_CP_CONTAINED: Irrelevant string.  in spanNotBackUTF8()
1508 …& spanBackUTF8Lengths[i]!=ALL_CP_CONTAINED && length8<=pos && matches8(s+pos-length8, s8, length8)…  in spanNotBackUTF8()
1514         // The span(while not contained) ended on a string start/end which is  in spanNotBackUTF8()
1519     return 0;  // Reached the start of the string.  in spanNotBackUTF8()