• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  *   Copyright (C) 1996-2010, International Business Machines                 *
4  *   Corporation and others.  All Rights Reserved.                            *
5  ******************************************************************************
6  */
7 
8 #include "unicode/utypes.h"
9 
10 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
11 
12 #include "unicode/unistr.h"
13 #include "unicode/putil.h"
14 #include "unicode/usearch.h"
15 
16 #include "cmemory.h"
17 #include "unicode/coll.h"
18 #include "unicode/tblcoll.h"
19 #include "unicode/coleitr.h"
20 #include "unicode/ucoleitr.h"
21 
22 #include "unicode/regex.h"        // TODO: make conditional on regexp being built.
23 
24 #include "unicode/uniset.h"
25 #include "unicode/uset.h"
26 #include "unicode/ustring.h"
27 #include "hash.h"
28 #include "uhash.h"
29 #include "ucol_imp.h"
30 #include "normalizer2impl.h"
31 
32 #include "unicode/colldata.h"
33 #include "unicode/bmsearch.h"
34 
35 U_NAMESPACE_BEGIN
36 
37 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
38 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
39 #define DELETE_ARRAY(array) uprv_free((void *) (array))
40 
41 
42 struct CEI
43 {
44     uint32_t order;
45     int32_t  lowOffset;
46     int32_t  highOffset;
47 };
48 
49 class Target : public UMemory
50 {
51 public:
52     Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
53     ~Target();
54 
55     void setTargetString(const UnicodeString *target);
56 
57     const CEI *nextCE(int32_t offset);
58     const CEI *prevCE(int32_t offset);
59 
60     int32_t stringLength();
61     UChar charAt(int32_t offset);
62 
63     UBool isBreakBoundary(int32_t offset);
64     int32_t nextBreakBoundary(int32_t offset);
65     int32_t nextSafeBoundary(int32_t offset);
66 
67     UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);
68 
69     void setOffset(int32_t offset);
70     void setLast(int32_t last);
71     int32_t getOffset();
72 
73 private:
74     CEI *ceb;
75     int32_t bufferSize;
76     int32_t bufferMin;
77     int32_t bufferMax;
78 
79     uint32_t strengthMask;
80     UCollationStrength strength;
81     uint32_t variableTop;
82     UBool toShift;
83     UCollator *coll;
84     const Normalizer2 &nfd;
85 
86     const UnicodeString *targetString;
87     const UChar *targetBuffer;
88     int32_t targetLength;
89 
90     UCollationElements *elements;
91     UBreakIterator *charBreakIterator;
92 };
93 
Target(UCollator * theCollator,const UnicodeString * target,int32_t patternLength,UErrorCode & status)94 Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
95     : bufferSize(0), bufferMin(0), bufferMax(0),
96       strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
97       nfd(*Normalizer2Factory::getNFDInstance(status)),
98       targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
99 {
100     strength = ucol_getStrength(coll);
101     toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==  UCOL_SHIFTED;
102     variableTop = ucol_getVariableTop(coll, &status);
103 
104     // find the largest expansion
105     uint8_t maxExpansion = 0;
106     for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) {
107         if (*expansion > maxExpansion) {
108             maxExpansion = *expansion;
109         }
110     }
111 
112     // room for an extra character on each end, plus 4 for safety
113     bufferSize = patternLength + (2 * maxExpansion) + 4;
114 
115     ceb = NEW_ARRAY(CEI, bufferSize);
116 
117     if (ceb == NULL) {
118         status = U_MEMORY_ALLOCATION_ERROR;
119         return;
120     }
121 
122     if (target != NULL) {
123         setTargetString(target);
124     }
125 
126     switch (strength)
127     {
128     default:
129         strengthMask |= UCOL_TERTIARYORDERMASK;
130         /* fall through */
131 
132     case UCOL_SECONDARY:
133         strengthMask |= UCOL_SECONDARYORDERMASK;
134         /* fall through */
135 
136     case UCOL_PRIMARY:
137         strengthMask |= UCOL_PRIMARYORDERMASK;
138     }
139 }
140 
~Target()141 Target::~Target()
142 {
143     ubrk_close(charBreakIterator);
144     ucol_closeElements(elements);
145 
146     DELETE_ARRAY(ceb);
147 }
148 
setTargetString(const UnicodeString * target)149 void Target::setTargetString(const UnicodeString *target)
150 {
151     if (charBreakIterator != NULL) {
152         ubrk_close(charBreakIterator);
153         ucol_closeElements(elements);
154     }
155 
156     targetString = target;
157 
158     if (targetString != NULL) {
159         UErrorCode status = U_ZERO_ERROR;
160 
161         targetBuffer = targetString->getBuffer();
162         targetLength = targetString->length();
163 
164         elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
165         ucol_forceHanImplicit(elements, &status);
166 
167         charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
168                                       targetBuffer, targetLength, &status);
169     } else {
170         targetBuffer = NULL;
171         targetLength = 0;
172     }
173 }
174 
nextCE(int32_t offset)175 const CEI *Target::nextCE(int32_t offset)
176 {
177     UErrorCode status = U_ZERO_ERROR;
178     int32_t low = -1, high = -1;
179     uint32_t order;
180     UBool cont = FALSE;
181 
182     if (offset >= bufferMin && offset < bufferMax) {
183         return &ceb[offset];
184     }
185 
186     if (bufferMax >= bufferSize || offset != bufferMax) {
187         return NULL;
188     }
189 
190     do {
191         low   = ucol_getOffset(elements);
192         order = ucol_next(elements, &status);
193         high  = ucol_getOffset(elements);
194 
195         if (order == (uint32_t)UCOL_NULLORDER) {
196           //high = low = -1;
197             break;
198         }
199 
200         cont = isContinuation(order);
201         order &= strengthMask;
202 
203         if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
204             if (strength >= UCOL_QUATERNARY) {
205                 order &= UCOL_PRIMARYORDERMASK;
206             } else {
207                 order = UCOL_IGNORABLE;
208             }
209         }
210     } while (order == UCOL_IGNORABLE);
211 
212     if (cont) {
213         order |= UCOL_CONTINUATION_MARKER;
214     }
215 
216     ceb[offset].order = order;
217     ceb[offset].lowOffset = low;
218     ceb[offset].highOffset = high;
219 
220     bufferMax += 1;
221 
222     return &ceb[offset];
223 }
224 
prevCE(int32_t offset)225 const CEI *Target::prevCE(int32_t offset)
226 {
227     UErrorCode status = U_ZERO_ERROR;
228     int32_t low = -1, high = -1;
229     uint32_t order;
230     UBool cont = FALSE;
231 
232     if (offset >= bufferMin && offset < bufferMax) {
233         return &ceb[offset];
234     }
235 
236     if (bufferMax >= bufferSize || offset != bufferMax) {
237         return NULL;
238     }
239 
240     do {
241         high  = ucol_getOffset(elements);
242         order = ucol_previous(elements, &status);
243         low   = ucol_getOffset(elements);
244 
245         if (order == (uint32_t)UCOL_NULLORDER) {
246             break;
247         }
248 
249         cont = isContinuation(order);
250         order &= strengthMask;
251 
252         if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
253             if (strength >= UCOL_QUATERNARY) {
254                 order &= UCOL_PRIMARYORDERMASK;
255             } else {
256                 order = UCOL_IGNORABLE;
257             }
258         }
259     } while (order == UCOL_IGNORABLE);
260 
261     bufferMax += 1;
262 
263     if (cont) {
264         order |= UCOL_CONTINUATION_MARKER;
265     }
266 
267     ceb[offset].order       = order;
268     ceb[offset].lowOffset   = low;
269     ceb[offset].highOffset = high;
270 
271     return &ceb[offset];
272 }
273 
stringLength()274 int32_t Target::stringLength()
275 {
276     if (targetString != NULL) {
277         return targetLength;
278     }
279 
280     return 0;
281 }
282 
charAt(int32_t offset)283 UChar Target::charAt(int32_t offset)
284 {
285     if (targetString != NULL) {
286         return targetBuffer[offset];
287     }
288 
289     return 0x0000;
290 }
291 
setOffset(int32_t offset)292 void Target::setOffset(int32_t offset)
293 {
294     UErrorCode status = U_ZERO_ERROR;
295 
296     bufferMin = 0;
297     bufferMax = 0;
298 
299     ucol_setOffset(elements, offset, &status);
300 }
301 
setLast(int32_t last)302 void Target::setLast(int32_t last)
303 {
304     UErrorCode status = U_ZERO_ERROR;
305 
306     bufferMin = 0;
307     bufferMax = 1;
308 
309     ceb[0].order      = UCOL_NULLORDER;
310     ceb[0].lowOffset  = last;
311     ceb[0].highOffset = last;
312 
313     ucol_setOffset(elements, last, &status);
314 }
315 
getOffset()316 int32_t Target::getOffset()
317 {
318     return ucol_getOffset(elements);
319 }
320 
isBreakBoundary(int32_t offset)321 UBool Target::isBreakBoundary(int32_t offset)
322 {
323     return ubrk_isBoundary(charBreakIterator, offset);
324 }
325 
nextBreakBoundary(int32_t offset)326 int32_t Target::nextBreakBoundary(int32_t offset)
327 {
328     return ubrk_following(charBreakIterator, offset);
329 }
330 
nextSafeBoundary(int32_t offset)331 int32_t Target::nextSafeBoundary(int32_t offset)
332 {
333     while (offset < targetLength) {
334       //UChar ch = charAt(offset);
335         UChar ch = targetBuffer[offset];
336 
337         if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) {
338             return offset;
339         }
340 
341         offset += 1;
342     }
343 
344     return targetLength;
345 }
346 
isIdentical(UnicodeString & pattern,int32_t start,int32_t end)347 UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
348 {
349     if (strength < UCOL_IDENTICAL) {
350         return TRUE;
351     }
352 
353     // Note: We could use Normalizer::compare() or similar, but for short strings
354     // which may not be in FCD it might be faster to just NFD them.
355     UErrorCode status = U_ZERO_ERROR;
356     UnicodeString t2, p2;
357     nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status);
358     nfd.normalize(pattern, p2, status);
359     // return FALSE if NFD failed
360     return U_SUCCESS(status) && t2 == p2;
361 }
362 
363 #define HASH_TABLE_SIZE 257
364 
365 class BadCharacterTable : public UMemory
366 {
367 public:
368     BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
369     ~BadCharacterTable();
370 
371     int32_t operator[](uint32_t ce) const;
372     int32_t getMaxSkip() const;
373     int32_t minLengthInChars(int32_t index);
374 
375 private:
376     static int32_t hash(uint32_t ce);
377 
378     int32_t maxSkip;
379     int32_t badCharacterTable[HASH_TABLE_SIZE];
380 
381     int32_t *minLengthCache;
382 };
383 
BadCharacterTable(CEList & patternCEs,CollData * data,UErrorCode & status)384 BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
385     : minLengthCache(NULL)
386 {
387     int32_t plen = patternCEs.size();
388 
389     // **** need a better way to deal with this ****
390     if (U_FAILURE(status) || plen == 0) {
391         return;
392     }
393 
394     int32_t *history = NEW_ARRAY(int32_t, plen);
395 
396     if (history == NULL) {
397         status = U_MEMORY_ALLOCATION_ERROR;
398         return;
399     }
400 
401     for (int32_t i = 0; i < plen; i += 1) {
402         history[i] = -1;
403     }
404 
405     minLengthCache = NEW_ARRAY(int32_t, plen + 1);
406 
407     if (minLengthCache == NULL) {
408         DELETE_ARRAY(history);
409         status = U_MEMORY_ALLOCATION_ERROR;
410         return;
411     }
412 
413     maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);
414 
415     for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
416         badCharacterTable[j] = maxSkip;
417     }
418 
419     for(int32_t p = 1; p < plen; p += 1) {
420         minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);
421 
422         // Make sure this entry is not bigger than the previous one.
423         // Otherwise, we might skip too far in some cases.
424         if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
425             minLengthCache[p] = minLengthCache[p - 1];
426         }
427     }
428 
429     minLengthCache[plen] = 0;
430 
431     for(int32_t p = 0; p < plen - 1; p += 1) {
432         badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
433     }
434 
435     DELETE_ARRAY(history);
436 }
437 
~BadCharacterTable()438 BadCharacterTable::~BadCharacterTable()
439 {
440     DELETE_ARRAY(minLengthCache);
441 }
442 
operator [](uint32_t ce) const443 int32_t BadCharacterTable::operator[](uint32_t ce) const
444 {
445     return badCharacterTable[hash(ce)];
446 }
447 
getMaxSkip() const448 int32_t BadCharacterTable::getMaxSkip() const
449 {
450     return maxSkip;
451 }
452 
minLengthInChars(int32_t index)453 int32_t BadCharacterTable::minLengthInChars(int32_t index)
454 {
455     return minLengthCache[index];
456 }
457 
hash(uint32_t ce)458 int32_t BadCharacterTable::hash(uint32_t ce)
459 {
460     return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;
461 }
462 
463 class GoodSuffixTable : public UMemory
464 {
465 public:
466     GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
467     ~GoodSuffixTable();
468 
469     int32_t operator[](int32_t offset) const;
470 
471 private:
472     int32_t *goodSuffixTable;
473 };
474 
GoodSuffixTable(CEList & patternCEs,BadCharacterTable & badCharacterTable,UErrorCode & status)475 GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
476     : goodSuffixTable(NULL)
477 {
478     int32_t patlen = patternCEs.size();
479 
480     // **** need a better way to deal with this ****
481     if (U_FAILURE(status) || patlen <= 0) {
482         return;
483     }
484 
485     int32_t *suff  = NEW_ARRAY(int32_t, patlen);
486     int32_t start = patlen - 1, end = - 1;
487     int32_t maxSkip = badCharacterTable.getMaxSkip();
488 
489     if (suff == NULL) {
490         status = U_MEMORY_ALLOCATION_ERROR;
491         return;
492     }
493 
494     // initialze suff
495     suff[patlen - 1] = patlen;
496 
497     for (int32_t i = patlen - 2; i >= 0; i -= 1) {
498         // (i > start) means we're inside the last suffix match we found
499         // ((patlen - 1) - end) is how far the end of that match is from end of pattern
500         // (i - start) is how far we are from start of that match
501         // (i + (patlen - 1) - end) is index of same character at end of pattern
502         // so if any suffix match at that character doesn't extend beyond the last match,
503         // it's the suffix for this character as well
504         if (i > start && suff[i + patlen - 1 - end] < i - start) {
505             suff[i] = suff[i + patlen - 1 - end];
506         } else {
507             start = end = i;
508 
509             int32_t s = patlen;
510 
511             while (start >= 0 && patternCEs[start] == patternCEs[--s]) {
512                 start -= 1;
513             }
514 
515             suff[i] = end - start;
516         }
517     }
518 
519     // now build goodSuffixTable
520     goodSuffixTable  = NEW_ARRAY(int32_t, patlen);
521 
522     if (goodSuffixTable == NULL) {
523         DELETE_ARRAY(suff);
524         status = U_MEMORY_ALLOCATION_ERROR;
525         return;
526     }
527 
528 
529     // initialize entries to minLengthInChars of the pattern
530     for (int32_t i = 0; i < patlen; i += 1) {
531         goodSuffixTable[i] = maxSkip;
532     }
533 
534     int32_t prefix = 0;
535 
536     for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) {
537         if (suff[i] == i + 1) {
538             // this matching suffix is a prefix of the pattern
539             int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);
540 
541             // for any mis-match before this suffix, we should skip
542             // so that the front of the pattern (i.e. the prefix)
543             // lines up with the front of the suffix.
544             // (patlen - 1 - i) is the start of the suffix
545             while (prefix < patlen - 1 - i) {
546                 // value of maxSkip means never set...
547                 if (goodSuffixTable[prefix] == maxSkip) {
548                     goodSuffixTable[prefix] = prefixSkip;
549                 }
550 
551                 prefix += 1;
552             }
553         }
554     }
555 
556     for (int32_t i = 0; i < patlen - 1; i += 1) {
557         goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
558     }
559 
560     DELETE_ARRAY(suff);
561 }
562 
~GoodSuffixTable()563 GoodSuffixTable::~GoodSuffixTable()
564 {
565     DELETE_ARRAY(goodSuffixTable);
566 }
567 
operator [](int32_t offset) const568 int32_t GoodSuffixTable::operator[](int32_t offset) const
569 {
570     return goodSuffixTable[offset];
571 }
572 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)573 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)
574 
575 
576 UBool BoyerMooreSearch::empty()
577 {
578     return patCEs->size() <= 0;
579 }
580 
getData()581 CollData *BoyerMooreSearch::getData()
582 {
583     return data;
584 }
585 
getPatternCEs()586 CEList *BoyerMooreSearch::getPatternCEs()
587 {
588     return patCEs;
589 }
590 
getBadCharacterTable()591 BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
592 {
593     return badCharacterTable;
594 }
595 
getGoodSuffixTable()596 GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
597 {
598     return goodSuffixTable;
599 }
600 
BoyerMooreSearch(CollData * theData,const UnicodeString & patternString,const UnicodeString * targetString,UErrorCode & status)601 BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
602                                    UErrorCode &status)
603     : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
604 {
605 
606     if (U_FAILURE(status)) {
607         return;
608     }
609 
610     UCollator *collator = data->getCollator();
611 
612     patCEs = new CEList(collator, patternString, status);
613 
614     if (patCEs == NULL || U_FAILURE(status)) {
615         return;
616     }
617 
618     badCharacterTable = new BadCharacterTable(*patCEs, data, status);
619 
620     if (badCharacterTable == NULL || U_FAILURE(status)) {
621         return;
622     }
623 
624     goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);
625 
626     if (targetString != NULL) {
627         target = new Target(collator, targetString, patCEs->size(), status);
628     }
629 }
630 
~BoyerMooreSearch()631 BoyerMooreSearch::~BoyerMooreSearch()
632 {
633     delete target;
634     delete goodSuffixTable;
635     delete badCharacterTable;
636     delete patCEs;
637 }
638 
setTargetString(const UnicodeString * targetString,UErrorCode & status)639 void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
640 {
641     if (U_FAILURE(status)) {
642         return;
643     }
644 
645     if (target == NULL) {
646         target = new Target(data->getCollator(), targetString, patCEs->size(), status);
647     } else {
648         target->setTargetString(targetString);
649     }
650 }
651 
652 // **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
653 /*
654  * TODO:
655  *  * deal with trailing (and leading?) ignorables.
656  *  * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
657  */
search(int32_t offset,int32_t & start,int32_t & end)658 UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
659 {
660     /*UCollator *coll =*/ data->getCollator();
661     int32_t plen = patCEs->size();
662     int32_t tlen = target->stringLength();
663     int32_t maxSkip = badCharacterTable->getMaxSkip();
664     int32_t tOffset = offset + maxSkip;
665 
666     if (plen <= 0) {
667         // Searching for a zero length pattern always fails.
668         start = end = -1;
669         return FALSE;
670     }
671 
672     while (tOffset <= tlen) {
673         int32_t pIndex = plen - 1;
674         int32_t tIndex = 0;
675         int32_t lIndex = 0;
676 
677         if (tOffset < tlen) {
678             // **** we really want to skip ahead enough to  ****
679             // **** be sure we get at least 1 non-ignorable ****
680             // **** CE after the end of the pattern.        ****
681             int32_t next = target->nextSafeBoundary(tOffset + 1);
682 
683             target->setOffset(next);
684 
685             for (lIndex = 0; ; lIndex += 1) {
686                 const CEI *cei = target->prevCE(lIndex);
687                 int32_t low = cei->lowOffset;
688                 int32_t high = cei->highOffset;
689 
690                 if (high == 0 || (low < high && low <= tOffset)) {
691                     if (low < tOffset) {
692                         while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
693                             lIndex -= 1;
694                         }
695 
696                         if (high > tOffset) {
697                             tOffset = high;
698                         }
699                     }
700 
701                     break;
702                 }
703             }
704         } else {
705             target->setLast(tOffset);
706             lIndex = 0;
707         }
708 
709         tIndex = ++lIndex;
710 
711         // Iterate backward until we hit the beginning of the pattern
712         while (pIndex >= 0) {
713             uint32_t pce = (*patCEs)[pIndex];
714             const CEI *tcei = target->prevCE(tIndex++);
715 
716 
717             if (tcei->order != pce) {
718                 // There is a mismatch at this position.  Decide how far
719                 // over to shift the pattern, then try again.
720 
721                 int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
722 #ifdef EXTRA_CAUTIOUS
723                 int32_t old = tOffset;
724 #endif
725 
726                 tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);
727 
728                 if (gsOffset > tOffset) {
729                     tOffset = gsOffset;
730                 }
731 
732 #ifdef EXTRA_CAUTIOUS
733                 // Make sure we don't skip backwards...
734                 if (tOffset <= old) {
735                     tOffset = old + 1;
736                 }
737 #endif
738 
739                 break;
740             }
741 
742             pIndex -= 1;
743         }
744 
745         if (pIndex < 0) {
746             // We made it back to the beginning of the pattern,
747             // which means we matched it all.  Return the location.
748             const CEI firstCEI = *target->prevCE(tIndex - 1);
749             const CEI lastCEI  = *target->prevCE(lIndex);
750             int32_t mStart   = firstCEI.lowOffset;
751             int32_t minLimit = lastCEI.lowOffset;
752             int32_t maxLimit = lastCEI.highOffset;
753             int32_t mLimit;
754             UBool found = TRUE;
755 
756             target->setOffset(/*tOffset*/maxLimit);
757 
758             const CEI nextCEI = *target->nextCE(0);
759 
760             if (nextCEI.lowOffset > maxLimit) {
761                 maxLimit = nextCEI.lowOffset;
762             }
763 
764             if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) {
765                 found = FALSE;
766             }
767 
768             if (! target->isBreakBoundary(mStart)) {
769                 found = FALSE;
770             }
771 
772             if (firstCEI.lowOffset == firstCEI.highOffset) {
773                 found = FALSE;
774             }
775 
776             mLimit = maxLimit;
777             if (minLimit < maxLimit) {
778                 int32_t nbb = target->nextBreakBoundary(minLimit);
779 
780                 if (nbb >= lastCEI.highOffset) {
781                     mLimit = nbb;
782                 }
783             }
784 
785             if (mLimit > maxLimit) {
786                 found = FALSE;
787             }
788 
789             if (! target->isBreakBoundary(mLimit)) {
790                 found = FALSE;
791             }
792 
793             if (! target->isIdentical(pattern, mStart, mLimit)) {
794                 found = FALSE;
795             }
796 
797             if (found) {
798                 start = mStart;
799                 end   = mLimit;
800 
801                 return TRUE;
802             }
803 
804             tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
805         }
806         // Otherwise, we're here because of a mismatch, so keep going....
807     }
808 
809     // no match
810    start = -1;
811    end = -1;
812    return FALSE;
813 }
814 
815 U_NAMESPACE_END
816 
817 #endif // #if !UCONFIG_NO_COLLATION
818