1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines *
4 * Corporation and others. All Rights Reserved. *
5 ******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
11
12 #include "unicode/unistr.h"
13 #include "unicode/putil.h"
14 #include "unicode/usearch.h"
15
16 #include "cmemory.h"
17 #include "unicode/coll.h"
18 #include "unicode/tblcoll.h"
19 #include "unicode/coleitr.h"
20 #include "unicode/ucoleitr.h"
21
22 #include "unicode/regex.h" // TODO: make conditional on regexp being built.
23
24 #include "unicode/uniset.h"
25 #include "unicode/uset.h"
26 #include "unicode/ustring.h"
27 #include "hash.h"
28 #include "uhash.h"
29 #include "ucol_imp.h"
30 #include "normalizer2impl.h"
31
32 #include "unicode/colldata.h"
33 #include "unicode/bmsearch.h"
34
35 U_NAMESPACE_BEGIN
36
37 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
38 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
39 #define DELETE_ARRAY(array) uprv_free((void *) (array))
40
41
42 struct CEI
43 {
44 uint32_t order;
45 int32_t lowOffset;
46 int32_t highOffset;
47 };
48
49 class Target : public UMemory
50 {
51 public:
52 Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
53 ~Target();
54
55 void setTargetString(const UnicodeString *target);
56
57 const CEI *nextCE(int32_t offset);
58 const CEI *prevCE(int32_t offset);
59
60 int32_t stringLength();
61 UChar charAt(int32_t offset);
62
63 UBool isBreakBoundary(int32_t offset);
64 int32_t nextBreakBoundary(int32_t offset);
65 int32_t nextSafeBoundary(int32_t offset);
66
67 UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);
68
69 void setOffset(int32_t offset);
70 void setLast(int32_t last);
71 int32_t getOffset();
72
73 private:
74 CEI *ceb;
75 int32_t bufferSize;
76 int32_t bufferMin;
77 int32_t bufferMax;
78
79 uint32_t strengthMask;
80 UCollationStrength strength;
81 uint32_t variableTop;
82 UBool toShift;
83 UCollator *coll;
84 const Normalizer2 &nfd;
85
86 const UnicodeString *targetString;
87 const UChar *targetBuffer;
88 int32_t targetLength;
89
90 UCollationElements *elements;
91 UBreakIterator *charBreakIterator;
92 };
93
Target(UCollator * theCollator,const UnicodeString * target,int32_t patternLength,UErrorCode & status)94 Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
95 : bufferSize(0), bufferMin(0), bufferMax(0),
96 strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
97 nfd(*Normalizer2Factory::getNFDInstance(status)),
98 targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
99 {
100 strength = ucol_getStrength(coll);
101 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
102 variableTop = ucol_getVariableTop(coll, &status);
103
104 // find the largest expansion
105 uint8_t maxExpansion = 0;
106 for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) {
107 if (*expansion > maxExpansion) {
108 maxExpansion = *expansion;
109 }
110 }
111
112 // room for an extra character on each end, plus 4 for safety
113 bufferSize = patternLength + (2 * maxExpansion) + 4;
114
115 ceb = NEW_ARRAY(CEI, bufferSize);
116
117 if (ceb == NULL) {
118 status = U_MEMORY_ALLOCATION_ERROR;
119 return;
120 }
121
122 if (target != NULL) {
123 setTargetString(target);
124 }
125
126 switch (strength)
127 {
128 default:
129 strengthMask |= UCOL_TERTIARYORDERMASK;
130 /* fall through */
131
132 case UCOL_SECONDARY:
133 strengthMask |= UCOL_SECONDARYORDERMASK;
134 /* fall through */
135
136 case UCOL_PRIMARY:
137 strengthMask |= UCOL_PRIMARYORDERMASK;
138 }
139 }
140
~Target()141 Target::~Target()
142 {
143 ubrk_close(charBreakIterator);
144 ucol_closeElements(elements);
145
146 DELETE_ARRAY(ceb);
147 }
148
setTargetString(const UnicodeString * target)149 void Target::setTargetString(const UnicodeString *target)
150 {
151 if (charBreakIterator != NULL) {
152 ubrk_close(charBreakIterator);
153 ucol_closeElements(elements);
154 }
155
156 targetString = target;
157
158 if (targetString != NULL) {
159 UErrorCode status = U_ZERO_ERROR;
160
161 targetBuffer = targetString->getBuffer();
162 targetLength = targetString->length();
163
164 elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
165 ucol_forceHanImplicit(elements, &status);
166
167 charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
168 targetBuffer, targetLength, &status);
169 } else {
170 targetBuffer = NULL;
171 targetLength = 0;
172 }
173 }
174
nextCE(int32_t offset)175 const CEI *Target::nextCE(int32_t offset)
176 {
177 UErrorCode status = U_ZERO_ERROR;
178 int32_t low = -1, high = -1;
179 uint32_t order;
180 UBool cont = FALSE;
181
182 if (offset >= bufferMin && offset < bufferMax) {
183 return &ceb[offset];
184 }
185
186 if (bufferMax >= bufferSize || offset != bufferMax) {
187 return NULL;
188 }
189
190 do {
191 low = ucol_getOffset(elements);
192 order = ucol_next(elements, &status);
193 high = ucol_getOffset(elements);
194
195 if (order == (uint32_t)UCOL_NULLORDER) {
196 //high = low = -1;
197 break;
198 }
199
200 cont = isContinuation(order);
201 order &= strengthMask;
202
203 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
204 if (strength >= UCOL_QUATERNARY) {
205 order &= UCOL_PRIMARYORDERMASK;
206 } else {
207 order = UCOL_IGNORABLE;
208 }
209 }
210 } while (order == UCOL_IGNORABLE);
211
212 if (cont) {
213 order |= UCOL_CONTINUATION_MARKER;
214 }
215
216 ceb[offset].order = order;
217 ceb[offset].lowOffset = low;
218 ceb[offset].highOffset = high;
219
220 bufferMax += 1;
221
222 return &ceb[offset];
223 }
224
prevCE(int32_t offset)225 const CEI *Target::prevCE(int32_t offset)
226 {
227 UErrorCode status = U_ZERO_ERROR;
228 int32_t low = -1, high = -1;
229 uint32_t order;
230 UBool cont = FALSE;
231
232 if (offset >= bufferMin && offset < bufferMax) {
233 return &ceb[offset];
234 }
235
236 if (bufferMax >= bufferSize || offset != bufferMax) {
237 return NULL;
238 }
239
240 do {
241 high = ucol_getOffset(elements);
242 order = ucol_previous(elements, &status);
243 low = ucol_getOffset(elements);
244
245 if (order == (uint32_t)UCOL_NULLORDER) {
246 break;
247 }
248
249 cont = isContinuation(order);
250 order &= strengthMask;
251
252 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
253 if (strength >= UCOL_QUATERNARY) {
254 order &= UCOL_PRIMARYORDERMASK;
255 } else {
256 order = UCOL_IGNORABLE;
257 }
258 }
259 } while (order == UCOL_IGNORABLE);
260
261 bufferMax += 1;
262
263 if (cont) {
264 order |= UCOL_CONTINUATION_MARKER;
265 }
266
267 ceb[offset].order = order;
268 ceb[offset].lowOffset = low;
269 ceb[offset].highOffset = high;
270
271 return &ceb[offset];
272 }
273
stringLength()274 int32_t Target::stringLength()
275 {
276 if (targetString != NULL) {
277 return targetLength;
278 }
279
280 return 0;
281 }
282
charAt(int32_t offset)283 UChar Target::charAt(int32_t offset)
284 {
285 if (targetString != NULL) {
286 return targetBuffer[offset];
287 }
288
289 return 0x0000;
290 }
291
setOffset(int32_t offset)292 void Target::setOffset(int32_t offset)
293 {
294 UErrorCode status = U_ZERO_ERROR;
295
296 bufferMin = 0;
297 bufferMax = 0;
298
299 ucol_setOffset(elements, offset, &status);
300 }
301
setLast(int32_t last)302 void Target::setLast(int32_t last)
303 {
304 UErrorCode status = U_ZERO_ERROR;
305
306 bufferMin = 0;
307 bufferMax = 1;
308
309 ceb[0].order = UCOL_NULLORDER;
310 ceb[0].lowOffset = last;
311 ceb[0].highOffset = last;
312
313 ucol_setOffset(elements, last, &status);
314 }
315
getOffset()316 int32_t Target::getOffset()
317 {
318 return ucol_getOffset(elements);
319 }
320
isBreakBoundary(int32_t offset)321 UBool Target::isBreakBoundary(int32_t offset)
322 {
323 return ubrk_isBoundary(charBreakIterator, offset);
324 }
325
nextBreakBoundary(int32_t offset)326 int32_t Target::nextBreakBoundary(int32_t offset)
327 {
328 return ubrk_following(charBreakIterator, offset);
329 }
330
nextSafeBoundary(int32_t offset)331 int32_t Target::nextSafeBoundary(int32_t offset)
332 {
333 while (offset < targetLength) {
334 //UChar ch = charAt(offset);
335 UChar ch = targetBuffer[offset];
336
337 if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) {
338 return offset;
339 }
340
341 offset += 1;
342 }
343
344 return targetLength;
345 }
346
isIdentical(UnicodeString & pattern,int32_t start,int32_t end)347 UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
348 {
349 if (strength < UCOL_IDENTICAL) {
350 return TRUE;
351 }
352
353 // Note: We could use Normalizer::compare() or similar, but for short strings
354 // which may not be in FCD it might be faster to just NFD them.
355 UErrorCode status = U_ZERO_ERROR;
356 UnicodeString t2, p2;
357 nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status);
358 nfd.normalize(pattern, p2, status);
359 // return FALSE if NFD failed
360 return U_SUCCESS(status) && t2 == p2;
361 }
362
363 #define HASH_TABLE_SIZE 257
364
365 class BadCharacterTable : public UMemory
366 {
367 public:
368 BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
369 ~BadCharacterTable();
370
371 int32_t operator[](uint32_t ce) const;
372 int32_t getMaxSkip() const;
373 int32_t minLengthInChars(int32_t index);
374
375 private:
376 static int32_t hash(uint32_t ce);
377
378 int32_t maxSkip;
379 int32_t badCharacterTable[HASH_TABLE_SIZE];
380
381 int32_t *minLengthCache;
382 };
383
BadCharacterTable(CEList & patternCEs,CollData * data,UErrorCode & status)384 BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
385 : minLengthCache(NULL)
386 {
387 int32_t plen = patternCEs.size();
388
389 // **** need a better way to deal with this ****
390 if (U_FAILURE(status) || plen == 0) {
391 return;
392 }
393
394 int32_t *history = NEW_ARRAY(int32_t, plen);
395
396 if (history == NULL) {
397 status = U_MEMORY_ALLOCATION_ERROR;
398 return;
399 }
400
401 for (int32_t i = 0; i < plen; i += 1) {
402 history[i] = -1;
403 }
404
405 minLengthCache = NEW_ARRAY(int32_t, plen + 1);
406
407 if (minLengthCache == NULL) {
408 DELETE_ARRAY(history);
409 status = U_MEMORY_ALLOCATION_ERROR;
410 return;
411 }
412
413 maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);
414
415 for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
416 badCharacterTable[j] = maxSkip;
417 }
418
419 for(int32_t p = 1; p < plen; p += 1) {
420 minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);
421
422 // Make sure this entry is not bigger than the previous one.
423 // Otherwise, we might skip too far in some cases.
424 if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
425 minLengthCache[p] = minLengthCache[p - 1];
426 }
427 }
428
429 minLengthCache[plen] = 0;
430
431 for(int32_t p = 0; p < plen - 1; p += 1) {
432 badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
433 }
434
435 DELETE_ARRAY(history);
436 }
437
~BadCharacterTable()438 BadCharacterTable::~BadCharacterTable()
439 {
440 DELETE_ARRAY(minLengthCache);
441 }
442
operator [](uint32_t ce) const443 int32_t BadCharacterTable::operator[](uint32_t ce) const
444 {
445 return badCharacterTable[hash(ce)];
446 }
447
getMaxSkip() const448 int32_t BadCharacterTable::getMaxSkip() const
449 {
450 return maxSkip;
451 }
452
minLengthInChars(int32_t index)453 int32_t BadCharacterTable::minLengthInChars(int32_t index)
454 {
455 return minLengthCache[index];
456 }
457
hash(uint32_t ce)458 int32_t BadCharacterTable::hash(uint32_t ce)
459 {
460 return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;
461 }
462
463 class GoodSuffixTable : public UMemory
464 {
465 public:
466 GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
467 ~GoodSuffixTable();
468
469 int32_t operator[](int32_t offset) const;
470
471 private:
472 int32_t *goodSuffixTable;
473 };
474
GoodSuffixTable(CEList & patternCEs,BadCharacterTable & badCharacterTable,UErrorCode & status)475 GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
476 : goodSuffixTable(NULL)
477 {
478 int32_t patlen = patternCEs.size();
479
480 // **** need a better way to deal with this ****
481 if (U_FAILURE(status) || patlen <= 0) {
482 return;
483 }
484
485 int32_t *suff = NEW_ARRAY(int32_t, patlen);
486 int32_t start = patlen - 1, end = - 1;
487 int32_t maxSkip = badCharacterTable.getMaxSkip();
488
489 if (suff == NULL) {
490 status = U_MEMORY_ALLOCATION_ERROR;
491 return;
492 }
493
494 // initialze suff
495 suff[patlen - 1] = patlen;
496
497 for (int32_t i = patlen - 2; i >= 0; i -= 1) {
498 // (i > start) means we're inside the last suffix match we found
499 // ((patlen - 1) - end) is how far the end of that match is from end of pattern
500 // (i - start) is how far we are from start of that match
501 // (i + (patlen - 1) - end) is index of same character at end of pattern
502 // so if any suffix match at that character doesn't extend beyond the last match,
503 // it's the suffix for this character as well
504 if (i > start && suff[i + patlen - 1 - end] < i - start) {
505 suff[i] = suff[i + patlen - 1 - end];
506 } else {
507 start = end = i;
508
509 int32_t s = patlen;
510
511 while (start >= 0 && patternCEs[start] == patternCEs[--s]) {
512 start -= 1;
513 }
514
515 suff[i] = end - start;
516 }
517 }
518
519 // now build goodSuffixTable
520 goodSuffixTable = NEW_ARRAY(int32_t, patlen);
521
522 if (goodSuffixTable == NULL) {
523 DELETE_ARRAY(suff);
524 status = U_MEMORY_ALLOCATION_ERROR;
525 return;
526 }
527
528
529 // initialize entries to minLengthInChars of the pattern
530 for (int32_t i = 0; i < patlen; i += 1) {
531 goodSuffixTable[i] = maxSkip;
532 }
533
534 int32_t prefix = 0;
535
536 for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) {
537 if (suff[i] == i + 1) {
538 // this matching suffix is a prefix of the pattern
539 int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);
540
541 // for any mis-match before this suffix, we should skip
542 // so that the front of the pattern (i.e. the prefix)
543 // lines up with the front of the suffix.
544 // (patlen - 1 - i) is the start of the suffix
545 while (prefix < patlen - 1 - i) {
546 // value of maxSkip means never set...
547 if (goodSuffixTable[prefix] == maxSkip) {
548 goodSuffixTable[prefix] = prefixSkip;
549 }
550
551 prefix += 1;
552 }
553 }
554 }
555
556 for (int32_t i = 0; i < patlen - 1; i += 1) {
557 goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
558 }
559
560 DELETE_ARRAY(suff);
561 }
562
~GoodSuffixTable()563 GoodSuffixTable::~GoodSuffixTable()
564 {
565 DELETE_ARRAY(goodSuffixTable);
566 }
567
operator [](int32_t offset) const568 int32_t GoodSuffixTable::operator[](int32_t offset) const
569 {
570 return goodSuffixTable[offset];
571 }
572
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)573 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)
574
575
576 UBool BoyerMooreSearch::empty()
577 {
578 return patCEs->size() <= 0;
579 }
580
getData()581 CollData *BoyerMooreSearch::getData()
582 {
583 return data;
584 }
585
getPatternCEs()586 CEList *BoyerMooreSearch::getPatternCEs()
587 {
588 return patCEs;
589 }
590
getBadCharacterTable()591 BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
592 {
593 return badCharacterTable;
594 }
595
getGoodSuffixTable()596 GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
597 {
598 return goodSuffixTable;
599 }
600
BoyerMooreSearch(CollData * theData,const UnicodeString & patternString,const UnicodeString * targetString,UErrorCode & status)601 BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
602 UErrorCode &status)
603 : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
604 {
605
606 if (U_FAILURE(status)) {
607 return;
608 }
609
610 UCollator *collator = data->getCollator();
611
612 patCEs = new CEList(collator, patternString, status);
613
614 if (patCEs == NULL || U_FAILURE(status)) {
615 return;
616 }
617
618 badCharacterTable = new BadCharacterTable(*patCEs, data, status);
619
620 if (badCharacterTable == NULL || U_FAILURE(status)) {
621 return;
622 }
623
624 goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);
625
626 if (targetString != NULL) {
627 target = new Target(collator, targetString, patCEs->size(), status);
628 }
629 }
630
~BoyerMooreSearch()631 BoyerMooreSearch::~BoyerMooreSearch()
632 {
633 delete target;
634 delete goodSuffixTable;
635 delete badCharacterTable;
636 delete patCEs;
637 }
638
setTargetString(const UnicodeString * targetString,UErrorCode & status)639 void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
640 {
641 if (U_FAILURE(status)) {
642 return;
643 }
644
645 if (target == NULL) {
646 target = new Target(data->getCollator(), targetString, patCEs->size(), status);
647 } else {
648 target->setTargetString(targetString);
649 }
650 }
651
652 // **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
653 /*
654 * TODO:
655 * * deal with trailing (and leading?) ignorables.
656 * * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
657 */
search(int32_t offset,int32_t & start,int32_t & end)658 UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
659 {
660 /*UCollator *coll =*/ data->getCollator();
661 int32_t plen = patCEs->size();
662 int32_t tlen = target->stringLength();
663 int32_t maxSkip = badCharacterTable->getMaxSkip();
664 int32_t tOffset = offset + maxSkip;
665
666 if (plen <= 0) {
667 // Searching for a zero length pattern always fails.
668 start = end = -1;
669 return FALSE;
670 }
671
672 while (tOffset <= tlen) {
673 int32_t pIndex = plen - 1;
674 int32_t tIndex = 0;
675 int32_t lIndex = 0;
676
677 if (tOffset < tlen) {
678 // **** we really want to skip ahead enough to ****
679 // **** be sure we get at least 1 non-ignorable ****
680 // **** CE after the end of the pattern. ****
681 int32_t next = target->nextSafeBoundary(tOffset + 1);
682
683 target->setOffset(next);
684
685 for (lIndex = 0; ; lIndex += 1) {
686 const CEI *cei = target->prevCE(lIndex);
687 int32_t low = cei->lowOffset;
688 int32_t high = cei->highOffset;
689
690 if (high == 0 || (low < high && low <= tOffset)) {
691 if (low < tOffset) {
692 while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
693 lIndex -= 1;
694 }
695
696 if (high > tOffset) {
697 tOffset = high;
698 }
699 }
700
701 break;
702 }
703 }
704 } else {
705 target->setLast(tOffset);
706 lIndex = 0;
707 }
708
709 tIndex = ++lIndex;
710
711 // Iterate backward until we hit the beginning of the pattern
712 while (pIndex >= 0) {
713 uint32_t pce = (*patCEs)[pIndex];
714 const CEI *tcei = target->prevCE(tIndex++);
715
716
717 if (tcei->order != pce) {
718 // There is a mismatch at this position. Decide how far
719 // over to shift the pattern, then try again.
720
721 int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
722 #ifdef EXTRA_CAUTIOUS
723 int32_t old = tOffset;
724 #endif
725
726 tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);
727
728 if (gsOffset > tOffset) {
729 tOffset = gsOffset;
730 }
731
732 #ifdef EXTRA_CAUTIOUS
733 // Make sure we don't skip backwards...
734 if (tOffset <= old) {
735 tOffset = old + 1;
736 }
737 #endif
738
739 break;
740 }
741
742 pIndex -= 1;
743 }
744
745 if (pIndex < 0) {
746 // We made it back to the beginning of the pattern,
747 // which means we matched it all. Return the location.
748 const CEI firstCEI = *target->prevCE(tIndex - 1);
749 const CEI lastCEI = *target->prevCE(lIndex);
750 int32_t mStart = firstCEI.lowOffset;
751 int32_t minLimit = lastCEI.lowOffset;
752 int32_t maxLimit = lastCEI.highOffset;
753 int32_t mLimit;
754 UBool found = TRUE;
755
756 target->setOffset(/*tOffset*/maxLimit);
757
758 const CEI nextCEI = *target->nextCE(0);
759
760 if (nextCEI.lowOffset > maxLimit) {
761 maxLimit = nextCEI.lowOffset;
762 }
763
764 if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) {
765 found = FALSE;
766 }
767
768 if (! target->isBreakBoundary(mStart)) {
769 found = FALSE;
770 }
771
772 if (firstCEI.lowOffset == firstCEI.highOffset) {
773 found = FALSE;
774 }
775
776 mLimit = maxLimit;
777 if (minLimit < maxLimit) {
778 int32_t nbb = target->nextBreakBoundary(minLimit);
779
780 if (nbb >= lastCEI.highOffset) {
781 mLimit = nbb;
782 }
783 }
784
785 if (mLimit > maxLimit) {
786 found = FALSE;
787 }
788
789 if (! target->isBreakBoundary(mLimit)) {
790 found = FALSE;
791 }
792
793 if (! target->isIdentical(pattern, mStart, mLimit)) {
794 found = FALSE;
795 }
796
797 if (found) {
798 start = mStart;
799 end = mLimit;
800
801 return TRUE;
802 }
803
804 tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
805 }
806 // Otherwise, we're here because of a mismatch, so keep going....
807 }
808
809 // no match
810 start = -1;
811 end = -1;
812 return FALSE;
813 }
814
815 U_NAMESPACE_END
816
817 #endif // #if !UCONFIG_NO_COLLATION
818