1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // file: rbbi_cache.cpp
5
6 #include "unicode/utypes.h"
7
8 #if !UCONFIG_NO_BREAK_ITERATION
9
10 #include "unicode/ubrk.h"
11 #include "unicode/rbbi.h"
12
13 #include "rbbi_cache.h"
14
15 #include "brkeng.h"
16 #include "cmemory.h"
17 #include "rbbidata.h"
18 #include "rbbirb.h"
19 #include "uassert.h"
20 #include "uvectr32.h"
21
22 U_NAMESPACE_BEGIN
23
24 /*
25 * DictionaryCache implementation
26 */
27
DictionaryCache(RuleBasedBreakIterator * bi,UErrorCode & status)28 RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29 fBI(bi), fBreaks(status), fPositionInCache(-1),
30 fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
31 }
32
~DictionaryCache()33 RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34 }
35
reset()36 void RuleBasedBreakIterator::DictionaryCache::reset() {
37 fPositionInCache = -1;
38 fStart = 0;
39 fLimit = 0;
40 fFirstRuleStatusIndex = 0;
41 fOtherRuleStatusIndex = 0;
42 fBreaks.removeAllElements();
43 }
44
following(int32_t fromPos,int32_t * result,int32_t * statusIndex)45 UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
46 if (fromPos >= fLimit || fromPos < fStart) {
47 fPositionInCache = -1;
48 return FALSE;
49 }
50
51 // Sequential iteration, move from previous boundary to the following
52
53 int32_t r = 0;
54 if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55 ++fPositionInCache;
56 if (fPositionInCache >= fBreaks.size()) {
57 fPositionInCache = -1;
58 return FALSE;
59 }
60 r = fBreaks.elementAti(fPositionInCache);
61 U_ASSERT(r > fromPos);
62 *result = r;
63 *statusIndex = fOtherRuleStatusIndex;
64 return TRUE;
65 }
66
67 // Random indexing. Linear search for the boundary following the given position.
68
69 for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70 r= fBreaks.elementAti(fPositionInCache);
71 if (r > fromPos) {
72 *result = r;
73 *statusIndex = fOtherRuleStatusIndex;
74 return TRUE;
75 }
76 }
77 UPRV_UNREACHABLE;
78 }
79
80
preceding(int32_t fromPos,int32_t * result,int32_t * statusIndex)81 UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
82 if (fromPos <= fStart || fromPos > fLimit) {
83 fPositionInCache = -1;
84 return FALSE;
85 }
86
87 if (fromPos == fLimit) {
88 fPositionInCache = fBreaks.size() - 1;
89 if (fPositionInCache >= 0) {
90 U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
91 }
92 }
93
94 int32_t r;
95 if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
96 --fPositionInCache;
97 r = fBreaks.elementAti(fPositionInCache);
98 U_ASSERT(r < fromPos);
99 *result = r;
100 *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
101 return TRUE;
102 }
103
104 if (fPositionInCache == 0) {
105 fPositionInCache = -1;
106 return FALSE;
107 }
108
109 for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
110 r = fBreaks.elementAti(fPositionInCache);
111 if (r < fromPos) {
112 *result = r;
113 *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
114 return TRUE;
115 }
116 }
117 UPRV_UNREACHABLE;
118 }
119
populateDictionary(int32_t startPos,int32_t endPos,int32_t firstRuleStatus,int32_t otherRuleStatus)120 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
121 int32_t firstRuleStatus, int32_t otherRuleStatus) {
122 if ((endPos - startPos) <= 1) {
123 return;
124 }
125
126 reset();
127 fFirstRuleStatusIndex = firstRuleStatus;
128 fOtherRuleStatusIndex = otherRuleStatus;
129
130 int32_t rangeStart = startPos;
131 int32_t rangeEnd = endPos;
132
133 uint16_t category;
134 int32_t current;
135 UErrorCode status = U_ZERO_ERROR;
136 int32_t foundBreakCount = 0;
137 UText *text = &fBI->fText;
138
139 // Loop through the text, looking for ranges of dictionary characters.
140 // For each span, find the appropriate break engine, and ask it to find
141 // any breaks within the span.
142
143 utext_setNativeIndex(text, rangeStart);
144 UChar32 c = utext_current32(text);
145 category = UTRIE2_GET16(fBI->fData->fTrie, c);
146
147 while(U_SUCCESS(status)) {
148 while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
149 utext_next32(text); // TODO: cleaner loop structure.
150 c = utext_current32(text);
151 category = UTRIE2_GET16(fBI->fData->fTrie, c);
152 }
153 if (current >= rangeEnd) {
154 break;
155 }
156
157 // We now have a dictionary character. Get the appropriate language object
158 // to deal with it.
159 const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
160
161 // Ask the language object if there are any breaks. It will add them to the cache and
162 // leave the text pointer on the other side of its range, ready to search for the next one.
163 if (lbe != NULL) {
164 foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
165 }
166
167 // Reload the loop variables for the next go-round
168 c = utext_current32(text);
169 category = UTRIE2_GET16(fBI->fData->fTrie, c);
170 }
171
172 // If we found breaks, ensure that the first and last entries are
173 // the original starting and ending position. And initialize the
174 // cache iteration position to the first entry.
175
176 // printf("foundBreakCount = %d\n", foundBreakCount);
177 if (foundBreakCount > 0) {
178 U_ASSERT(foundBreakCount == fBreaks.size());
179 if (startPos < fBreaks.elementAti(0)) {
180 // The dictionary did not place a boundary at the start of the segment of text.
181 // Add one now. This should not commonly happen, but it would be easy for interactions
182 // of the rules for dictionary segments and the break engine implementations to
183 // inadvertently cause it. Cover it here, just in case.
184 fBreaks.insertElementAt(startPos, 0, status);
185 }
186 if (endPos > fBreaks.peeki()) {
187 fBreaks.push(endPos, status);
188 }
189 fPositionInCache = 0;
190 // Note: Dictionary matching may extend beyond the original limit.
191 fStart = fBreaks.elementAti(0);
192 fLimit = fBreaks.peeki();
193 } else {
194 // there were no language-based breaks, even though the segment contained
195 // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
196 // for this range will fail, and the calling code will fall back to the rule based boundaries.
197 }
198 }
199
200
201 /*
202 * BreakCache implemetation
203 */
204
BreakCache(RuleBasedBreakIterator * bi,UErrorCode & status)205 RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
206 fBI(bi), fSideBuffer(status) {
207 reset();
208 }
209
210
~BreakCache()211 RuleBasedBreakIterator::BreakCache::~BreakCache() {
212 }
213
214
reset(int32_t pos,int32_t ruleStatus)215 void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
216 fStartBufIdx = 0;
217 fEndBufIdx = 0;
218 fTextIdx = pos;
219 fBufIdx = 0;
220 fBoundaries[0] = pos;
221 fStatuses[0] = (uint16_t)ruleStatus;
222 }
223
224
current()225 int32_t RuleBasedBreakIterator::BreakCache::current() {
226 fBI->fPosition = fTextIdx;
227 fBI->fRuleStatusIndex = fStatuses[fBufIdx];
228 fBI->fDone = FALSE;
229 return fTextIdx;
230 }
231
232
following(int32_t startPos,UErrorCode & status)233 void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
234 if (U_FAILURE(status)) {
235 return;
236 }
237 if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
238 // startPos is in the cache. Do a next() from that position.
239 // TODO: an awkward set of interactions with bi->fDone
240 // seek() does not clear it; it can't because of interactions with populateNear().
241 // next() does not clear it in the fast-path case, where everything matters. Maybe it should.
242 // So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
243 fBI->fDone = false;
244 next();
245 }
246 return;
247 }
248
249
preceding(int32_t startPos,UErrorCode & status)250 void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
251 if (U_FAILURE(status)) {
252 return;
253 }
254 if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
255 if (startPos == fTextIdx) {
256 previous(status);
257 } else {
258 // seek() leaves the BreakCache positioned at the preceding boundary
259 // if the requested position is between two bounaries.
260 // current() pushes the BreakCache position out to the BreakIterator itself.
261 U_ASSERT(startPos > fTextIdx);
262 current();
263 }
264 }
265 return;
266 }
267
268
269 /*
270 * Out-of-line code for BreakCache::next().
271 * Cache does not already contain the boundary
272 */
nextOL()273 void RuleBasedBreakIterator::BreakCache::nextOL() {
274 fBI->fDone = !populateFollowing();
275 fBI->fPosition = fTextIdx;
276 fBI->fRuleStatusIndex = fStatuses[fBufIdx];
277 return;
278 }
279
280
previous(UErrorCode & status)281 void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
282 if (U_FAILURE(status)) {
283 return;
284 }
285 int32_t initialBufIdx = fBufIdx;
286 if (fBufIdx == fStartBufIdx) {
287 // At start of cache. Prepend to it.
288 populatePreceding(status);
289 } else {
290 // Cache already holds the next boundary
291 fBufIdx = modChunkSize(fBufIdx - 1);
292 fTextIdx = fBoundaries[fBufIdx];
293 }
294 fBI->fDone = (fBufIdx == initialBufIdx);
295 fBI->fPosition = fTextIdx;
296 fBI->fRuleStatusIndex = fStatuses[fBufIdx];
297 return;
298 }
299
300
seek(int32_t pos)301 UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
302 if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
303 return FALSE;
304 }
305 if (pos == fBoundaries[fStartBufIdx]) {
306 // Common case: seek(0), from BreakIterator::first()
307 fBufIdx = fStartBufIdx;
308 fTextIdx = fBoundaries[fBufIdx];
309 return TRUE;
310 }
311 if (pos == fBoundaries[fEndBufIdx]) {
312 fBufIdx = fEndBufIdx;
313 fTextIdx = fBoundaries[fBufIdx];
314 return TRUE;
315 }
316
317 int32_t min = fStartBufIdx;
318 int32_t max = fEndBufIdx;
319 while (min != max) {
320 int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
321 probe = modChunkSize(probe);
322 if (fBoundaries[probe] > pos) {
323 max = probe;
324 } else {
325 min = modChunkSize(probe + 1);
326 }
327 }
328 U_ASSERT(fBoundaries[max] > pos);
329 fBufIdx = modChunkSize(max - 1);
330 fTextIdx = fBoundaries[fBufIdx];
331 U_ASSERT(fTextIdx <= pos);
332 return TRUE;
333 }
334
335
populateNear(int32_t position,UErrorCode & status)336 UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
337 if (U_FAILURE(status)) {
338 return FALSE;
339 }
340 U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
341
342 // Find a boundary somewhere in the vicinity of the requested position.
343 // Depending on the safe rules and the text data, it could be either before, at, or after
344 // the requested position.
345
346
347 // If the requested position is not near already cached positions, clear the existing cache,
348 // find a near-by boundary and begin new cache contents there.
349
350 if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
351 int32_t aBoundary = 0;
352 int32_t ruleStatusIndex = 0;
353 if (position > 20) {
354 int32_t backupPos = fBI->handleSafePrevious(position);
355
356 if (backupPos > 0) {
357 // Advance to the boundary following the backup position.
358 // There is a complication: the safe reverse rules identify pairs of code points
359 // that are safe. If advancing from the safe point moves forwards by less than
360 // two code points, we need to advance one more time to ensure that the boundary
361 // is good, including a correct rules status value.
362 //
363 fBI->fPosition = backupPos;
364 aBoundary = fBI->handleNext();
365 if (aBoundary <= backupPos + 4) {
366 // +4 is a quick test for possibly having advanced only one codepoint.
367 // Four being the length of the longest potential code point, a supplementary in UTF-8
368 utext_setNativeIndex(&fBI->fText, aBoundary);
369 if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
370 // The initial handleNext() only advanced by a single code point. Go again.
371 aBoundary = fBI->handleNext(); // Safe rules identify safe pairs.
372 }
373 }
374 ruleStatusIndex = fBI->fRuleStatusIndex;
375 }
376 }
377 reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
378 }
379
380 // Fill in boundaries between existing cache content and the new requested position.
381
382 if (fBoundaries[fEndBufIdx] < position) {
383 // The last position in the cache precedes the requested position.
384 // Add following position(s) to the cache.
385 while (fBoundaries[fEndBufIdx] < position) {
386 if (!populateFollowing()) {
387 UPRV_UNREACHABLE;
388 }
389 }
390 fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer.
391 fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries.
392 while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos.
393 previous(status);
394 }
395 return true;
396 }
397
398 if (fBoundaries[fStartBufIdx] > position) {
399 // The first position in the cache is beyond the requested position.
400 // back up more until we get a boundary <= the requested position.
401 while (fBoundaries[fStartBufIdx] > position) {
402 populatePreceding(status);
403 }
404 fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer.
405 fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries.
406 while (fTextIdx < position) { // Move forwards to a position at or following the requested pos.
407 next();
408 }
409 if (fTextIdx > position) {
410 // If position is not itself a boundary, the next() loop above will overshoot.
411 // Back up one, leaving cache position at the boundary preceding the requested position.
412 previous(status);
413 }
414 return true;
415 }
416
417 U_ASSERT(fTextIdx == position);
418 return true;
419 }
420
421
422
populateFollowing()423 UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
424 int32_t fromPosition = fBoundaries[fEndBufIdx];
425 int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
426 int32_t pos = 0;
427 int32_t ruleStatusIdx = 0;
428
429 if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
430 addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
431 return TRUE;
432 }
433
434 fBI->fPosition = fromPosition;
435 pos = fBI->handleNext();
436 if (pos == UBRK_DONE) {
437 return FALSE;
438 }
439
440 ruleStatusIdx = fBI->fRuleStatusIndex;
441 if (fBI->fDictionaryCharCount > 0) {
442 // The text segment obtained from the rules includes dictionary characters.
443 // Subdivide it, with subdivided results going into the dictionary cache.
444 fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
445 if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
446 addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
447 return TRUE;
448 // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
449 // But be careful with interactions with populateNear().
450 }
451 }
452
453 // Rule based segment did not include dictionary characters.
454 // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
455 // meaning that we didn't take the return, above.
456 // Add its end point to the cache.
457 addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
458
459 // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
460 // (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
461 //
462 for (int count=0; count<6; ++count) {
463 pos = fBI->handleNext();
464 if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
465 break;
466 }
467 addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
468 }
469
470 return TRUE;
471 }
472
473
populatePreceding(UErrorCode & status)474 UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
475 if (U_FAILURE(status)) {
476 return FALSE;
477 }
478
479 int32_t fromPosition = fBoundaries[fStartBufIdx];
480 if (fromPosition == 0) {
481 return FALSE;
482 }
483
484 int32_t position = 0;
485 int32_t positionStatusIdx = 0;
486
487 if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
488 addPreceding(position, positionStatusIdx, UpdateCachePosition);
489 return TRUE;
490 }
491
492 int32_t backupPosition = fromPosition;
493
494 // Find a boundary somewhere preceding the first already-cached boundary
495 do {
496 backupPosition = backupPosition - 30;
497 if (backupPosition <= 0) {
498 backupPosition = 0;
499 } else {
500 backupPosition = fBI->handleSafePrevious(backupPosition);
501 }
502 if (backupPosition == UBRK_DONE || backupPosition == 0) {
503 position = 0;
504 positionStatusIdx = 0;
505 } else {
506 // Advance to the boundary following the backup position.
507 // There is a complication: the safe reverse rules identify pairs of code points
508 // that are safe. If advancing from the safe point moves forwards by less than
509 // two code points, we need to advance one more time to ensure that the boundary
510 // is good, including a correct rules status value.
511 //
512 fBI->fPosition = backupPosition;
513 position = fBI->handleNext();
514 if (position <= backupPosition + 4) {
515 // +4 is a quick test for possibly having advanced only one codepoint.
516 // Four being the length of the longest potential code point, a supplementary in UTF-8
517 utext_setNativeIndex(&fBI->fText, position);
518 if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
519 // The initial handleNext() only advanced by a single code point. Go again.
520 position = fBI->handleNext(); // Safe rules identify safe pairs.
521 }
522 }
523 positionStatusIdx = fBI->fRuleStatusIndex;
524 }
525 } while (position >= fromPosition);
526
527 // Find boundaries between the one we just located and the first already-cached boundary
528 // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
529
530 fSideBuffer.removeAllElements();
531 fSideBuffer.addElement(position, status);
532 fSideBuffer.addElement(positionStatusIdx, status);
533
534 do {
535 int32_t prevPosition = fBI->fPosition = position;
536 int32_t prevStatusIdx = positionStatusIdx;
537 position = fBI->handleNext();
538 positionStatusIdx = fBI->fRuleStatusIndex;
539 if (position == UBRK_DONE) {
540 break;
541 }
542
543 UBool segmentHandledByDictionary = FALSE;
544 if (fBI->fDictionaryCharCount != 0) {
545 // Segment from the rules includes dictionary characters.
546 // Subdivide it, with subdivided results going into the dictionary cache.
547 int32_t dictSegEndPosition = position;
548 fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
549 while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
550 segmentHandledByDictionary = true;
551 U_ASSERT(position > prevPosition);
552 if (position >= fromPosition) {
553 break;
554 }
555 U_ASSERT(position <= dictSegEndPosition);
556 fSideBuffer.addElement(position, status);
557 fSideBuffer.addElement(positionStatusIdx, status);
558 prevPosition = position;
559 }
560 U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
561 }
562
563 if (!segmentHandledByDictionary && position < fromPosition) {
564 fSideBuffer.addElement(position, status);
565 fSideBuffer.addElement(positionStatusIdx, status);
566 }
567 } while (position < fromPosition);
568
569 // Move boundaries from the side buffer to the main circular buffer.
570 UBool success = FALSE;
571 if (!fSideBuffer.isEmpty()) {
572 positionStatusIdx = fSideBuffer.popi();
573 position = fSideBuffer.popi();
574 addPreceding(position, positionStatusIdx, UpdateCachePosition);
575 success = TRUE;
576 }
577
578 while (!fSideBuffer.isEmpty()) {
579 positionStatusIdx = fSideBuffer.popi();
580 position = fSideBuffer.popi();
581 if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
582 // No space in circular buffer to hold a new preceding result while
583 // also retaining the current cache (iteration) position.
584 // Bailing out is safe; the cache will refill again if needed.
585 break;
586 }
587 }
588
589 return success;
590 }
591
592
addFollowing(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)593 void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
594 U_ASSERT(position > fBoundaries[fEndBufIdx]);
595 U_ASSERT(ruleStatusIdx <= UINT16_MAX);
596 int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
597 if (nextIdx == fStartBufIdx) {
598 fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
599 }
600 fBoundaries[nextIdx] = position;
601 fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
602 fEndBufIdx = nextIdx;
603 if (update == UpdateCachePosition) {
604 // Set current position to the newly added boundary.
605 fBufIdx = nextIdx;
606 fTextIdx = position;
607 } else {
608 // Retaining the original cache position.
609 // Check if the added boundary wraps around the buffer, and would over-write the original position.
610 // It's the responsibility of callers of this function to not add too many.
611 U_ASSERT(nextIdx != fBufIdx);
612 }
613 }
614
addPreceding(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)615 bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
616 U_ASSERT(position < fBoundaries[fStartBufIdx]);
617 U_ASSERT(ruleStatusIdx <= UINT16_MAX);
618 int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
619 if (nextIdx == fEndBufIdx) {
620 if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
621 // Failure. The insertion of the new boundary would claim the buffer position that is the
622 // current iteration position. And we also want to retain the current iteration position.
623 // (The buffer is already completely full of entries that precede the iteration position.)
624 return false;
625 }
626 fEndBufIdx = modChunkSize(fEndBufIdx - 1);
627 }
628 fBoundaries[nextIdx] = position;
629 fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
630 fStartBufIdx = nextIdx;
631 if (update == UpdateCachePosition) {
632 fBufIdx = nextIdx;
633 fTextIdx = position;
634 }
635 return true;
636 }
637
638
dumpCache()639 void RuleBasedBreakIterator::BreakCache::dumpCache() {
640 #ifdef RBBI_DEBUG
641 RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
642 for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
643 RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]);
644 if (i == fEndBufIdx) {
645 break;
646 }
647 }
648 #endif
649 }
650
651 U_NAMESPACE_END
652
653 #endif // #if !UCONFIG_NO_BREAK_ITERATION
654