1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // file: rbbi_cache.cpp
5
6 #include "unicode/utypes.h"
7
8 #if !UCONFIG_NO_BREAK_ITERATION
9
10 #include "unicode/ubrk.h"
11 #include "unicode/rbbi.h"
12
13 #include "rbbi_cache.h"
14
15 #include "brkeng.h"
16 #include "cmemory.h"
17 #include "rbbidata.h"
18 #include "rbbirb.h"
19 #include "uassert.h"
20 #include "uvectr32.h"
21
22 U_NAMESPACE_BEGIN
23
24 /*
25 * DictionaryCache implementation
26 */
27
DictionaryCache(RuleBasedBreakIterator * bi,UErrorCode & status)28 RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29 fBI(bi), fBreaks(status), fPositionInCache(-1),
30 fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
31 }
32
~DictionaryCache()33 RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34 }
35
reset()36 void RuleBasedBreakIterator::DictionaryCache::reset() {
37 fPositionInCache = -1;
38 fStart = 0;
39 fLimit = 0;
40 fFirstRuleStatusIndex = 0;
41 fOtherRuleStatusIndex = 0;
42 fBreaks.removeAllElements();
43 }
44
following(int32_t fromPos,int32_t * result,int32_t * statusIndex)45 UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
46 if (fromPos >= fLimit || fromPos < fStart) {
47 fPositionInCache = -1;
48 return FALSE;
49 }
50
51 // Sequential iteration, move from previous boundary to the following
52
53 int32_t r = 0;
54 if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55 ++fPositionInCache;
56 if (fPositionInCache >= fBreaks.size()) {
57 fPositionInCache = -1;
58 return FALSE;
59 }
60 r = fBreaks.elementAti(fPositionInCache);
61 U_ASSERT(r > fromPos);
62 *result = r;
63 *statusIndex = fOtherRuleStatusIndex;
64 return TRUE;
65 }
66
67 // Random indexing. Linear search for the boundary following the given position.
68
69 for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70 r= fBreaks.elementAti(fPositionInCache);
71 if (r > fromPos) {
72 *result = r;
73 *statusIndex = fOtherRuleStatusIndex;
74 return TRUE;
75 }
76 }
77 U_ASSERT(FALSE);
78 fPositionInCache = -1;
79 return FALSE;
80 }
81
82
preceding(int32_t fromPos,int32_t * result,int32_t * statusIndex)83 UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
84 if (fromPos <= fStart || fromPos > fLimit) {
85 fPositionInCache = -1;
86 return FALSE;
87 }
88
89 if (fromPos == fLimit) {
90 fPositionInCache = fBreaks.size() - 1;
91 if (fPositionInCache >= 0) {
92 U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
93 }
94 }
95
96 int32_t r;
97 if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
98 --fPositionInCache;
99 r = fBreaks.elementAti(fPositionInCache);
100 U_ASSERT(r < fromPos);
101 *result = r;
102 *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
103 return TRUE;
104 }
105
106 if (fPositionInCache == 0) {
107 fPositionInCache = -1;
108 return FALSE;
109 }
110
111 for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
112 r = fBreaks.elementAti(fPositionInCache);
113 if (r < fromPos) {
114 *result = r;
115 *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
116 return TRUE;
117 }
118 }
119 U_ASSERT(FALSE);
120 fPositionInCache = -1;
121 return FALSE;
122 }
123
populateDictionary(int32_t startPos,int32_t endPos,int32_t firstRuleStatus,int32_t otherRuleStatus)124 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
125 int32_t firstRuleStatus, int32_t otherRuleStatus) {
126 if ((endPos - startPos) <= 1) {
127 return;
128 }
129
130 reset();
131 fFirstRuleStatusIndex = firstRuleStatus;
132 fOtherRuleStatusIndex = otherRuleStatus;
133
134 int32_t rangeStart = startPos;
135 int32_t rangeEnd = endPos;
136
137 uint16_t category;
138 int32_t current;
139 UErrorCode status = U_ZERO_ERROR;
140 int32_t foundBreakCount = 0;
141 UText *text = &fBI->fText;
142
143 // Loop through the text, looking for ranges of dictionary characters.
144 // For each span, find the appropriate break engine, and ask it to find
145 // any breaks within the span.
146
147 utext_setNativeIndex(text, rangeStart);
148 UChar32 c = utext_current32(text);
149 category = UTRIE2_GET16(fBI->fData->fTrie, c);
150
151 while(U_SUCCESS(status)) {
152 while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
153 utext_next32(text); // TODO: cleaner loop structure.
154 c = utext_current32(text);
155 category = UTRIE2_GET16(fBI->fData->fTrie, c);
156 }
157 if (current >= rangeEnd) {
158 break;
159 }
160
161 // We now have a dictionary character. Get the appropriate language object
162 // to deal with it.
163 const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
164
165 // Ask the language object if there are any breaks. It will add them to the cache and
166 // leave the text pointer on the other side of its range, ready to search for the next one.
167 if (lbe != NULL) {
168 foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
169 }
170
171 // Reload the loop variables for the next go-round
172 c = utext_current32(text);
173 category = UTRIE2_GET16(fBI->fData->fTrie, c);
174 }
175
176 // If we found breaks, ensure that the first and last entries are
177 // the original starting and ending position. And initialize the
178 // cache iteration position to the first entry.
179
180 // printf("foundBreakCount = %d\n", foundBreakCount);
181 if (foundBreakCount > 0) {
182 U_ASSERT(foundBreakCount == fBreaks.size());
183 if (startPos < fBreaks.elementAti(0)) {
184 // The dictionary did not place a boundary at the start of the segment of text.
185 // Add one now. This should not commonly happen, but it would be easy for interactions
186 // of the rules for dictionary segments and the break engine implementations to
187 // inadvertently cause it. Cover it here, just in case.
188 fBreaks.insertElementAt(startPos, 0, status);
189 }
190 if (endPos > fBreaks.peeki()) {
191 fBreaks.push(endPos, status);
192 }
193 fPositionInCache = 0;
194 // Note: Dictionary matching may extend beyond the original limit.
195 fStart = fBreaks.elementAti(0);
196 fLimit = fBreaks.peeki();
197 } else {
198 // there were no language-based breaks, even though the segment contained
199 // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
200 // for this range will fail, and the calling code will fall back to the rule based boundaries.
201 }
202 }
203
204
205 /*
206 * BreakCache implemetation
207 */
208
BreakCache(RuleBasedBreakIterator * bi,UErrorCode & status)209 RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
210 fBI(bi), fSideBuffer(status) {
211 reset();
212 }
213
214
~BreakCache()215 RuleBasedBreakIterator::BreakCache::~BreakCache() {
216 }
217
218
reset(int32_t pos,int32_t ruleStatus)219 void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
220 fStartBufIdx = 0;
221 fEndBufIdx = 0;
222 fTextIdx = pos;
223 fBufIdx = 0;
224 fBoundaries[0] = pos;
225 fStatuses[0] = (uint16_t)ruleStatus;
226 }
227
228
current()229 int32_t RuleBasedBreakIterator::BreakCache::current() {
230 fBI->fPosition = fTextIdx;
231 fBI->fRuleStatusIndex = fStatuses[fBufIdx];
232 fBI->fDone = FALSE;
233 return fTextIdx;
234 }
235
236
following(int32_t startPos,UErrorCode & status)237 void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
238 if (U_FAILURE(status)) {
239 return;
240 }
241 if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
242 // startPos is in the cache. Do a next() from that position.
243 // TODO: an awkward set of interactions with bi->fDone
244 // seek() does not clear it; it can't because of interactions with populateNear().
245 // next() does not clear it in the fast-path case, where everything matters. Maybe it should.
246 // So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
247 fBI->fDone = false;
248 next();
249 }
250 return;
251 }
252
253
preceding(int32_t startPos,UErrorCode & status)254 void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
255 if (U_FAILURE(status)) {
256 return;
257 }
258 if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
259 if (startPos == fTextIdx) {
260 previous(status);
261 } else {
262 // seek() leaves the BreakCache positioned at the preceding boundary
263 // if the requested position is between two bounaries.
264 // current() pushes the BreakCache position out to the BreakIterator itself.
265 U_ASSERT(startPos > fTextIdx);
266 current();
267 }
268 }
269 return;
270 }
271
272
273 /*
274 * Out-of-line code for BreakCache::next().
275 * Cache does not already contain the boundary
276 */
nextOL()277 void RuleBasedBreakIterator::BreakCache::nextOL() {
278 fBI->fDone = !populateFollowing();
279 fBI->fPosition = fTextIdx;
280 fBI->fRuleStatusIndex = fStatuses[fBufIdx];
281 return;
282 }
283
284
previous(UErrorCode & status)285 void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
286 if (U_FAILURE(status)) {
287 return;
288 }
289 int32_t initialBufIdx = fBufIdx;
290 if (fBufIdx == fStartBufIdx) {
291 // At start of cache. Prepend to it.
292 populatePreceding(status);
293 } else {
294 // Cache already holds the next boundary
295 fBufIdx = modChunkSize(fBufIdx - 1);
296 fTextIdx = fBoundaries[fBufIdx];
297 }
298 fBI->fDone = (fBufIdx == initialBufIdx);
299 fBI->fPosition = fTextIdx;
300 fBI->fRuleStatusIndex = fStatuses[fBufIdx];
301 return;
302 }
303
304
seek(int32_t pos)305 UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
306 if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
307 return FALSE;
308 }
309 if (pos == fBoundaries[fStartBufIdx]) {
310 // Common case: seek(0), from BreakIterator::first()
311 fBufIdx = fStartBufIdx;
312 fTextIdx = fBoundaries[fBufIdx];
313 return TRUE;
314 }
315 if (pos == fBoundaries[fEndBufIdx]) {
316 fBufIdx = fEndBufIdx;
317 fTextIdx = fBoundaries[fBufIdx];
318 return TRUE;
319 }
320
321 int32_t min = fStartBufIdx;
322 int32_t max = fEndBufIdx;
323 while (min != max) {
324 int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
325 probe = modChunkSize(probe);
326 if (fBoundaries[probe] > pos) {
327 max = probe;
328 } else {
329 min = modChunkSize(probe + 1);
330 }
331 }
332 U_ASSERT(fBoundaries[max] > pos);
333 fBufIdx = modChunkSize(max - 1);
334 fTextIdx = fBoundaries[fBufIdx];
335 U_ASSERT(fTextIdx <= pos);
336 return TRUE;
337 }
338
339
populateNear(int32_t position,UErrorCode & status)340 UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
341 if (U_FAILURE(status)) {
342 return FALSE;
343 }
344 U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
345
346 // Find a boundary somewhere in the vicinity of the requested position.
347 // Depending on the safe rules and the text data, it could be either before, at, or after
348 // the requested position.
349
350
351 // If the requested position is not near already cached positions, clear the existing cache,
352 // find a near-by boundary and begin new cache contents there.
353
354 if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
355 int32_t aBoundary = 0;
356 int32_t ruleStatusIndex = 0;
357 if (position > 20) {
358 int32_t backupPos = fBI->handleSafePrevious(position);
359
360 if (backupPos > 0) {
361 // Advance to the boundary following the backup position.
362 // There is a complication: the safe reverse rules identify pairs of code points
363 // that are safe. If advancing from the safe point moves forwards by less than
364 // two code points, we need to advance one more time to ensure that the boundary
365 // is good, including a correct rules status value.
366 //
367 fBI->fPosition = backupPos;
368 aBoundary = fBI->handleNext();
369 if (aBoundary <= backupPos + 4) {
370 // +4 is a quick test for possibly having advanced only one codepoint.
371 // Four being the length of the longest potential code point, a supplementary in UTF-8
372 utext_setNativeIndex(&fBI->fText, aBoundary);
373 if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
374 // The initial handleNext() only advanced by a single code point. Go again.
375 aBoundary = fBI->handleNext(); // Safe rules identify safe pairs.
376 }
377 }
378 ruleStatusIndex = fBI->fRuleStatusIndex;
379 }
380 }
381 reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
382 }
383
384 // Fill in boundaries between existing cache content and the new requested position.
385
386 if (fBoundaries[fEndBufIdx] < position) {
387 // The last position in the cache precedes the requested position.
388 // Add following position(s) to the cache.
389 while (fBoundaries[fEndBufIdx] < position) {
390 if (!populateFollowing()) {
391 U_ASSERT(false);
392 return false;
393 }
394 }
395 fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer.
396 fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries.
397 while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos.
398 previous(status);
399 }
400 return true;
401 }
402
403 if (fBoundaries[fStartBufIdx] > position) {
404 // The first position in the cache is beyond the requested position.
405 // back up more until we get a boundary <= the requested position.
406 while (fBoundaries[fStartBufIdx] > position) {
407 populatePreceding(status);
408 }
409 fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer.
410 fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries.
411 while (fTextIdx < position) { // Move forwards to a position at or following the requested pos.
412 next();
413 }
414 if (fTextIdx > position) {
415 // If position is not itself a boundary, the next() loop above will overshoot.
416 // Back up one, leaving cache position at the boundary preceding the requested position.
417 previous(status);
418 }
419 return true;
420 }
421
422 U_ASSERT(fTextIdx == position);
423 return true;
424 }
425
426
427
populateFollowing()428 UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
429 int32_t fromPosition = fBoundaries[fEndBufIdx];
430 int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
431 int32_t pos = 0;
432 int32_t ruleStatusIdx = 0;
433
434 if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
435 addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
436 return TRUE;
437 }
438
439 fBI->fPosition = fromPosition;
440 pos = fBI->handleNext();
441 if (pos == UBRK_DONE) {
442 return FALSE;
443 }
444
445 ruleStatusIdx = fBI->fRuleStatusIndex;
446 if (fBI->fDictionaryCharCount > 0) {
447 // The text segment obtained from the rules includes dictionary characters.
448 // Subdivide it, with subdivided results going into the dictionary cache.
449 fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
450 if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
451 addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
452 return TRUE;
453 // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
454 // But be careful with interactions with populateNear().
455 }
456 }
457
458 // Rule based segment did not include dictionary characters.
459 // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
460 // meaning that we didn't take the return, above.
461 // Add its end point to the cache.
462 addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
463
464 // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
465 // (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
466 //
467 for (int count=0; count<6; ++count) {
468 pos = fBI->handleNext();
469 if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
470 break;
471 }
472 addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
473 }
474
475 return TRUE;
476 }
477
478
populatePreceding(UErrorCode & status)479 UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
480 if (U_FAILURE(status)) {
481 return FALSE;
482 }
483
484 int32_t fromPosition = fBoundaries[fStartBufIdx];
485 if (fromPosition == 0) {
486 return FALSE;
487 }
488
489 int32_t position = 0;
490 int32_t positionStatusIdx = 0;
491
492 if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
493 addPreceding(position, positionStatusIdx, UpdateCachePosition);
494 return TRUE;
495 }
496
497 int32_t backupPosition = fromPosition;
498
499 // Find a boundary somewhere preceding the first already-cached boundary
500 do {
501 backupPosition = backupPosition - 30;
502 if (backupPosition <= 0) {
503 backupPosition = 0;
504 } else {
505 backupPosition = fBI->handleSafePrevious(backupPosition);
506 }
507 if (backupPosition == UBRK_DONE || backupPosition == 0) {
508 position = 0;
509 positionStatusIdx = 0;
510 } else {
511 // Advance to the boundary following the backup position.
512 // There is a complication: the safe reverse rules identify pairs of code points
513 // that are safe. If advancing from the safe point moves forwards by less than
514 // two code points, we need to advance one more time to ensure that the boundary
515 // is good, including a correct rules status value.
516 //
517 fBI->fPosition = backupPosition;
518 position = fBI->handleNext();
519 if (position <= backupPosition + 4) {
520 // +4 is a quick test for possibly having advanced only one codepoint.
521 // Four being the length of the longest potential code point, a supplementary in UTF-8
522 utext_setNativeIndex(&fBI->fText, position);
523 if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
524 // The initial handleNext() only advanced by a single code point. Go again.
525 position = fBI->handleNext(); // Safe rules identify safe pairs.
526 }
527 };
528 positionStatusIdx = fBI->fRuleStatusIndex;
529 }
530 } while (position >= fromPosition);
531
532 // Find boundaries between the one we just located and the first already-cached boundary
533 // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
534
535 fSideBuffer.removeAllElements();
536 fSideBuffer.addElement(position, status);
537 fSideBuffer.addElement(positionStatusIdx, status);
538
539 do {
540 int32_t prevPosition = fBI->fPosition = position;
541 int32_t prevStatusIdx = positionStatusIdx;
542 position = fBI->handleNext();
543 positionStatusIdx = fBI->fRuleStatusIndex;
544 if (position == UBRK_DONE) {
545 break;
546 }
547
548 UBool segmentHandledByDictionary = FALSE;
549 if (fBI->fDictionaryCharCount != 0) {
550 // Segment from the rules includes dictionary characters.
551 // Subdivide it, with subdivided results going into the dictionary cache.
552 int32_t dictSegEndPosition = position;
553 fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
554 while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
555 segmentHandledByDictionary = true;
556 U_ASSERT(position > prevPosition);
557 if (position >= fromPosition) {
558 break;
559 }
560 U_ASSERT(position <= dictSegEndPosition);
561 fSideBuffer.addElement(position, status);
562 fSideBuffer.addElement(positionStatusIdx, status);
563 prevPosition = position;
564 }
565 U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
566 }
567
568 if (!segmentHandledByDictionary && position < fromPosition) {
569 fSideBuffer.addElement(position, status);
570 fSideBuffer.addElement(positionStatusIdx, status);
571 }
572 } while (position < fromPosition);
573
574 // Move boundaries from the side buffer to the main circular buffer.
575 UBool success = FALSE;
576 if (!fSideBuffer.isEmpty()) {
577 positionStatusIdx = fSideBuffer.popi();
578 position = fSideBuffer.popi();
579 addPreceding(position, positionStatusIdx, UpdateCachePosition);
580 success = TRUE;
581 }
582
583 while (!fSideBuffer.isEmpty()) {
584 positionStatusIdx = fSideBuffer.popi();
585 position = fSideBuffer.popi();
586 if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
587 // No space in circular buffer to hold a new preceding result while
588 // also retaining the current cache (iteration) position.
589 // Bailing out is safe; the cache will refill again if needed.
590 break;
591 }
592 }
593
594 return success;
595 }
596
597
addFollowing(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)598 void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
599 U_ASSERT(position > fBoundaries[fEndBufIdx]);
600 U_ASSERT(ruleStatusIdx <= UINT16_MAX);
601 int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
602 if (nextIdx == fStartBufIdx) {
603 fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
604 }
605 fBoundaries[nextIdx] = position;
606 fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
607 fEndBufIdx = nextIdx;
608 if (update == UpdateCachePosition) {
609 // Set current position to the newly added boundary.
610 fBufIdx = nextIdx;
611 fTextIdx = position;
612 } else {
613 // Retaining the original cache position.
614 // Check if the added boundary wraps around the buffer, and would over-write the original position.
615 // It's the responsibility of callers of this function to not add too many.
616 U_ASSERT(nextIdx != fBufIdx);
617 }
618 }
619
addPreceding(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)620 bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
621 U_ASSERT(position < fBoundaries[fStartBufIdx]);
622 U_ASSERT(ruleStatusIdx <= UINT16_MAX);
623 int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
624 if (nextIdx == fEndBufIdx) {
625 if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
626 // Failure. The insertion of the new boundary would claim the buffer position that is the
627 // current iteration position. And we also want to retain the current iteration position.
628 // (The buffer is already completely full of entries that precede the iteration position.)
629 return false;
630 }
631 fEndBufIdx = modChunkSize(fEndBufIdx - 1);
632 }
633 fBoundaries[nextIdx] = position;
634 fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
635 fStartBufIdx = nextIdx;
636 if (update == UpdateCachePosition) {
637 fBufIdx = nextIdx;
638 fTextIdx = position;
639 }
640 return true;
641 }
642
643
dumpCache()644 void RuleBasedBreakIterator::BreakCache::dumpCache() {
645 #ifdef RBBI_DEBUG
646 RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
647 for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
648 RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]);
649 if (i == fEndBufIdx) {
650 break;
651 }
652 }
653 #endif
654 }
655
656 U_NAMESPACE_END
657
658 #endif // #if !UCONFIG_NO_BREAK_ITERATION
659