• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2017 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 package android.icu.text;
5 
6 import java.nio.BufferOverflowException;
7 import java.util.Arrays;
8 
9 /**
10  * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
11  * in linear progression. Does not support moving/reordering of text.
12  * <p>
13  * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
14  * instances of this class using {@link #addReplace(int, int)} (for change edits) and
15  * {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
16  * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
17  * mapping between code points in the source and destination strings.
18  * <p>
19  * After all edits have been added, instances of this class should be considered immutable, and an
20  * {@link Edits.Iterator} can be used for queries.
21  * <p>
22  * There are four flavors of Edits.Iterator:
23  * <ul>
24  * <li>{@link #getFineIterator()} retains full granularity of change edits.
25  * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
26  * next() on the iterator, skips over no-change edits (unchanged regions).
27  * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
28  * edits are automatically merged during the construction phase.)
29  * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
30  * calling next() on the iterator, skips over no-change edits (unchanged regions).
31  * </ul>
32  * <p>
33  * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
34  * following fine edits:
35  * <ul>
36  * <li>abc ⇨ abc (no-change)
37  * <li>ß ⇨ ss (change)
38  * <li>D ⇨ d (change)
39  * <li>e ⇨ e (no-change)
40  * <li>F ⇨ f (change)
41  * </ul>
42  * and the following coarse edits (note how adjacent change edits get merged together):
43  * <ul>
44  * <li>abc ⇨ abc (no-change)
45  * <li>ßD ⇨ ssd (change)
46  * <li>e ⇨ e (no-change)
47  * <li>F ⇨ f (change)
48  * </ul>
49  * <p>
50  * The "fine changes" and "coarse changes" iterators will step through only the change edits when their
51  * {@link Edits.Iterator#next()} methods are called. They are identical to the non-change iterators when
52  * their {@link Edits.Iterator#findSourceIndex(int)} or {@link Edits.Iterator#findDestinationIndex(int)}
53  * methods are used to walk through the string.
54  * <p>
55  * For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
56  * UCharacterCaseTest.java.
57  */
58 public final class Edits {
59     // 0000uuuuuuuuuuuu records u+1 unchanged text units.
60     private static final int MAX_UNCHANGED_LENGTH = 0x1000;
61     private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
62 
63     // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
64     private static final int MAX_SHORT_CHANGE_OLD_LENGTH = 6;
65     private static final int MAX_SHORT_CHANGE_NEW_LENGTH = 7;
66     private static final int SHORT_CHANGE_NUM_MASK = 0x1ff;
67     private static final int MAX_SHORT_CHANGE = 0x6fff;
68 
69     // 0111mmmmmmnnnnnn records a replacement of m text units with n.
70     // m or n = 61: actual length follows in the next edits array unit.
71     // m or n = 62..63: actual length follows in the next two edits array units.
72     // Bit 30 of the actual length is in the head unit.
73     // Trailing units have bit 15 set.
74     private static final int LENGTH_IN_1TRAIL = 61;
75     private static final int LENGTH_IN_2TRAIL = 62;
76 
77     private static final int STACK_CAPACITY = 100;
78     private char[] array;
79     private int length;
80     private int delta;
81     private int numChanges;
82 
83     /**
84      * Constructs an empty object.
85      */
Edits()86     public Edits() {
87         array = new char[STACK_CAPACITY];
88     }
89 
90     /**
91      * Resets the data but may not release memory.
92      */
reset()93     public void reset() {
94         length = delta = numChanges = 0;
95     }
96 
setLastUnit(int last)97     private void setLastUnit(int last) {
98         array[length - 1] = (char)last;
99     }
lastUnit()100     private int lastUnit() {
101         return length > 0 ? array[length - 1] : 0xffff;
102     }
103 
104     /**
105      * Adds a no-change edit: a record for an unchanged segment of text.
106      * Normally called from inside ICU string transformation functions, not user code.
107      */
addUnchanged(int unchangedLength)108     public void addUnchanged(int unchangedLength) {
109         if(unchangedLength < 0) {
110             throw new IllegalArgumentException(
111                     "addUnchanged(" + unchangedLength + "): length must not be negative");
112         }
113         // Merge into previous unchanged-text record, if any.
114         int last = lastUnit();
115         if(last < MAX_UNCHANGED) {
116             int remaining = MAX_UNCHANGED - last;
117             if (remaining >= unchangedLength) {
118                 setLastUnit(last + unchangedLength);
119                 return;
120             }
121             setLastUnit(MAX_UNCHANGED);
122             unchangedLength -= remaining;
123         }
124         // Split large lengths into multiple units.
125         while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
126             append(MAX_UNCHANGED);
127             unchangedLength -= MAX_UNCHANGED_LENGTH;
128         }
129         // Write a small (remaining) length.
130         if(unchangedLength > 0) {
131             append(unchangedLength - 1);
132         }
133     }
134 
135     /**
136      * Adds a change edit: a record for a text replacement/insertion/deletion.
137      * Normally called from inside ICU string transformation functions, not user code.
138      */
addReplace(int oldLength, int newLength)139     public void addReplace(int oldLength, int newLength) {
140         if(oldLength < 0 || newLength < 0) {
141             throw new IllegalArgumentException(
142                     "addReplace(" + oldLength + ", " + newLength +
143                     "): both lengths must be non-negative");
144         }
145         if (oldLength == 0 && newLength == 0) {
146             return;
147         }
148         ++numChanges;
149         int newDelta = newLength - oldLength;
150         if (newDelta != 0) {
151             if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
152                     (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
153                 // Integer overflow or underflow.
154                 throw new IndexOutOfBoundsException();
155             }
156             delta += newDelta;
157         }
158 
159         if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
160                 newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
161             // Merge into previous same-lengths short-replacement record, if any.
162             int u = (oldLength << 12) | (newLength << 9);
163             int last = lastUnit();
164             if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
165                     (last & ~SHORT_CHANGE_NUM_MASK) == u &&
166                     (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
167                 setLastUnit(last + 1);
168                 return;
169             }
170             append(u);
171             return;
172         }
173 
174         int head = 0x7000;
175         if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
176             head |= oldLength << 6;
177             head |= newLength;
178             append(head);
179         } else if ((array.length - length) >= 5 || growArray()) {
180             int limit = length + 1;
181             if(oldLength < LENGTH_IN_1TRAIL) {
182                 head |= oldLength << 6;
183             } else if(oldLength <= 0x7fff) {
184                 head |= LENGTH_IN_1TRAIL << 6;
185                 array[limit++] = (char)(0x8000 | oldLength);
186             } else {
187                 head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
188                 array[limit++] = (char)(0x8000 | (oldLength >> 15));
189                 array[limit++] = (char)(0x8000 | oldLength);
190             }
191             if(newLength < LENGTH_IN_1TRAIL) {
192                 head |= newLength;
193             } else if(newLength <= 0x7fff) {
194                 head |= LENGTH_IN_1TRAIL;
195                 array[limit++] = (char)(0x8000 | newLength);
196             } else {
197                 head |= LENGTH_IN_2TRAIL + (newLength >> 30);
198                 array[limit++] = (char)(0x8000 | (newLength >> 15));
199                 array[limit++] = (char)(0x8000 | newLength);
200             }
201             array[length] = (char)head;
202             length = limit;
203         }
204     }
205 
append(int r)206     private void append(int r) {
207         if(length < array.length || growArray()) {
208             array[length++] = (char)r;
209         }
210     }
211 
growArray()212     private boolean growArray() {
213         int newCapacity;
214         if (array.length == STACK_CAPACITY) {
215             newCapacity = 2000;
216         } else if (array.length == Integer.MAX_VALUE) {
217             throw new BufferOverflowException();
218         } else if (array.length >= (Integer.MAX_VALUE / 2)) {
219             newCapacity = Integer.MAX_VALUE;
220         } else {
221             newCapacity = 2 * array.length;
222         }
223         // Grow by at least 5 units so that a maximal change record will fit.
224         if ((newCapacity - array.length) < 5) {
225             throw new BufferOverflowException();
226         }
227         array = Arrays.copyOf(array, newCapacity);
228         return true;
229     }
230 
231     /**
232      * How much longer is the new text compared with the old text?
233      * @return new length minus old length
234      */
lengthDelta()235     public int lengthDelta() { return delta; }
236     /**
237      * @return true if there are any change edits
238      */
hasChanges()239     public boolean hasChanges()  { return numChanges != 0; }
240 
241     /**
242      * @return the number of change edits
243      */
numberOfChanges()244     public int numberOfChanges() { return numChanges; }
245 
246     /**
247      * Access to the list of edits.
248      * <p>
249      * At any moment in time, an instance of this class points to a single edit: a "window" into a span
250      * of the source string and the corresponding span of the destination string. The source string span
251      * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
252      * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
253      * <p>
254      * The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
255      * and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
256      * to make it point to the corresponding edit.
257      * <p>
258      * For more information, see the documentation for {@link Edits}.
259      * <p>
260      * Note: Although this class is called "Iterator", it does not implement {@link java.util.Iterator}.
261      *
262      * @see #getCoarseIterator
263      * @see #getFineIterator
264      */
265     public static final class Iterator {
266         private final char[] array;
267         private int index;
268         private final int length;
269         /**
270          * 0 if we are not within compressed equal-length changes.
271          * Otherwise the number of remaining changes, including the current one.
272          */
273         private int remaining;
274         private final boolean onlyChanges_, coarse;
275 
276         private int dir;  // iteration direction: back(<0), initial(0), forward(>0)
277         private boolean changed;
278         private int oldLength_, newLength_;
279         private int srcIndex, replIndex, destIndex;
280 
Iterator(char[] a, int len, boolean oc, boolean crs)281         private Iterator(char[] a, int len, boolean oc, boolean crs) {
282             array = a;
283             length = len;
284             onlyChanges_ = oc;
285             coarse = crs;
286         }
287 
readLength(int head)288         private int readLength(int head) {
289             if (head < LENGTH_IN_1TRAIL) {
290                 return head;
291             } else if (head < LENGTH_IN_2TRAIL) {
292                 assert(index < length);
293                 assert(array[index] >= 0x8000);
294                 return array[index++] & 0x7fff;
295             } else {
296                 assert((index + 2) <= length);
297                 assert(array[index] >= 0x8000);
298                 assert(array[index + 1] >= 0x8000);
299                 int len = ((head & 1) << 30) |
300                         ((array[index] & 0x7fff) << 15) |
301                         (array[index + 1] & 0x7fff);
302                 index += 2;
303                 return len;
304             }
305         }
306 
updateNextIndexes()307         private void updateNextIndexes() {
308             srcIndex += oldLength_;
309             if (changed) {
310                 replIndex += newLength_;
311             }
312             destIndex += newLength_;
313         }
314 
updatePreviousIndexes()315         private void updatePreviousIndexes() {
316             srcIndex -= oldLength_;
317             if (changed) {
318                 replIndex -= newLength_;
319             }
320             destIndex -= newLength_;
321         }
322 
noNext()323         private boolean noNext() {
324             // No change before or beyond the string.
325             dir = 0;
326             changed = false;
327             oldLength_ = newLength_ = 0;
328             return false;
329         }
330 
331         /**
332          * Advances the iterator to the next edit.
333          * @return true if there is another edit
334          */
next()335         public boolean next() {
336             return next(onlyChanges_);
337         }
338 
next(boolean onlyChanges)339         private boolean next(boolean onlyChanges) {
340             // Forward iteration: Update the string indexes to the limit of the current span,
341             // and post-increment-read array units to assemble a new span.
342             // Leaves the array index one after the last unit of that span.
343             if (dir > 0) {
344                 updateNextIndexes();
345             } else {
346                 if (dir < 0) {
347                     // Turn around from previous() to next().
348                     // Post-increment-read the same span again.
349                     if (remaining > 0) {
350                         // Fine-grained iterator:
351                         // Stay on the current one of a sequence of compressed changes.
352                         ++index;  // next() rests on the index after the sequence unit.
353                         dir = 1;
354                         return true;
355                     }
356                 }
357                 dir = 1;
358             }
359             if (remaining >= 1) {
360                 // Fine-grained iterator: Continue a sequence of compressed changes.
361                 if (remaining > 1) {
362                     --remaining;
363                     return true;
364                 }
365                 remaining = 0;
366             }
367             if (index >= length) {
368                 return noNext();
369             }
370             int u = array[index++];
371             if (u <= MAX_UNCHANGED) {
372                 // Combine adjacent unchanged ranges.
373                 changed = false;
374                 oldLength_ = u + 1;
375                 while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
376                     ++index;
377                     oldLength_ += u + 1;
378                 }
379                 newLength_ = oldLength_;
380                 if (onlyChanges) {
381                     updateNextIndexes();
382                     if (index >= length) {
383                         return noNext();
384                     }
385                     // already fetched u > MAX_UNCHANGED at index
386                     ++index;
387                 } else {
388                     return true;
389                 }
390             }
391             changed = true;
392             if (u <= MAX_SHORT_CHANGE) {
393                 int oldLen = u >> 12;
394                 int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
395                 int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
396                 if (coarse) {
397                     oldLength_ = num * oldLen;
398                     newLength_ = num * newLen;
399                 } else {
400                     // Split a sequence of changes that was compressed into one unit.
401                     oldLength_ = oldLen;
402                     newLength_ = newLen;
403                     if (num > 1) {
404                         remaining = num;  // This is the first of two or more changes.
405                     }
406                     return true;
407                 }
408             } else {
409                 assert(u <= 0x7fff);
410                 oldLength_ = readLength((u >> 6) & 0x3f);
411                 newLength_ = readLength(u & 0x3f);
412                 if (!coarse) {
413                     return true;
414                 }
415             }
416             // Combine adjacent changes.
417             while (index < length && (u = array[index]) > MAX_UNCHANGED) {
418                 ++index;
419                 if (u <= MAX_SHORT_CHANGE) {
420                     int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
421                     oldLength_ += (u >> 12) * num;
422                     newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
423                 } else {
424                     assert(u <= 0x7fff);
425                     oldLength_ += readLength((u >> 6) & 0x3f);
426                     newLength_ += readLength(u & 0x3f);
427                 }
428             }
429             return true;
430         }
431 
previous()432         private boolean previous() {
433             // Backward iteration: Pre-decrement-read array units to assemble a new span,
434             // then update the string indexes to the start of that span.
435             // Leaves the array index on the head unit of that span.
436             if (dir >= 0) {
437                 if (dir > 0) {
438                     // Turn around from next() to previous().
439                     // Set the string indexes to the span limit and
440                     // pre-decrement-read the same span again.
441                     if (remaining > 0) {
442                         // Fine-grained iterator:
443                         // Stay on the current one of a sequence of compressed changes.
444                         --index;  // previous() rests on the sequence unit.
445                         dir = -1;
446                         return true;
447                     }
448                     updateNextIndexes();
449                 }
450                 dir = -1;
451             }
452             if (remaining > 0) {
453                 // Fine-grained iterator: Continue a sequence of compressed changes.
454                 int u = array[index];
455                 assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
456                 if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
457                     ++remaining;
458                     updatePreviousIndexes();
459                     return true;
460                 }
461                 remaining = 0;
462             }
463             if (index <= 0) {
464                 return noNext();
465             }
466             int u = array[--index];
467             if (u <= MAX_UNCHANGED) {
468                 // Combine adjacent unchanged ranges.
469                 changed = false;
470                 oldLength_ = u + 1;
471                 while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
472                     --index;
473                     oldLength_ += u + 1;
474                 }
475                 newLength_ = oldLength_;
476                 // No need to handle onlyChanges as long as previous() is called only from findIndex().
477                 updatePreviousIndexes();
478                 return true;
479             }
480             changed = true;
481             if (u <= MAX_SHORT_CHANGE) {
482                 int oldLen = u >> 12;
483                 int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
484                 int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
485                 if (coarse) {
486                     oldLength_ = num * oldLen;
487                     newLength_ = num * newLen;
488                 } else {
489                     // Split a sequence of changes that was compressed into one unit.
490                     oldLength_ = oldLen;
491                     newLength_ = newLen;
492                     if (num > 1) {
493                         remaining = 1;  // This is the last of two or more changes.
494                     }
495                     updatePreviousIndexes();
496                     return true;
497                 }
498             } else {
499                 if (u <= 0x7fff) {
500                     // The change is encoded in u alone.
501                     oldLength_ = readLength((u >> 6) & 0x3f);
502                     newLength_ = readLength(u & 0x3f);
503                 } else {
504                     // Back up to the head of the change, read the lengths,
505                     // and reset the index to the head again.
506                     assert(index > 0);
507                     while ((u = array[--index]) > 0x7fff) {}
508                     assert(u > MAX_SHORT_CHANGE);
509                     int headIndex = index++;
510                     oldLength_ = readLength((u >> 6) & 0x3f);
511                     newLength_ = readLength(u & 0x3f);
512                     index = headIndex;
513                 }
514                 if (!coarse) {
515                     updatePreviousIndexes();
516                     return true;
517                 }
518             }
519             // Combine adjacent changes.
520             while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
521                 --index;
522                 if (u <= MAX_SHORT_CHANGE) {
523                     int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
524                     oldLength_ += (u >> 12) * num;
525                     newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
526                 } else if (u <= 0x7fff) {
527                     // Read the lengths, and reset the index to the head again.
528                     int headIndex = index++;
529                     oldLength_ += readLength((u >> 6) & 0x3f);
530                     newLength_ += readLength(u & 0x3f);
531                     index = headIndex;
532                 }
533             }
534             updatePreviousIndexes();
535             return true;
536         }
537 
538         /**
539          * Moves the iterator to the edit that contains the source index.
540          * The source index may be found in a no-change edit
541          * even if normal iteration would skip no-change edits.
542          * Normal iteration can continue from a found edit.
543          *
544          * <p>The iterator state before this search logically does not matter.
545          * (It may affect the performance of the search.)
546          *
547          * <p>The iterator state after this search is undefined
548          * if the source index is out of bounds for the source string.
549          *
550          * @param i source index
551          * @return true if the edit for the source index was found
552          */
findSourceIndex(int i)553         public boolean findSourceIndex(int i) {
554             return findIndex(i, true) == 0;
555         }
556 
557         /**
558          * Moves the iterator to the edit that contains the destination index.
559          * The destination index may be found in a no-change edit
560          * even if normal iteration would skip no-change edits.
561          * Normal iteration can continue from a found edit.
562          *
563          * <p>The iterator state before this search logically does not matter.
564          * (It may affect the performance of the search.)
565          *
566          * <p>The iterator state after this search is undefined
567          * if the source index is out of bounds for the source string.
568          *
569          * @param i destination index
570          * @return true if the edit for the destination index was found
571          */
findDestinationIndex(int i)572         public boolean findDestinationIndex(int i) {
573             return findIndex(i, false) == 0;
574         }
575 
576         /** @return -1: error or i<0; 0: found; 1: i>=string length */
findIndex(int i, boolean findSource)577         private int findIndex(int i, boolean findSource) {
578             if (i < 0) { return -1; }
579             int spanStart, spanLength;
580             if (findSource) {  // find source index
581                 spanStart = srcIndex;
582                 spanLength = oldLength_;
583             } else {  // find destination index
584                 spanStart = destIndex;
585                 spanLength = newLength_;
586             }
587             if (i < spanStart) {
588                 if (i >= (spanStart / 2)) {
589                     // Search backwards.
590                     for (;;) {
591                         boolean hasPrevious = previous();
592                         assert(hasPrevious);  // because i>=0 and the first span starts at 0
593                         spanStart = findSource ? srcIndex : destIndex;
594                         if (i >= spanStart) {
595                             // The index is in the current span.
596                             return 0;
597                         }
598                         if (remaining > 0) {
599                             // Is the index in one of the remaining compressed edits?
600                             // spanStart is the start of the current span, first of the remaining ones.
601                             spanLength = findSource ? oldLength_ : newLength_;
602                             int u = array[index];
603                             assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
604                             int num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
605                             int len = num * spanLength;
606                             if (i >= (spanStart - len)) {
607                                 int n = ((spanStart - i - 1) / spanLength) + 1;
608                                 // 1 <= n <= num
609                                 srcIndex -= n * oldLength_;
610                                 replIndex -= n * newLength_;
611                                 destIndex -= n * newLength_;
612                                 remaining += n;
613                                 return 0;
614                             }
615                             // Skip all of these edits at once.
616                             srcIndex -= num * oldLength_;
617                             replIndex -= num * newLength_;
618                             destIndex -= num * newLength_;
619                             remaining = 0;
620                         }
621                     }
622                 }
623                 // Reset the iterator to the start.
624                 dir = 0;
625                 index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
626             } else if (i < (spanStart + spanLength)) {
627                 // The index is in the current span.
628                 return 0;
629             }
630             while (next(false)) {
631                 if (findSource) {
632                     spanStart = srcIndex;
633                     spanLength = oldLength_;
634                 } else {
635                     spanStart = destIndex;
636                     spanLength = newLength_;
637                 }
638                 if (i < (spanStart + spanLength)) {
639                     // The index is in the current span.
640                     return 0;
641                 }
642                 if (remaining > 1) {
643                     // Is the index in one of the remaining compressed edits?
644                     // spanStart is the start of the current span, first of the remaining ones.
645                     int len = remaining * spanLength;
646                     if (i < (spanStart + len)) {
647                         int n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
648                         srcIndex += n * oldLength_;
649                         replIndex += n * newLength_;
650                         destIndex += n * newLength_;
651                         remaining -= n;
652                         return 0;
653                     }
654                     // Make next() skip all of these edits at once.
655                     oldLength_ *= remaining;
656                     newLength_ *= remaining;
657                     remaining = 0;
658                 }
659             }
660             return 1;
661         }
662 
663         /**
664          * Computes the destination index corresponding to the given source index.
665          * If the source index is inside a change edit (not at its start),
666          * then the destination index at the end of that edit is returned,
667          * since there is no information about index mapping inside a change edit.
668          *
669          * <p>(This means that indexes to the start and middle of an edit,
670          * for example around a grapheme cluster, are mapped to indexes
671          * encompassing the entire edit.
672          * The alternative, mapping an interior index to the start,
673          * would map such an interval to an empty one.)
674          *
675          * <p>This operation will usually but not always modify this object.
676          * The iterator state after this search is undefined.
677          *
678          * @param i source index
679          * @return destination index; undefined if i is not 0..string length
680          */
681         public int destinationIndexFromSourceIndex(int i) {
682             int where = findIndex(i, true);
683             if (where < 0) {
684                 // Error or before the string.
685                 return 0;
686             }
687             if (where > 0 || i == srcIndex) {
688                 // At or after string length, or at start of the found span.
689                 return destIndex;
690             }
691             if (changed) {
692                 // In a change span, map to its end.
693                 return destIndex + newLength_;
694             } else {
695                 // In an unchanged span, offset 1:1 within it.
696                 return destIndex + (i - srcIndex);
697             }
698         }
699 
700         /**
701          * Computes the source index corresponding to the given destination index.
702          * If the destination index is inside a change edit (not at its start),
703          * then the source index at the end of that edit is returned,
704          * since there is no information about index mapping inside a change edit.
705          *
706          * <p>(This means that indexes to the start and middle of an edit,
707          * for example around a grapheme cluster, are mapped to indexes
708          * encompassing the entire edit.
709          * The alternative, mapping an interior index to the start,
710          * would map such an interval to an empty one.)
711          *
712          * <p>This operation will usually but not always modify this object.
713          * The iterator state after this search is undefined.
714          *
715          * @param i destination index
716          * @return source index; undefined if i is not 0..string length
717          */
718         public int sourceIndexFromDestinationIndex(int i) {
719             int where = findIndex(i, false);
720             if (where < 0) {
721                 // Error or before the string.
722                 return 0;
723             }
724             if (where > 0 || i == destIndex) {
725                 // At or after string length, or at start of the found span.
726                 return srcIndex;
727             }
728             if (changed) {
729                 // In a change span, map to its end.
730                 return srcIndex + oldLength_;
731             } else {
732                 // In an unchanged span, offset within it.
733                 return srcIndex + (i - destIndex);
734             }
735         }
736 
737         /**
738          * Returns whether the edit currently represented by the iterator is a change edit.
739          *
740          * @return true if this edit replaces oldLength() units with newLength() different ones.
741          *         false if oldLength units remain unchanged.
742          */
743         public boolean hasChange() { return changed; }
744 
745         /**
746          * The length of the current span in the source string, which starts at {@link #sourceIndex}.
747          *
748          * @return the number of units in the source string which are replaced or remain unchanged.
749          */
750         public int oldLength() { return oldLength_; }
751 
752         /**
753          * The length of the current span in the destination string, which starts at
754          * {@link #destinationIndex}, or in the replacement string, which starts at
755          * {@link #replacementIndex}.
756          *
757          * @return the number of units in the destination string, if hasChange() is true. Same as
758          *         oldLength if hasChange() is false.
759          */
760         public int newLength() { return newLength_; }
761 
762         /**
763          * The start index of the current span in the source string; the span has length
764          * {@link #oldLength}.
765          *
766          * @return the current index into the source string
767          */
768         public int sourceIndex() { return srcIndex; }
769 
770         /**
771          * The start index of the current span in the replacement string; the span has length
772          * {@link #newLength}. Well-defined only if the current edit is a change edit.
773          * <p>
774          * The <em>replacement string</em> is the concatenation of all substrings of the destination
775          * string corresponding to change edits.
776          * <p>
777          * This method is intended to be used together with operations that write only replacement
778          * characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
779          * in-place.
780          *
781          * @return the current index into the replacement-characters-only string, not counting unchanged
782          *         spans
783          */
784         public int replacementIndex() {
785             // TODO: Throw an exception if we aren't in a change edit?
786             return replIndex;
787         }
788 
789         /**
790          * The start index of the current span in the destination string; the span has length
791          * {@link #newLength}.
792          *
793          * @return the current index into the full destination string
794          */
795         public int destinationIndex() { return destIndex; }
796 
797         /**
798          * A string representation of the current edit represented by the iterator for debugging. You
799          * should not depend on the contents of the return string; it may change over time.
800          * @return a string representation of the object.
801          */
802         @Override
803         public String toString() {
804             StringBuilder sb = new StringBuilder();
805             sb.append(super.toString());
806             sb.append("{ src[");
807             sb.append(srcIndex);
808             sb.append("..");
809             sb.append(srcIndex + oldLength_);
810             if (changed) {
811                 sb.append("] ⇝ dest[");
812             } else {
813                 sb.append("] ≡ dest[");
814             }
815             sb.append(destIndex);
816             sb.append("..");
817             sb.append(destIndex + newLength_);
818             if (changed) {
819                 sb.append("], repl[");
820                 sb.append(replIndex);
821                 sb.append("..");
822                 sb.append(replIndex + newLength_);
823                 sb.append("] }");
824             } else {
825                 sb.append("] (no-change) }");
826             }
827             return sb.toString();
828         }
829     };
830 
831     /**
832      * Returns an Iterator for coarse-grained change edits
833      * (adjacent change edits are treated as one).
834      * Can be used to perform simple string updates.
835      * Skips no-change edits.
836      * @return an Iterator that merges adjacent changes.
837      */
838     public Iterator getCoarseChangesIterator() {
839         return new Iterator(array, length, true, true);
840     }
841 
842     /**
843      * Returns an Iterator for coarse-grained change and no-change edits
844      * (adjacent change edits are treated as one).
845      * Can be used to perform simple string updates.
846      * Adjacent change edits are treated as one edit.
847      * @return an Iterator that merges adjacent changes.
848      */
849     public Iterator getCoarseIterator() {
850         return new Iterator(array, length, false, true);
851     }
852 
853     /**
854      * Returns an Iterator for fine-grained change edits
855      * (full granularity of change edits is retained).
856      * Can be used for modifying styled text.
857      * Skips no-change edits.
858      * @return an Iterator that separates adjacent changes.
859      */
860     public Iterator getFineChangesIterator() {
861         return new Iterator(array, length, true, false);
862     }
863 
864     /**
865      * Returns an Iterator for fine-grained change and no-change edits
866      * (full granularity of change edits is retained).
867      * Can be used for modifying styled text.
868      * @return an Iterator that separates adjacent changes.
869      */
870     public Iterator getFineIterator() {
871         return new Iterator(array, length, false, false);
872     }
873 
874     /**
875      * Merges the two input Edits and appends the result to this object.
876      *
877      * <p>Consider two string transformations (for example, normalization and case mapping)
878      * where each records Edits in addition to writing an output string.<br>
879      * Edits ab reflect how substrings of input string a
880      * map to substrings of intermediate string b.<br>
881      * Edits bc reflect how substrings of intermediate string b
882      * map to substrings of output string c.<br>
883      * This function merges ab and bc such that the additional edits
884      * recorded in this object reflect how substrings of input string a
885      * map to substrings of output string c.
886      *
887      * <p>If unrelated Edits are passed in where the output string of the first
888      * has a different length than the input string of the second,
889      * then an IllegalArgumentException is thrown.
890      *
891      * @param ab reflects how substrings of input string a
892      *     map to substrings of intermediate string b.
893      * @param bc reflects how substrings of intermediate string b
894      *     map to substrings of output string c.
895      * @return this, with the merged edits appended
896      */
897     public Edits mergeAndAppend(Edits ab, Edits bc) {
898         // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
899         // Parallel iteration over both Edits.
900         Iterator abIter = ab.getFineIterator();
901         Iterator bcIter = bc.getFineIterator();
902         boolean abHasNext = true, bcHasNext = true;
903         // Copy iterator state into local variables, so that we can modify and subdivide spans.
904         // ab old & new length, bc old & new length
905         int aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
906         // When we have different-intermediate-length changes, we accumulate a larger change.
907         int pending_aLength = 0, pending_cLength = 0;
908         for (;;) {
909             // At this point, for each of the two iterators:
910             // Either we are done with the locally cached current edit,
911             // and its intermediate-string length has been reset,
912             // or we will continue to work with a truncated remainder of this edit.
913             //
914             // If the current edit is done, and the iterator has not yet reached the end,
915             // then we fetch the next edit. This is true for at least one of the iterators.
916             //
917             // Normally it does not matter whether we fetch from ab and then bc or vice versa.
918             // However, the result is observably different when
919             // ab deletions meet bc insertions at the same intermediate-string index.
920             // Some users expect the bc insertions to come first, so we fetch from bc first.
921             if (bc_bLength == 0) {
922                 if (bcHasNext && (bcHasNext = bcIter.next())) {
923                     bc_bLength = bcIter.oldLength();
924                     cLength = bcIter.newLength();
925                     if (bc_bLength == 0) {
926                         // insertion
927                         if (ab_bLength == 0 || !abIter.hasChange()) {
928                             addReplace(pending_aLength, pending_cLength + cLength);
929                             pending_aLength = pending_cLength = 0;
930                         } else {
931                             pending_cLength += cLength;
932                         }
933                         continue;
934                     }
935                 }
936                 // else see if the other iterator is done, too.
937             }
938             if (ab_bLength == 0) {
939                 if (abHasNext && (abHasNext = abIter.next())) {
940                     aLength = abIter.oldLength();
941                     ab_bLength = abIter.newLength();
942                     if (ab_bLength == 0) {
943                         // deletion
944                         if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
945                             addReplace(pending_aLength + aLength, pending_cLength);
946                             pending_aLength = pending_cLength = 0;
947                         } else {
948                             pending_aLength += aLength;
949                         }
950                         continue;
951                     }
952                 } else if (bc_bLength == 0) {
953                     // Both iterators are done at the same time:
954                     // The intermediate-string lengths match.
955                     break;
956                 } else {
957                     throw new IllegalArgumentException(
958                             "The ab output string is shorter than the bc input string.");
959                 }
960             }
961             if (bc_bLength == 0) {
962                 throw new IllegalArgumentException(
963                         "The bc input string is shorter than the ab output string.");
964             }
965             //  Done fetching: ab_bLength > 0 && bc_bLength > 0
966 
967             // The current state has two parts:
968             // - Past: We accumulate a longer ac edit in the "pending" variables.
969             // - Current: We have copies of the current ab/bc edits in local variables.
970             //   At least one side is newly fetched.
971             //   One side might be a truncated remainder of an edit we fetched earlier.
972 
973             if (!abIter.hasChange() && !bcIter.hasChange()) {
974                 // An unchanged span all the way from string a to string c.
975                 if (pending_aLength != 0 || pending_cLength != 0) {
976                     addReplace(pending_aLength, pending_cLength);
977                     pending_aLength = pending_cLength = 0;
978                 }
979                 int unchangedLength = aLength <= cLength ? aLength : cLength;
980                 addUnchanged(unchangedLength);
981                 ab_bLength = aLength -= unchangedLength;
982                 bc_bLength = cLength -= unchangedLength;
983                 // At least one of the unchanged spans is now empty.
984                 continue;
985             }
986             if (!abIter.hasChange() && bcIter.hasChange()) {
987                 // Unchanged a->b but changed b->c.
988                 if (ab_bLength >= bc_bLength) {
989                     // Split the longer unchanged span into change + remainder.
990                     addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
991                     pending_aLength = pending_cLength = 0;
992                     aLength = ab_bLength -= bc_bLength;
993                     bc_bLength = 0;
994                     continue;
995                 }
996                 // Handle the shorter unchanged span below like a change.
997             } else if (abIter.hasChange() && !bcIter.hasChange()) {
998                 // Changed a->b and then unchanged b->c.
999                 if (ab_bLength <= bc_bLength) {
1000                     // Split the longer unchanged span into change + remainder.
1001                     addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
1002                     pending_aLength = pending_cLength = 0;
1003                     cLength = bc_bLength -= ab_bLength;
1004                     ab_bLength = 0;
1005                     continue;
1006                 }
1007                 // Handle the shorter unchanged span below like a change.
1008             } else {  // both abIter.hasChange() && bcIter.hasChange()
1009                 if (ab_bLength == bc_bLength) {
1010                     // Changes on both sides up to the same position. Emit & reset.
1011                     addReplace(pending_aLength + aLength, pending_cLength + cLength);
1012                     pending_aLength = pending_cLength = 0;
1013                     ab_bLength = bc_bLength = 0;
1014                     continue;
1015                 }
1016             }
1017             // Accumulate the a->c change, reset the shorter side,
1018             // keep a remainder of the longer one.
1019             pending_aLength += aLength;
1020             pending_cLength += cLength;
1021             if (ab_bLength < bc_bLength) {
1022                 bc_bLength -= ab_bLength;
1023                 cLength = ab_bLength = 0;
1024             } else {  // ab_bLength > bc_bLength
1025                 ab_bLength -= bc_bLength;
1026                 aLength = bc_bLength = 0;
1027             }
1028         }
1029         if (pending_aLength != 0 || pending_cLength != 0) {
1030             addReplace(pending_aLength, pending_cLength);
1031         }
1032         return this;
1033     }
1034 }
1035