• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 package com.ibm.icu.text;
4 
5 import java.nio.BufferOverflowException;
6 import java.util.Arrays;
7 
8 /**
9  * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
10  * in linear progression. Does not support moving/reordering of text.
11  * <p>
12  * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
13  * instances of this class using {@link #addReplace(int, int)} (for change edits) and
14  * {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
15  * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
16  * mapping between code points in the source and destination strings.
17  * <p>
18  * After all edits have been added, instances of this class should be considered immutable, and an
19  * {@link Edits.Iterator} can be used for queries.
20  * <p>
21  * There are four flavors of Edits.Iterator:
22  * <ul>
23  * <li>{@link #getFineIterator()} retains full granularity of change edits.
24  * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
25  * next() on the iterator, skips over no-change edits (unchanged regions).
26  * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
27  * edits are automatically merged during the construction phase.)
28  * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
29  * calling next() on the iterator, skips over no-change edits (unchanged regions).
30  * </ul>
31  * <p>
32  * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
33  * following fine edits:
34  * <ul>
35  * <li>abc ⇨ abc (no-change)
36  * <li>ß ⇨ ss (change)
37  * <li>D ⇨ d (change)
38  * <li>e ⇨ e (no-change)
39  * <li>F ⇨ f (change)
40  * </ul>
41  * and the following coarse edits (note how adjacent change edits get merged together):
42  * <ul>
43  * <li>abc ⇨ abc (no-change)
44  * <li>ßD ⇨ ssd (change)
45  * <li>e ⇨ e (no-change)
46  * <li>F ⇨ f (change)
47  * </ul>
48  * <p>
49  * The "fine changes" and "coarse changes" iterators will step through only the change edits when their
50  * {@link Edits.Iterator#next()} methods are called. They are identical to the non-change iterators when
51  * their {@link Edits.Iterator#findSourceIndex(int)} or {@link Edits.Iterator#findDestinationIndex(int)}
52  * methods are used to walk through the string.
53  * <p>
54  * For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
55  * UCharacterCaseTest.java.
56  *
57  * @stable ICU 59
58  */
59 public final class Edits {
60     // 0000uuuuuuuuuuuu records u+1 unchanged text units.
61     private static final int MAX_UNCHANGED_LENGTH = 0x1000;
62     private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
63 
64     // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
65     private static final int MAX_SHORT_CHANGE_OLD_LENGTH = 6;
66     private static final int MAX_SHORT_CHANGE_NEW_LENGTH = 7;
67     private static final int SHORT_CHANGE_NUM_MASK = 0x1ff;
68     private static final int MAX_SHORT_CHANGE = 0x6fff;
69 
70     // 0111mmmmmmnnnnnn records a replacement of m text units with n.
71     // m or n = 61: actual length follows in the next edits array unit.
72     // m or n = 62..63: actual length follows in the next two edits array units.
73     // Bit 30 of the actual length is in the head unit.
74     // Trailing units have bit 15 set.
75     private static final int LENGTH_IN_1TRAIL = 61;
76     private static final int LENGTH_IN_2TRAIL = 62;
77 
78     private static final int STACK_CAPACITY = 100;
79     private char[] array;
80     private int length;
81     private int delta;
82     private int numChanges;
83 
84     /**
85      * Constructs an empty object.
86      * @stable ICU 59
87      */
Edits()88     public Edits() {
89         array = new char[STACK_CAPACITY];
90     }
91 
92     /**
93      * Resets the data but may not release memory.
94      * @stable ICU 59
95      */
reset()96     public void reset() {
97         length = delta = numChanges = 0;
98     }
99 
setLastUnit(int last)100     private void setLastUnit(int last) {
101         array[length - 1] = (char)last;
102     }
lastUnit()103     private int lastUnit() {
104         return length > 0 ? array[length - 1] : 0xffff;
105     }
106 
107     /**
108      * Adds a no-change edit: a record for an unchanged segment of text.
109      * Normally called from inside ICU string transformation functions, not user code.
110      * @stable ICU 59
111      */
addUnchanged(int unchangedLength)112     public void addUnchanged(int unchangedLength) {
113         if(unchangedLength < 0) {
114             throw new IllegalArgumentException(
115                     "addUnchanged(" + unchangedLength + "): length must not be negative");
116         }
117         // Merge into previous unchanged-text record, if any.
118         int last = lastUnit();
119         if(last < MAX_UNCHANGED) {
120             int remaining = MAX_UNCHANGED - last;
121             if (remaining >= unchangedLength) {
122                 setLastUnit(last + unchangedLength);
123                 return;
124             }
125             setLastUnit(MAX_UNCHANGED);
126             unchangedLength -= remaining;
127         }
128         // Split large lengths into multiple units.
129         while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
130             append(MAX_UNCHANGED);
131             unchangedLength -= MAX_UNCHANGED_LENGTH;
132         }
133         // Write a small (remaining) length.
134         if(unchangedLength > 0) {
135             append(unchangedLength - 1);
136         }
137     }
138 
139     /**
140      * Adds a change edit: a record for a text replacement/insertion/deletion.
141      * Normally called from inside ICU string transformation functions, not user code.
142      * @stable ICU 59
143      */
addReplace(int oldLength, int newLength)144     public void addReplace(int oldLength, int newLength) {
145         if(oldLength < 0 || newLength < 0) {
146             throw new IllegalArgumentException(
147                     "addReplace(" + oldLength + ", " + newLength +
148                     "): both lengths must be non-negative");
149         }
150         if (oldLength == 0 && newLength == 0) {
151             return;
152         }
153         ++numChanges;
154         int newDelta = newLength - oldLength;
155         if (newDelta != 0) {
156             if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
157                     (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
158                 // Integer overflow or underflow.
159                 throw new IndexOutOfBoundsException();
160             }
161             delta += newDelta;
162         }
163 
164         if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
165                 newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
166             // Merge into previous same-lengths short-replacement record, if any.
167             int u = (oldLength << 12) | (newLength << 9);
168             int last = lastUnit();
169             if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
170                     (last & ~SHORT_CHANGE_NUM_MASK) == u &&
171                     (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
172                 setLastUnit(last + 1);
173                 return;
174             }
175             append(u);
176             return;
177         }
178 
179         int head = 0x7000;
180         if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
181             head |= oldLength << 6;
182             head |= newLength;
183             append(head);
184         } else if ((array.length - length) >= 5 || growArray()) {
185             int limit = length + 1;
186             if(oldLength < LENGTH_IN_1TRAIL) {
187                 head |= oldLength << 6;
188             } else if(oldLength <= 0x7fff) {
189                 head |= LENGTH_IN_1TRAIL << 6;
190                 array[limit++] = (char)(0x8000 | oldLength);
191             } else {
192                 head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
193                 array[limit++] = (char)(0x8000 | (oldLength >> 15));
194                 array[limit++] = (char)(0x8000 | oldLength);
195             }
196             if(newLength < LENGTH_IN_1TRAIL) {
197                 head |= newLength;
198             } else if(newLength <= 0x7fff) {
199                 head |= LENGTH_IN_1TRAIL;
200                 array[limit++] = (char)(0x8000 | newLength);
201             } else {
202                 head |= LENGTH_IN_2TRAIL + (newLength >> 30);
203                 array[limit++] = (char)(0x8000 | (newLength >> 15));
204                 array[limit++] = (char)(0x8000 | newLength);
205             }
206             array[length] = (char)head;
207             length = limit;
208         }
209     }
210 
append(int r)211     private void append(int r) {
212         if(length < array.length || growArray()) {
213             array[length++] = (char)r;
214         }
215     }
216 
growArray()217     private boolean growArray() {
218         int newCapacity;
219         if (array.length == STACK_CAPACITY) {
220             newCapacity = 2000;
221         } else if (array.length == Integer.MAX_VALUE) {
222             throw new BufferOverflowException();
223         } else if (array.length >= (Integer.MAX_VALUE / 2)) {
224             newCapacity = Integer.MAX_VALUE;
225         } else {
226             newCapacity = 2 * array.length;
227         }
228         // Grow by at least 5 units so that a maximal change record will fit.
229         if ((newCapacity - array.length) < 5) {
230             throw new BufferOverflowException();
231         }
232         array = Arrays.copyOf(array, newCapacity);
233         return true;
234     }
235 
236     /**
237      * How much longer is the new text compared with the old text?
238      * @return new length minus old length
239      * @stable ICU 59
240      */
lengthDelta()241     public int lengthDelta() { return delta; }
242     /**
243      * @return true if there are any change edits
244      * @stable ICU 59
245      */
hasChanges()246     public boolean hasChanges()  { return numChanges != 0; }
247 
248     /**
249      * @return the number of change edits
250      * @stable ICU 60
251      */
numberOfChanges()252     public int numberOfChanges() { return numChanges; }
253 
254     /**
255      * Access to the list of edits.
256      * <p>
257      * At any moment in time, an instance of this class points to a single edit: a "window" into a span
258      * of the source string and the corresponding span of the destination string. The source string span
259      * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
260      * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
261      * <p>
262      * The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
263      * and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
264      * to make it point to the corresponding edit.
265      * <p>
266      * For more information, see the documentation for {@link Edits}.
267      * <p>
268      * Note: Although this class is called "Iterator", it does not implement {@link java.util.Iterator}.
269      *
270      * @see #getCoarseIterator
271      * @see #getFineIterator
272      * @stable ICU 59
273      */
274     public static final class Iterator {
275         private final char[] array;
276         private int index;
277         private final int length;
278         /**
279          * 0 if we are not within compressed equal-length changes.
280          * Otherwise the number of remaining changes, including the current one.
281          */
282         private int remaining;
283         private final boolean onlyChanges_, coarse;
284 
285         private int dir;  // iteration direction: back(<0), initial(0), forward(>0)
286         private boolean changed;
287         private int oldLength_, newLength_;
288         private int srcIndex, replIndex, destIndex;
289 
Iterator(char[] a, int len, boolean oc, boolean crs)290         private Iterator(char[] a, int len, boolean oc, boolean crs) {
291             array = a;
292             length = len;
293             onlyChanges_ = oc;
294             coarse = crs;
295         }
296 
readLength(int head)297         private int readLength(int head) {
298             if (head < LENGTH_IN_1TRAIL) {
299                 return head;
300             } else if (head < LENGTH_IN_2TRAIL) {
301                 assert(index < length);
302                 assert(array[index] >= 0x8000);
303                 return array[index++] & 0x7fff;
304             } else {
305                 assert((index + 2) <= length);
306                 assert(array[index] >= 0x8000);
307                 assert(array[index + 1] >= 0x8000);
308                 int len = ((head & 1) << 30) |
309                         ((array[index] & 0x7fff) << 15) |
310                         (array[index + 1] & 0x7fff);
311                 index += 2;
312                 return len;
313             }
314         }
315 
updateNextIndexes()316         private void updateNextIndexes() {
317             srcIndex += oldLength_;
318             if (changed) {
319                 replIndex += newLength_;
320             }
321             destIndex += newLength_;
322         }
323 
updatePreviousIndexes()324         private void updatePreviousIndexes() {
325             srcIndex -= oldLength_;
326             if (changed) {
327                 replIndex -= newLength_;
328             }
329             destIndex -= newLength_;
330         }
331 
noNext()332         private boolean noNext() {
333             // No change before or beyond the string.
334             dir = 0;
335             changed = false;
336             oldLength_ = newLength_ = 0;
337             return false;
338         }
339 
340         /**
341          * Advances the iterator to the next edit.
342          * @return true if there is another edit
343          * @stable ICU 59
344          */
next()345         public boolean next() {
346             return next(onlyChanges_);
347         }
348 
next(boolean onlyChanges)349         private boolean next(boolean onlyChanges) {
350             // Forward iteration: Update the string indexes to the limit of the current span,
351             // and post-increment-read array units to assemble a new span.
352             // Leaves the array index one after the last unit of that span.
353             if (dir > 0) {
354                 updateNextIndexes();
355             } else {
356                 if (dir < 0) {
357                     // Turn around from previous() to next().
358                     // Post-increment-read the same span again.
359                     if (remaining > 0) {
360                         // Fine-grained iterator:
361                         // Stay on the current one of a sequence of compressed changes.
362                         ++index;  // next() rests on the index after the sequence unit.
363                         dir = 1;
364                         return true;
365                     }
366                 }
367                 dir = 1;
368             }
369             if (remaining >= 1) {
370                 // Fine-grained iterator: Continue a sequence of compressed changes.
371                 if (remaining > 1) {
372                     --remaining;
373                     return true;
374                 }
375                 remaining = 0;
376             }
377             if (index >= length) {
378                 return noNext();
379             }
380             int u = array[index++];
381             if (u <= MAX_UNCHANGED) {
382                 // Combine adjacent unchanged ranges.
383                 changed = false;
384                 oldLength_ = u + 1;
385                 while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
386                     ++index;
387                     oldLength_ += u + 1;
388                 }
389                 newLength_ = oldLength_;
390                 if (onlyChanges) {
391                     updateNextIndexes();
392                     if (index >= length) {
393                         return noNext();
394                     }
395                     // already fetched u > MAX_UNCHANGED at index
396                     ++index;
397                 } else {
398                     return true;
399                 }
400             }
401             changed = true;
402             if (u <= MAX_SHORT_CHANGE) {
403                 int oldLen = u >> 12;
404                 int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
405                 int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
406                 if (coarse) {
407                     oldLength_ = num * oldLen;
408                     newLength_ = num * newLen;
409                 } else {
410                     // Split a sequence of changes that was compressed into one unit.
411                     oldLength_ = oldLen;
412                     newLength_ = newLen;
413                     if (num > 1) {
414                         remaining = num;  // This is the first of two or more changes.
415                     }
416                     return true;
417                 }
418             } else {
419                 assert(u <= 0x7fff);
420                 oldLength_ = readLength((u >> 6) & 0x3f);
421                 newLength_ = readLength(u & 0x3f);
422                 if (!coarse) {
423                     return true;
424                 }
425             }
426             // Combine adjacent changes.
427             while (index < length && (u = array[index]) > MAX_UNCHANGED) {
428                 ++index;
429                 if (u <= MAX_SHORT_CHANGE) {
430                     int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
431                     oldLength_ += (u >> 12) * num;
432                     newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
433                 } else {
434                     assert(u <= 0x7fff);
435                     oldLength_ += readLength((u >> 6) & 0x3f);
436                     newLength_ += readLength(u & 0x3f);
437                 }
438             }
439             return true;
440         }
441 
previous()442         private boolean previous() {
443             // Backward iteration: Pre-decrement-read array units to assemble a new span,
444             // then update the string indexes to the start of that span.
445             // Leaves the array index on the head unit of that span.
446             if (dir >= 0) {
447                 if (dir > 0) {
448                     // Turn around from next() to previous().
449                     // Set the string indexes to the span limit and
450                     // pre-decrement-read the same span again.
451                     if (remaining > 0) {
452                         // Fine-grained iterator:
453                         // Stay on the current one of a sequence of compressed changes.
454                         --index;  // previous() rests on the sequence unit.
455                         dir = -1;
456                         return true;
457                     }
458                     updateNextIndexes();
459                 }
460                 dir = -1;
461             }
462             if (remaining > 0) {
463                 // Fine-grained iterator: Continue a sequence of compressed changes.
464                 int u = array[index];
465                 assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
466                 if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
467                     ++remaining;
468                     updatePreviousIndexes();
469                     return true;
470                 }
471                 remaining = 0;
472             }
473             if (index <= 0) {
474                 return noNext();
475             }
476             int u = array[--index];
477             if (u <= MAX_UNCHANGED) {
478                 // Combine adjacent unchanged ranges.
479                 changed = false;
480                 oldLength_ = u + 1;
481                 while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
482                     --index;
483                     oldLength_ += u + 1;
484                 }
485                 newLength_ = oldLength_;
486                 // No need to handle onlyChanges as long as previous() is called only from findIndex().
487                 updatePreviousIndexes();
488                 return true;
489             }
490             changed = true;
491             if (u <= MAX_SHORT_CHANGE) {
492                 int oldLen = u >> 12;
493                 int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
494                 int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
495                 if (coarse) {
496                     oldLength_ = num * oldLen;
497                     newLength_ = num * newLen;
498                 } else {
499                     // Split a sequence of changes that was compressed into one unit.
500                     oldLength_ = oldLen;
501                     newLength_ = newLen;
502                     if (num > 1) {
503                         remaining = 1;  // This is the last of two or more changes.
504                     }
505                     updatePreviousIndexes();
506                     return true;
507                 }
508             } else {
509                 if (u <= 0x7fff) {
510                     // The change is encoded in u alone.
511                     oldLength_ = readLength((u >> 6) & 0x3f);
512                     newLength_ = readLength(u & 0x3f);
513                 } else {
514                     // Back up to the head of the change, read the lengths,
515                     // and reset the index to the head again.
516                     assert(index > 0);
517                     while ((u = array[--index]) > 0x7fff) {}
518                     assert(u > MAX_SHORT_CHANGE);
519                     int headIndex = index++;
520                     oldLength_ = readLength((u >> 6) & 0x3f);
521                     newLength_ = readLength(u & 0x3f);
522                     index = headIndex;
523                 }
524                 if (!coarse) {
525                     updatePreviousIndexes();
526                     return true;
527                 }
528             }
529             // Combine adjacent changes.
530             while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
531                 --index;
532                 if (u <= MAX_SHORT_CHANGE) {
533                     int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
534                     oldLength_ += (u >> 12) * num;
535                     newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
536                 } else if (u <= 0x7fff) {
537                     // Read the lengths, and reset the index to the head again.
538                     int headIndex = index++;
539                     oldLength_ += readLength((u >> 6) & 0x3f);
540                     newLength_ += readLength(u & 0x3f);
541                     index = headIndex;
542                 }
543             }
544             updatePreviousIndexes();
545             return true;
546         }
547 
548         /**
549          * Moves the iterator to the edit that contains the source index.
550          * The source index may be found in a no-change edit
551          * even if normal iteration would skip no-change edits.
552          * Normal iteration can continue from a found edit.
553          *
554          * <p>The iterator state before this search logically does not matter.
555          * (It may affect the performance of the search.)
556          *
557          * <p>The iterator state after this search is undefined
558          * if the source index is out of bounds for the source string.
559          *
560          * @param i source index
561          * @return true if the edit for the source index was found
562          * @stable ICU 59
563          */
findSourceIndex(int i)564         public boolean findSourceIndex(int i) {
565             return findIndex(i, true) == 0;
566         }
567 
568         /**
569          * Moves the iterator to the edit that contains the destination index.
570          * The destination index may be found in a no-change edit
571          * even if normal iteration would skip no-change edits.
572          * Normal iteration can continue from a found edit.
573          *
574          * <p>The iterator state before this search logically does not matter.
575          * (It may affect the performance of the search.)
576          *
577          * <p>The iterator state after this search is undefined
578          * if the source index is out of bounds for the source string.
579          *
580          * @param i destination index
581          * @return true if the edit for the destination index was found
582          * @stable ICU 60
583          */
findDestinationIndex(int i)584         public boolean findDestinationIndex(int i) {
585             return findIndex(i, false) == 0;
586         }
587 
588         /** @return -1: error or i<0; 0: found; 1: i>=string length */
findIndex(int i, boolean findSource)589         private int findIndex(int i, boolean findSource) {
590             if (i < 0) { return -1; }
591             int spanStart, spanLength;
592             if (findSource) {  // find source index
593                 spanStart = srcIndex;
594                 spanLength = oldLength_;
595             } else {  // find destination index
596                 spanStart = destIndex;
597                 spanLength = newLength_;
598             }
599             if (i < spanStart) {
600                 if (i >= (spanStart / 2)) {
601                     // Search backwards.
602                     for (;;) {
603                         boolean hasPrevious = previous();
604                         assert(hasPrevious);  // because i>=0 and the first span starts at 0
605                         spanStart = findSource ? srcIndex : destIndex;
606                         if (i >= spanStart) {
607                             // The index is in the current span.
608                             return 0;
609                         }
610                         if (remaining > 0) {
611                             // Is the index in one of the remaining compressed edits?
612                             // spanStart is the start of the current span, first of the remaining ones.
613                             spanLength = findSource ? oldLength_ : newLength_;
614                             int u = array[index];
615                             assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
616                             int num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
617                             int len = num * spanLength;
618                             if (i >= (spanStart - len)) {
619                                 int n = ((spanStart - i - 1) / spanLength) + 1;
620                                 // 1 <= n <= num
621                                 srcIndex -= n * oldLength_;
622                                 replIndex -= n * newLength_;
623                                 destIndex -= n * newLength_;
624                                 remaining += n;
625                                 return 0;
626                             }
627                             // Skip all of these edits at once.
628                             srcIndex -= num * oldLength_;
629                             replIndex -= num * newLength_;
630                             destIndex -= num * newLength_;
631                             remaining = 0;
632                         }
633                     }
634                 }
635                 // Reset the iterator to the start.
636                 dir = 0;
637                 index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
638             } else if (i < (spanStart + spanLength)) {
639                 // The index is in the current span.
640                 return 0;
641             }
642             while (next(false)) {
643                 if (findSource) {
644                     spanStart = srcIndex;
645                     spanLength = oldLength_;
646                 } else {
647                     spanStart = destIndex;
648                     spanLength = newLength_;
649                 }
650                 if (i < (spanStart + spanLength)) {
651                     // The index is in the current span.
652                     return 0;
653                 }
654                 if (remaining > 1) {
655                     // Is the index in one of the remaining compressed edits?
656                     // spanStart is the start of the current span, first of the remaining ones.
657                     int len = remaining * spanLength;
658                     if (i < (spanStart + len)) {
659                         int n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
660                         srcIndex += n * oldLength_;
661                         replIndex += n * newLength_;
662                         destIndex += n * newLength_;
663                         remaining -= n;
664                         return 0;
665                     }
666                     // Make next() skip all of these edits at once.
667                     oldLength_ *= remaining;
668                     newLength_ *= remaining;
669                     remaining = 0;
670                 }
671             }
672             return 1;
673         }
674 
675         /**
676          * Computes the destination index corresponding to the given source index.
677          * If the source index is inside a change edit (not at its start),
678          * then the destination index at the end of that edit is returned,
679          * since there is no information about index mapping inside a change edit.
680          *
681          * <p>(This means that indexes to the start and middle of an edit,
682          * for example around a grapheme cluster, are mapped to indexes
683          * encompassing the entire edit.
684          * The alternative, mapping an interior index to the start,
685          * would map such an interval to an empty one.)
686          *
687          * <p>This operation will usually but not always modify this object.
688          * The iterator state after this search is undefined.
689          *
690          * @param i source index
691          * @return destination index; undefined if i is not 0..string length
692          * @stable ICU 60
693          */
694         public int destinationIndexFromSourceIndex(int i) {
695             int where = findIndex(i, true);
696             if (where < 0) {
697                 // Error or before the string.
698                 return 0;
699             }
700             if (where > 0 || i == srcIndex) {
701                 // At or after string length, or at start of the found span.
702                 return destIndex;
703             }
704             if (changed) {
705                 // In a change span, map to its end.
706                 return destIndex + newLength_;
707             } else {
708                 // In an unchanged span, offset 1:1 within it.
709                 return destIndex + (i - srcIndex);
710             }
711         }
712 
713         /**
714          * Computes the source index corresponding to the given destination index.
715          * If the destination index is inside a change edit (not at its start),
716          * then the source index at the end of that edit is returned,
717          * since there is no information about index mapping inside a change edit.
718          *
719          * <p>(This means that indexes to the start and middle of an edit,
720          * for example around a grapheme cluster, are mapped to indexes
721          * encompassing the entire edit.
722          * The alternative, mapping an interior index to the start,
723          * would map such an interval to an empty one.)
724          *
725          * <p>This operation will usually but not always modify this object.
726          * The iterator state after this search is undefined.
727          *
728          * @param i destination index
729          * @return source index; undefined if i is not 0..string length
730          * @stable ICU 60
731          */
732         public int sourceIndexFromDestinationIndex(int i) {
733             int where = findIndex(i, false);
734             if (where < 0) {
735                 // Error or before the string.
736                 return 0;
737             }
738             if (where > 0 || i == destIndex) {
739                 // At or after string length, or at start of the found span.
740                 return srcIndex;
741             }
742             if (changed) {
743                 // In a change span, map to its end.
744                 return srcIndex + oldLength_;
745             } else {
746                 // In an unchanged span, offset within it.
747                 return srcIndex + (i - destIndex);
748             }
749         }
750 
751         /**
752          * Returns whether the edit currently represented by the iterator is a change edit.
753          *
754          * @return true if this edit replaces oldLength() units with newLength() different ones.
755          *         false if oldLength units remain unchanged.
756          * @stable ICU 59
757          */
758         public boolean hasChange() { return changed; }
759 
760         /**
761          * The length of the current span in the source string, which starts at {@link #sourceIndex}.
762          *
763          * @return the number of units in the source string which are replaced or remain unchanged.
764          * @stable ICU 59
765          */
766         public int oldLength() { return oldLength_; }
767 
768         /**
769          * The length of the current span in the destination string, which starts at
770          * {@link #destinationIndex}, or in the replacement string, which starts at
771          * {@link #replacementIndex}.
772          *
773          * @return the number of units in the destination string, if hasChange() is true. Same as
774          *         oldLength if hasChange() is false.
775          * @stable ICU 59
776          */
777         public int newLength() { return newLength_; }
778 
779         /**
780          * The start index of the current span in the source string; the span has length
781          * {@link #oldLength}.
782          *
783          * @return the current index into the source string
784          * @stable ICU 59
785          */
786         public int sourceIndex() { return srcIndex; }
787 
788         /**
789          * The start index of the current span in the replacement string; the span has length
790          * {@link #newLength}. Well-defined only if the current edit is a change edit.
791          * <p>
792          * The <em>replacement string</em> is the concatenation of all substrings of the destination
793          * string corresponding to change edits.
794          * <p>
795          * This method is intended to be used together with operations that write only replacement
796          * characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
797          * in-place.
798          *
799          * @return the current index into the replacement-characters-only string, not counting unchanged
800          *         spans
801          * @stable ICU 59
802          */
803         public int replacementIndex() {
804             // TODO: Throw an exception if we aren't in a change edit?
805             return replIndex;
806         }
807 
808         /**
809          * The start index of the current span in the destination string; the span has length
810          * {@link #newLength}.
811          *
812          * @return the current index into the full destination string
813          * @stable ICU 59
814          */
815         public int destinationIndex() { return destIndex; }
816 
817         /**
818          * A string representation of the current edit represented by the iterator for debugging. You
819          * should not depend on the contents of the return string; it may change over time.
820          * @return a string representation of the object.
821          * @stable ICU 59
822          */
823         @Override
824         public String toString() {
825             StringBuilder sb = new StringBuilder();
826             sb.append(super.toString());
827             sb.append("{ src[");
828             sb.append(srcIndex);
829             sb.append("..");
830             sb.append(srcIndex + oldLength_);
831             if (changed) {
832                 sb.append("] ⇝ dest[");
833             } else {
834                 sb.append("] ≡ dest[");
835             }
836             sb.append(destIndex);
837             sb.append("..");
838             sb.append(destIndex + newLength_);
839             if (changed) {
840                 sb.append("], repl[");
841                 sb.append(replIndex);
842                 sb.append("..");
843                 sb.append(replIndex + newLength_);
844                 sb.append("] }");
845             } else {
846                 sb.append("] (no-change) }");
847             }
848             return sb.toString();
849         }
850     };
851 
852     /**
853      * Returns an Iterator for coarse-grained change edits
854      * (adjacent change edits are treated as one).
855      * Can be used to perform simple string updates.
856      * Skips no-change edits.
857      * @return an Iterator that merges adjacent changes.
858      * @stable ICU 59
859      */
860     public Iterator getCoarseChangesIterator() {
861         return new Iterator(array, length, true, true);
862     }
863 
864     /**
865      * Returns an Iterator for coarse-grained change and no-change edits
866      * (adjacent change edits are treated as one).
867      * Can be used to perform simple string updates.
868      * Adjacent change edits are treated as one edit.
869      * @return an Iterator that merges adjacent changes.
870      * @stable ICU 59
871      */
872     public Iterator getCoarseIterator() {
873         return new Iterator(array, length, false, true);
874     }
875 
876     /**
877      * Returns an Iterator for fine-grained change edits
878      * (full granularity of change edits is retained).
879      * Can be used for modifying styled text.
880      * Skips no-change edits.
881      * @return an Iterator that separates adjacent changes.
882      * @stable ICU 59
883      */
884     public Iterator getFineChangesIterator() {
885         return new Iterator(array, length, true, false);
886     }
887 
888     /**
889      * Returns an Iterator for fine-grained change and no-change edits
890      * (full granularity of change edits is retained).
891      * Can be used for modifying styled text.
892      * @return an Iterator that separates adjacent changes.
893      * @stable ICU 59
894      */
895     public Iterator getFineIterator() {
896         return new Iterator(array, length, false, false);
897     }
898 
899     /**
900      * Merges the two input Edits and appends the result to this object.
901      *
902      * <p>Consider two string transformations (for example, normalization and case mapping)
903      * where each records Edits in addition to writing an output string.<br>
904      * Edits ab reflect how substrings of input string a
905      * map to substrings of intermediate string b.<br>
906      * Edits bc reflect how substrings of intermediate string b
907      * map to substrings of output string c.<br>
908      * This function merges ab and bc such that the additional edits
909      * recorded in this object reflect how substrings of input string a
910      * map to substrings of output string c.
911      *
912      * <p>If unrelated Edits are passed in where the output string of the first
913      * has a different length than the input string of the second,
914      * then an IllegalArgumentException is thrown.
915      *
916      * @param ab reflects how substrings of input string a
917      *     map to substrings of intermediate string b.
918      * @param bc reflects how substrings of intermediate string b
919      *     map to substrings of output string c.
920      * @return this, with the merged edits appended
921      * @stable ICU 60
922      */
923     public Edits mergeAndAppend(Edits ab, Edits bc) {
924         // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
925         // Parallel iteration over both Edits.
926         Iterator abIter = ab.getFineIterator();
927         Iterator bcIter = bc.getFineIterator();
928         boolean abHasNext = true, bcHasNext = true;
929         // Copy iterator state into local variables, so that we can modify and subdivide spans.
930         // ab old & new length, bc old & new length
931         int aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
932         // When we have different-intermediate-length changes, we accumulate a larger change.
933         int pending_aLength = 0, pending_cLength = 0;
934         for (;;) {
935             // At this point, for each of the two iterators:
936             // Either we are done with the locally cached current edit,
937             // and its intermediate-string length has been reset,
938             // or we will continue to work with a truncated remainder of this edit.
939             //
940             // If the current edit is done, and the iterator has not yet reached the end,
941             // then we fetch the next edit. This is true for at least one of the iterators.
942             //
943             // Normally it does not matter whether we fetch from ab and then bc or vice versa.
944             // However, the result is observably different when
945             // ab deletions meet bc insertions at the same intermediate-string index.
946             // Some users expect the bc insertions to come first, so we fetch from bc first.
947             if (bc_bLength == 0) {
948                 if (bcHasNext && (bcHasNext = bcIter.next())) {
949                     bc_bLength = bcIter.oldLength();
950                     cLength = bcIter.newLength();
951                     if (bc_bLength == 0) {
952                         // insertion
953                         if (ab_bLength == 0 || !abIter.hasChange()) {
954                             addReplace(pending_aLength, pending_cLength + cLength);
955                             pending_aLength = pending_cLength = 0;
956                         } else {
957                             pending_cLength += cLength;
958                         }
959                         continue;
960                     }
961                 }
962                 // else see if the other iterator is done, too.
963             }
964             if (ab_bLength == 0) {
965                 if (abHasNext && (abHasNext = abIter.next())) {
966                     aLength = abIter.oldLength();
967                     ab_bLength = abIter.newLength();
968                     if (ab_bLength == 0) {
969                         // deletion
970                         if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
971                             addReplace(pending_aLength + aLength, pending_cLength);
972                             pending_aLength = pending_cLength = 0;
973                         } else {
974                             pending_aLength += aLength;
975                         }
976                         continue;
977                     }
978                 } else if (bc_bLength == 0) {
979                     // Both iterators are done at the same time:
980                     // The intermediate-string lengths match.
981                     break;
982                 } else {
983                     throw new IllegalArgumentException(
984                             "The ab output string is shorter than the bc input string.");
985                 }
986             }
987             if (bc_bLength == 0) {
988                 throw new IllegalArgumentException(
989                         "The bc input string is shorter than the ab output string.");
990             }
991             //  Done fetching: ab_bLength > 0 && bc_bLength > 0
992 
993             // The current state has two parts:
994             // - Past: We accumulate a longer ac edit in the "pending" variables.
995             // - Current: We have copies of the current ab/bc edits in local variables.
996             //   At least one side is newly fetched.
997             //   One side might be a truncated remainder of an edit we fetched earlier.
998 
999             if (!abIter.hasChange() && !bcIter.hasChange()) {
1000                 // An unchanged span all the way from string a to string c.
1001                 if (pending_aLength != 0 || pending_cLength != 0) {
1002                     addReplace(pending_aLength, pending_cLength);
1003                     pending_aLength = pending_cLength = 0;
1004                 }
1005                 int unchangedLength = aLength <= cLength ? aLength : cLength;
1006                 addUnchanged(unchangedLength);
1007                 ab_bLength = aLength -= unchangedLength;
1008                 bc_bLength = cLength -= unchangedLength;
1009                 // At least one of the unchanged spans is now empty.
1010                 continue;
1011             }
1012             if (!abIter.hasChange() && bcIter.hasChange()) {
1013                 // Unchanged a->b but changed b->c.
1014                 if (ab_bLength >= bc_bLength) {
1015                     // Split the longer unchanged span into change + remainder.
1016                     addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
1017                     pending_aLength = pending_cLength = 0;
1018                     aLength = ab_bLength -= bc_bLength;
1019                     bc_bLength = 0;
1020                     continue;
1021                 }
1022                 // Handle the shorter unchanged span below like a change.
1023             } else if (abIter.hasChange() && !bcIter.hasChange()) {
1024                 // Changed a->b and then unchanged b->c.
1025                 if (ab_bLength <= bc_bLength) {
1026                     // Split the longer unchanged span into change + remainder.
1027                     addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
1028                     pending_aLength = pending_cLength = 0;
1029                     cLength = bc_bLength -= ab_bLength;
1030                     ab_bLength = 0;
1031                     continue;
1032                 }
1033                 // Handle the shorter unchanged span below like a change.
1034             } else {  // both abIter.hasChange() && bcIter.hasChange()
1035                 if (ab_bLength == bc_bLength) {
1036                     // Changes on both sides up to the same position. Emit & reset.
1037                     addReplace(pending_aLength + aLength, pending_cLength + cLength);
1038                     pending_aLength = pending_cLength = 0;
1039                     ab_bLength = bc_bLength = 0;
1040                     continue;
1041                 }
1042             }
1043             // Accumulate the a->c change, reset the shorter side,
1044             // keep a remainder of the longer one.
1045             pending_aLength += aLength;
1046             pending_cLength += cLength;
1047             if (ab_bLength < bc_bLength) {
1048                 bc_bLength -= ab_bLength;
1049                 cLength = ab_bLength = 0;
1050             } else {  // ab_bLength > bc_bLength
1051                 ab_bLength -= bc_bLength;
1052                 aLength = bc_bLength = 0;
1053             }
1054         }
1055         if (pending_aLength != 0 || pending_cLength != 0) {
1056             addReplace(pending_aLength, pending_cLength);
1057         }
1058         return this;
1059     }
1060 }
1061