1 /*
<lambda>null2  * Copyright 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package androidx.compose.foundation.text.input.internal
18 
19 import androidx.compose.foundation.internal.requirePrecondition
20 import androidx.compose.ui.text.TextRange
21 
22 /**
23  * Builds up bidirectional mapping functions to map offsets from an original string to corresponding
24  * in a string that has some edit operations applied.
25  *
26  * Edit operations must be reported via [recordEditOperation]. Offsets can be mapped
27  * [from the source string][mapFromSource] and [back to the source][mapFromDest]. Mapping between
28  * source and transformed offsets is a symmetric operation – given a sequence of edit operations,
29  * the result of mapping each offset from the source to transformed strings will be the same as
30  * mapping backwards on the inverse sequence of edit operations.
31  *
32  * By default, offsets map one-to-one. However, around an edit operation, some alternative rules
33  * apply. In general, offsets are mapped one-to-one where they can be unambiguously. When there
34  * isn't enough information, the mapping is ambiguous, and the mapping result will be a [TextRange]
35  * instead of a single value, where the range represents all the possible offsets that it could map
36  * to.
37  *
38  * _Note: In the following discussion, `I` refers to the start offset in the source text, `N` refers
39  * to the length of the range in the source text, and `N'` refers to the length of the replacement
40  * text. So given `"abc"` and replacing `"bc"` with `"xyz"`, `I=1`, `N=2`, and `N'=3`._
41  *
42  * ### Insertions
43  * When text is inserted (i.e. zero-length range is replaced with non-zero-length range), the
44  * mapping is ambiguous because all of the offsets in the inserted text map back to the same offset
45  * in the source text - the offset where the text was inserted. That means the insertion point can
46  * map to any of the offsets in the inserted text. I.e. I -> I..N'
47  * - This is slightly different than the replacement case, because the offset of the start of the
48  *   operation and the offset of the end of the operation (which are the same) map to a range
49  *   instead of a scalar. This is because there is not enough information to map the start and end
50  *   offsets 1-to-1 to offsets in the transformed text.
51  * - This is symmetric with deletion: Mapping backward from an insertion is the same as mapping
52  *   forward from a deletion.
53  *
54  * ### Deletions
55  * In the inverse case, when text is deleted, mapping is unambiguous. All the offsets in the deleted
56  * range map to the start of the deleted range. I.e. I..N -> I
57  * - This is symmetric with insertion: Mapping backward from a deletion is the same as mapping
58  *   forward from an insertion.
59  *
60  * ### Replacements
61  * When text is replaced, there are both ambiguous and unambiguous mappings:
62  * - The offsets at the _ends_ of the replaced range map unambiguously to the offsets at the
63  *   corresponding edges of the replaced text. I -> I and I+1 -> I+N
64  * - The offsets _inside_ the replaced range (exclusive) map ambiguously to the entire replaced
65  *   range. I+1..I+N-1 -> I+1..I+N'-1
66  * - Note that this means that when a string with length >1 is replaced by a single character, all
67  *   the offsets inside that string will map not to the index of the replacement character but to a
68  *   single-char _range_ containing that character.
69  *
70  * ### Examples
71  *
72  * #### Inserting text
73  *
74  * ```
75  *     012
76  * A: "ab"
77  *     | \
78  *     |  \
79  * B: "azzzb"
80  *     012345
81  * ```
82  *
83  * Forward mapping:
84  *
85  * | from A: | 0   | 1   | 2   |
86  * |--------:|:---:|:---:|:---:|
87  * |   to B: |  0  | 1-4 |  5  |
88  *
89  * Reverse mapping:
90  *
91  * | from B: | 0   | 1   | 2   | 3   | 4   | 5   |
92  * |--------:|:---:|:---:|:---:|:---:|:---:|:---:|
93  * |   to A: |  0  |  1  |  1  |  1  |  1  |  2  |
94  *
95  * #### Deleting text
96  *
97  * ```
98  *     012345
99  * A: "azzzb"
100  *     |  /
101  *     | /
102  * B: "ab"
103  *     012
104  * ```
105  *
106  * Forward mapping:
107  *
108  * | from A: | 0   | 1   | 2   | 3   | 4   | 5   |
109  * |--------:|:---:|:---:|:---:|:---:|:---:|:---:|
110  * |   to B: |  0  |  1  |  1  |  1  |  1  |  2  |
111  *
112  * Reverse mapping:
113  *
114  * | from B: | 0   | 1   | 2   |
115  * |--------:|:---:|:---:|:---:|
116  * |   to A: |  0  | 1-4 |  5  |
117  *
118  * #### Replacing text: single char with char
119  *
120  * ```
121  *     0123
122  * A: "abc"
123  *      |
124  *      |
125  * B: "azc"
126  *     0123
127  * ```
128  *
129  * Forward/reverse mapping: identity
130  *
131  * | from: | 0   | 1   | 2   | 3   |
132  * |------:|:---:|:---:|:---:|:---:|
133  * |   to: |  0  |  1  |  2  |  3  |
134  *
135  * #### Replacing text: char with chars
136  *
137  * ```
138  *     0123
139  * A: "abc"
140  *      |
141  *      |\
142  * B: "azzc"
143  *     01234
144  * ```
145  *
146  * Forward mapping:
147  *
148  * | from A: | 0   | 1   | 2   | 3   |
149  * |--------:|:---:|:---:|:---:|:---:|
150  * |   to B: |  0  |  1  |  3  |  4  |
151  *
152  * Reverse mapping:
153  *
154  * | from B: | 0   | 1   | 2   | 3   | 4   |
155  * |--------:|:---:|:---:|:---:|:---:|:---:|
156  * |   to A: |  0  |  1  |  1  |  2  |  3  |
157  *
158  * #### Replacing text: chars with chars
159  *
160  * ```
161  *     012345
162  * A: "abcde"
163  *      | |
164  *      | /
165  * B: "azze"
166  *     01234
167  * ```
168  *
169  * Forward mapping:
170  *
171  * | from A: | 0   | 1   | 2   | 3   | 4   | 5   |
172  * |--------:|:---:|:---:|:---:|:---:|:---:|:---:|
173  * |   to B: |  0  |  1  | 1-3 | 1-3 |  3  |  4  |
174  *
175  * Reverse mapping:
176  *
177  * | from B: | 0   | 1   | 2   | 3   | 4   |
178  * |--------:|:---:|:---:|:---:|:---:|:---:|
179  * |   to A: |  0  |  1  | 1-4 |  4  |  5  |
180  *
181  * ### Multiple operations
182  *
183  * While the above apply to single edit operations, when multiple edit operations are recorded the
184  * same rules apply. The rules are applied to the first operation, then the result of that is
185  * effectively used as the input text for the next operation, etc. Because an offset can map to a
186  * range at each step, we track both a start and end offset (which start as the same value), and at
187  * each step combine the start and end _ranges_ by taking their union.
188  *
189  * #### Multiple char-to-char replacements (codepoint transformation):
190  * ```
191  *     0123
192  * A: "abc"
193  *     |
194  *    "•bc"
195  *      |
196  *    "••c"
197  *       |
198  * B: "•••"
199  *     0123
200  * ```
201  *
202  * Forward/reverse mapping: identity
203  *
204  * | from: | 0   | 1   | 2   | 3   |
205  * |------:|:---:|:---:|:---:|:---:|
206  * |   to: |  0  |  1  |  2  |  3  |
207  *
208  * #### Multiple inserts:
209  * ```
210  *     01234
211  * A: "abcd"
212  *      |
213  *    "a(bcd"
214  *         |
215  * B: "a(bc)d"
216  *     0123456
217  * ```
218  *
219  * Forward mapping:
220  *
221  * | from A: | 0   | 1   | 2   | 3   | 4   |
222  * |--------:|:---:|:---:|:---:|:---:|:---:|
223  * |   to B: |  0  | 1-2 |  3  | 4-5 |  6  |
224  *
225  * Reverse mapping:
226  *
227  * | from B: | 0   | 1   | 2   | 3   | 4   | 5   | 6   |
228  * |--------:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
229  * |   to A: |  0  |  1  |  1  |  2  |  3  |  3  |  4  |
230  *
231  * #### Multiple replacements of the same range:
232  * ```
233  *     01234
234  * A: "abcd"
235  *      ||
236  *    "awxd"
237  *      ||
238  * B: "ayzd"
239  *     01234
240  * ```
241  *
242  * Forward mapping:
243  *
244  * | from A: | 0   | 1   | 2   | 3   | 4   |
245  * |--------:|:---:|:---:|:---:|:---:|:---:|
246  * |   to B: |  0  |  1  | 1-3 |  3  |  4  |
247  *
248  * Reverse mapping:
249  *
250  * | from B: | 0   | 1   | 2   | 3   | 4   |
251  * |--------:|:---:|:---:|:---:|:---:|:---:|
252  * |   to A: |  0  |  1  | 1-3 |  3  |  4  |
253  *
254  * For other edge cases, including overlapping replacements, see `OffsetMappingCalculatorTest`.
255  */
256 internal class OffsetMappingCalculator {
257     /** Resizable array of edit operations, size is defined by [opsSize]. */
258     private var ops = OpArray(size = 10)
259     private var opsSize = 0
260 
261     /**
262      * Records an edit operation that replaces the range from [sourceStart] (inclusive) to
263      * [sourceEnd] (exclusive) in the original text with some text with length [newLength].
264      */
265     fun recordEditOperation(sourceStart: Int, sourceEnd: Int, newLength: Int) {
266         requirePrecondition(newLength >= 0) { "Expected newLen to be ≥ 0, was $newLength" }
267         val sourceMin = minOf(sourceStart, sourceEnd)
268         val sourceMax = maxOf(sourceMin, sourceEnd)
269         val sourceLength = sourceMax - sourceMin
270 
271         // 0,0 is a noop, and 1,1 is always a 1-to-1 mapping so we don't need to record it.
272         if (sourceLength < 2 && sourceLength == newLength) return
273 
274         // Append the operation to the array, growing it if necessary.
275         val newSize = opsSize + 1
276         if (newSize > ops.size) {
277             val newCapacity = maxOf(newSize * 2, ops.size * 2)
278             ops = ops.copyOf(newCapacity)
279         }
280         ops.set(opsSize, sourceMin, sourceLength, newLength)
281         opsSize = newSize
282     }
283 
284     /**
285      * Maps an [offset] in the original string to the corresponding offset, or range of offsets, in
286      * the transformed string.
287      */
288     fun mapFromSource(offset: Int): TextRange = map(offset, fromSource = true)
289 
290     /**
291      * Maps an [offset] in the original string to the corresponding offset, or range of offsets, in
292      * the transformed string.
293      */
294     fun mapFromDest(offset: Int): TextRange = map(offset, fromSource = false)
295 
296     private fun map(offset: Int, fromSource: Boolean): TextRange {
297         var start = offset
298         var end = offset
299 
300         // This algorithm works for both forward and reverse mapping, we just need to iterate
301         // backwards to do reverse mapping.
302         ops.forEach(max = opsSize, reversed = !fromSource) { opOffset, opSrcLen, opDestLen ->
303             val newStart =
304                 mapStep(
305                     offset = start,
306                     opOffset = opOffset,
307                     untransformedLen = opSrcLen,
308                     transformedLen = opDestLen,
309                     fromSource = fromSource
310                 )
311             val newEnd =
312                 mapStep(
313                     offset = end,
314                     opOffset = opOffset,
315                     untransformedLen = opSrcLen,
316                     transformedLen = opDestLen,
317                     fromSource = fromSource
318                 )
319             // range = newStart ∪ newEnd
320             // Note we don't read TextRange.min/max here because the above code always returns
321             // normalized ranges. It's no less correct, but there's no need to do the additional
322             // min/max calls inside the min/max properties.
323             start = minOf(newStart.start, newEnd.start)
324             end = maxOf(newStart.end, newEnd.end)
325         }
326 
327         return TextRange(start, end)
328     }
329 
330     private fun mapStep(
331         offset: Int,
332         opOffset: Int,
333         untransformedLen: Int,
334         transformedLen: Int,
335         fromSource: Boolean
336     ): TextRange {
337         val srcLen = if (fromSource) untransformedLen else transformedLen
338         val destLen = if (fromSource) transformedLen else untransformedLen
339         return when {
340             // Before the operation, no change.
341             offset < opOffset -> TextRange(offset)
342             offset == opOffset -> {
343                 if (srcLen == 0) {
344                     // On insertion point, map to inserted range.
345                     TextRange(opOffset, opOffset + destLen)
346                 } else {
347                     // On start of replacement, map to start of replaced range.
348                     TextRange(opOffset)
349                 }
350             }
351             offset < opOffset + srcLen -> {
352                 if (destLen == 0) {
353                     // In deleted range, map to start of deleted range.
354                     TextRange(opOffset)
355                 } else {
356                     // In replaced range, map to transformed range.
357                     TextRange(opOffset, opOffset + destLen)
358                 }
359             }
360 
361             // On end of or after replaced range, offset the offset.
362             else -> TextRange(offset - srcLen + destLen)
363         }
364     }
365 }
366 
367 /**
368  * An array of 3-tuples of ints. Each element's values are stored as individual values in the
369  * underlying array.
370  */
371 @kotlin.jvm.JvmInline
372 private value class OpArray private constructor(private val values: IntArray) {
373     constructor(size: Int) : this(IntArray(size * OpArrayElementSize))
374 
375     val size: Int
376         get() = values.size / OpArrayElementSize
377 
setnull378     fun set(index: Int, offset: Int, srcLen: Int, destLen: Int) {
379         values[index * OpArrayElementSize] = offset
380         values[index * OpArrayElementSize + 1] = srcLen
381         values[index * OpArrayElementSize + 2] = destLen
382     }
383 
copyOfnull384     fun copyOf(newSize: Int) = OpArray(values.copyOf(newSize * OpArrayElementSize))
385 
386     /**
387      * Loops through the array between 0 and [max] (exclusive). If [reversed] is false (the
388      * default), iterates forward from 0. When it's true, iterates backwards from `max-1`.
389      */
390     inline fun forEach(
391         max: Int,
392         reversed: Boolean = false,
393         block: (offset: Int, srcLen: Int, destLen: Int) -> Unit
394     ) {
395         if (max < 0) return
396         // Note: This stamps out block twice at the callsite, which is normally bad for an inline
397         // function to do. However, this is a file-private function which is only called in one
398         // place that would need to have two copies of mostly-identical code anyway. Doing the
399         // duplication here keeps the more complicated logic at the callsite more readable.
400         if (reversed) {
401             for (i in max - 1 downTo 0) {
402                 val offset = values[i * OpArrayElementSize]
403                 val srcLen = values[i * OpArrayElementSize + 1]
404                 val destLen = values[i * OpArrayElementSize + 2]
405                 block(offset, srcLen, destLen)
406             }
407         } else {
408             for (i in 0 until max) {
409                 val offset = values[i * OpArrayElementSize]
410                 val srcLen = values[i * OpArrayElementSize + 1]
411                 val destLen = values[i * OpArrayElementSize + 2]
412                 block(offset, srcLen, destLen)
413             }
414         }
415     }
416 }
417 
418 private const val OpArrayElementSize = 3
419