/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License

// created: 2018may10 Markus W. Scherer

package ohos.global.icu.util;

import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
 * This does not implement java.util.Map.
 *
 * <p>Subclasses supply {@link #get(int)} and {@link #getRange(int, ValueFilter, Range)};
 * the range iterator, the surrogate-aware range lookup and the string iterator
 * in this class are built on top of those two methods.
 *
 * @hide exposed on OHOS
 */
public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
    /**
     * Selectors for how getRange() should report value ranges overlapping with surrogates.
     * Most users should use NORMAL.
     *
     * @see #getRange
     * @hide exposed on OHOS
     */
    public enum RangeOption {
        /**
         * getRange() enumerates all same-value ranges as stored in the map.
         * Most users should use this option.
         */
        NORMAL,
        /**
         * getRange() enumerates all same-value ranges as stored in the map,
         * except that lead surrogates (U+D800..U+DBFF) are treated as having the
         * surrogateValue, which is passed to getRange() as a separate parameter.
         * The surrogateValue is not transformed via filter().
         * See {@link Character#isHighSurrogate}.
         *
         * <p>Most users should use NORMAL instead.
         *
         * <p>This option is useful for maps that map surrogate code *units* to
         * special values optimized for UTF-16 string processing
         * or for special error behavior for unpaired surrogates,
         * but those values are not to be associated with the lead surrogate code *points*.
         */
        FIXED_LEAD_SURROGATES,
        /**
         * getRange() enumerates all same-value ranges as stored in the map,
         * except that all surrogates (U+D800..U+DFFF) are treated as having the
         * surrogateValue, which is passed to getRange() as a separate parameter.
         * The surrogateValue is not transformed via filter().
         * See {@link Character#isSurrogate}.
         *
         * <p>Most users should use NORMAL instead.
         *
         * <p>This option is useful for maps that map surrogate code *units* to
         * special values optimized for UTF-16 string processing
         * or for special error behavior for unpaired surrogates,
         * but those values are not to be associated with the surrogate code *points*.
         */
        FIXED_ALL_SURROGATES
    }

    /**
     * Callback function interface: Modifies a map value.
     * Optionally called by getRange().
     * The modified value will be returned by the getRange() function.
     *
     * <p>Can be used to ignore some of the value bits,
     * make a filter for one of several values,
     * return a value index computed from the map value, etc.
     *
     * @see #getRange
     * @see #iterator
     * @hide exposed on OHOS
     */
    public interface ValueFilter {
        /**
         * Modifies the map value.
         *
         * @param value map value
         * @return modified value
         */
        int apply(int value);
    }

    /**
     * Range iteration result data.
     * Code points from start to end map to the same value.
     * The value may have been modified by {@link ValueFilter#apply(int)},
     * or it may be the surrogateValue if a RangeOption other than "normal" was used.
     *
     * <p>Instances are mutable: getRange() and {@link #set(int, int, int)} overwrite
     * the fields in place.
     *
     * @see #getRange
     * @see #iterator
     * @hide exposed on OHOS
     */
    public static final class Range {
        private int start;
        private int end;
        private int value;

        /**
         * Constructor. Sets start and end to -1 and value to 0.
         */
        public Range() {
            start = end = -1;
            value = 0;
        }

        /**
         * @return the start code point
         */
        public int getStart() { return start; }
        /**
         * @return the (inclusive) end code point
         */
        public int getEnd() { return end; }
        /**
         * @return the range value
         */
        public int getValue() { return value; }
        /**
         * Sets the range. When using {@link #iterator()},
         * iteration will resume after the newly set end.
         *
         * @param start new start code point
         * @param end new end code point
         * @param value new value
         */
        public void set(int start, int end, int value) {
            this.start = start;
            this.end = end;
            this.value = value;
        }
    }

    /**
     * Iterator behind {@link #iterator()}: walks the unfiltered same-value ranges of the
     * enclosing map in code point order, reusing one Range object for every step.
     */
    private final class RangeIterator implements Iterator<Range> {
        // Shared result object. Its end field doubles as the iteration cursor:
        // a new Range starts with end == -1, so the first lookup begins at code point 0.
        private final Range range = new Range();

        /**
         * @return true while the current range end lies in -1..U+10FFFE,
         *         that is, while there is a code point after it to look up
         */
        @Override
        public boolean hasNext() {
            return -1 <= range.end && range.end < 0x10ffff;
        }

        /**
         * Fetches the range that starts right after the current range end.
         *
         * @return the shared Range object, updated in place
         * @throws NoSuchElementException if getRange() rejects the next start code point
         */
        @Override
        public Range next() {
            if (getRange(range.end + 1, null, range)) {
                return range;
            } else {
                throw new NoSuchElementException();
            }
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public final void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Iterates over code points of a string and fetches map values.
     * This does not implement java.util.Iterator.
     *
     * <pre>
     * void onString(CodePointMap map, CharSequence s, int start) {
     *     CodePointMap.StringIterator iter = map.stringIterator(s, start);
     *     while (iter.next()) {
     *         int end = iter.getIndex();  // code point from between start and end
     *         useValue(s, start, end, iter.getCodePoint(), iter.getValue());
     *         start = end;
     *     }
     * }
     * </pre>
     *
     * <p>This class is not intended for public subclassing.
     *
     * @hide exposed on OHOS
     */
    public class StringIterator {
        /**
         * @deprecated This API is ICU internal only.
         * @hide draft / provisional / internal are hidden on OHOS
         */
        @Deprecated
        protected CharSequence s;
        /**
         * @deprecated This API is ICU internal only.
         * @hide draft / provisional / internal are hidden on OHOS
         */
        @Deprecated
        protected int sIndex;
        /**
         * @deprecated This API is ICU internal only.
         * @hide draft / provisional / internal are hidden on OHOS
         */
        @Deprecated
        protected int c;
        /**
         * @deprecated This API is ICU internal only.
         * @hide draft / provisional / internal are hidden on OHOS
         */
        @Deprecated
        protected int value;

        /**
         * Creates an iterator positioned at sIndex; the code point starts out as -1
         * and the value as 0 until next() or previous() succeeds.
         *
         * @deprecated This API is ICU internal only.
         * @hide draft / provisional / internal are hidden on OHOS
         */
        @Deprecated
        protected StringIterator(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            c = -1;
            value = 0;
        }

        /**
         * Resets the iterator to a new string and/or a new string index.
         * The code point is reset to -1 and the value to 0.
         *
         * @param s string to iterate over
         * @param sIndex string index where the iteration will start
         */
        public void reset(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            c = -1;
            value = 0;
        }

        /**
         * Reads the next code point, post-increments the string index,
         * and gets a value from the map.
         * Sets an implementation-defined error value if the code point is an unpaired surrogate.
         *
         * @return true if the string index was not yet at the end of the string;
         *         otherwise the iterator did not advance
         */
        public boolean next() {
            if (sIndex >= s.length()) {
                return false;
            }
            c = Character.codePointAt(s, sIndex);
            // Advance by one or two code units, depending on the code point just read.
            sIndex += Character.charCount(c);
            value = get(c);
            return true;
        }

        /**
         * Reads the previous code point, pre-decrements the string index,
         * and gets a value from the map.
         * Sets an implementation-defined error value if the code point is an unpaired surrogate.
         *
         * @return true if the string index was not yet at the start of the string;
         *         otherwise the iterator did not advance
         */
        public boolean previous() {
            if (sIndex <= 0) {
                return false;
            }
            c = Character.codePointBefore(s, sIndex);
            // Step back by one or two code units, depending on the code point just read.
            sIndex -= Character.charCount(c);
            value = get(c);
            return true;
        }
        /**
         * @return the string index
         */
        public final int getIndex() { return sIndex; }
        /**
         * @return the code point
         */
        public final int getCodePoint() { return c; }
        /**
         * @return the map value,
         *         or an implementation-defined error value if
         *         the code point is an unpaired surrogate
         */
        public final int getValue() { return value; }
    }

    /**
     * Protected no-args constructor.
     */
    protected CodePointMap() {
    }

    /**
     * Returns the value for a code point as stored in the map, with range checking.
     * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
     *
     * @param c the code point
     * @return the map value,
     *         or an implementation-defined error value if
     *         the code point is not in the range 0..U+10FFFF
     */
    public abstract int get(int c);

    /**
     * Sets the range object to a range of code points beginning with the start parameter.
     * The range start is the same as the start input parameter
     * (even if there are preceding code points that have the same value).
     * The range end is the last code point such that
     * all those from start to there have the same value.
     * Returns false if start is not 0..U+10FFFF.
     * Can be used to efficiently iterate over all same-value ranges in a map.
     * (This is normally faster than iterating over code points and get()ting each value,
     * but may be much slower than a data structure that stores ranges directly.)
     *
     * <p>If the {@link ValueFilter} parameter is not null, then
     * the value to be delivered is passed through that filter, and the return value is the end
     * of the range where all values are modified to the same actual value.
     * The value is unchanged if that parameter is null.
     *
     * <p>Example:
     * <pre>
     * int start = 0;
     * CodePointMap.Range range = new CodePointMap.Range();
     * while (map.getRange(start, null, range)) {
     *     int end = range.getEnd();
     *     int value = range.getValue();
     *     // Work with the range start..end and its value.
     *     start = end + 1;
     * }
     * </pre>
     *
     * @param start range start
     * @param filter an object that may modify the map data value,
     *     or null if the values from the map are to be used unmodified
     * @param range the range object that will be set to the code point range and value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     */
    public abstract boolean getRange(int start, ValueFilter filter, Range range);

    /**
     * Sets the range object to a range of code points beginning with the start parameter.
     * The range start is the same as the start input parameter
     * (even if there are preceding code points that have the same value).
     * The range end is the last code point such that
     * all those from start to there have the same value.
     * Returns false if start is not 0..U+10FFFF.
     *
     * <p>Same as the simpler {@link #getRange(int, ValueFilter, Range)} but optionally
     * modifies the range if it overlaps with surrogate code points.
     *
     * <p>The option is only checked with an {@code assert}. When assertions are disabled,
     * a null option is not rejected and is handled like
     * {@link RangeOption#FIXED_LEAD_SURROGATES}.
     *
     * @param start range start
     * @param option defines whether surrogates are treated normally,
     *               or as having the surrogateValue; usually {@link RangeOption#NORMAL}
     * @param surrogateValue value for surrogates; ignored if option=={@link RangeOption#NORMAL}
     * @param filter an object that may modify the map data value,
     *     or null if the values from the map are to be used unmodified
     * @param range the range object that will be set to the code point range and value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     */
    public boolean getRange(int start, RangeOption option, int surrogateValue,
            ValueFilter filter, Range range) {
        assert option != null;
        if (!getRange(start, filter, range)) {
            return false;
        }
        if (option == RangeOption.NORMAL) {
            return true;
        }
        // Last code point that is forced to the surrogateValue under this option.
        int surrEnd = option == RangeOption.FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;
        int end = range.end;
        // U+D7FF is the last code point before the surrogates. A range that stops
        // before it, or starts after the fixed surrogates, is reported unchanged.
        if (end < 0xd7ff || start > surrEnd) {
            return true;
        }
        // The range overlaps with surrogates, or ends just before the first one.
        if (range.value == surrogateValue) {
            if (end >= surrEnd) {
                // Surrogates followed by a non-surrValue range,
                // or surrogates are part of a larger surrValue range.
                return true;
            }
        } else {
            if (start <= 0xd7ff) {
                range.end = 0xd7ff;  // Non-surrValue range ends before surrValue surrogates.
                return true;
            }
            // Start is a surrogate with a non-surrValue code *unit* value.
            // Return a surrValue code *point* range.
            range.value = surrogateValue;
            if (end > surrEnd) {
                range.end = surrEnd;  // Surrogate range ends before non-surrValue rest of range.
                return true;
            }
        }
        // See if the surrValue surrogate range can be merged with
        // an immediately following range.
        if (getRange(surrEnd + 1, filter, range) && range.value == surrogateValue) {
            range.start = start;
            return true;
        }
        // No merge: the lookup above overwrote the range, so restore
        // the surrogate range start..surrEnd with the surrogateValue.
        range.start = start;
        range.end = surrEnd;
        range.value = surrogateValue;
        return true;
    }

    /**
     * Convenience iterator over same-map-value code point ranges.
     * Same as looping over all ranges with {@link #getRange(int, ValueFilter, Range)}
     * without filtering.
     * Adjacent ranges have different map values.
     *
     * <p>The iterator always returns the same Range object.
     *
     * @return a Range iterator
     */
    @Override
    public Iterator<Range> iterator() {
        return new RangeIterator();
    }

    /**
     * Returns an iterator (not a java.util.Iterator) over code points of a string
     * for fetching map values.
     *
     * @param s string to iterate over
     * @param sIndex string index where the iteration will start
     * @return the iterator
     */
    public StringIterator stringIterator(CharSequence s, int sIndex) {
        return new StringIterator(s, sIndex);
    }
}