1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2018 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 5 // created: 2018may10 Markus W. Scherer 6 7 package ohos.global.icu.util; 8 9 import java.util.Iterator; 10 import java.util.NoSuchElementException; 11 12 /** 13 * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. 14 * This does not implement java.util.Map. 15 * 16 * @hide exposed on OHOS 17 */ 18 public abstract class CodePointMap implements Iterable<CodePointMap.Range> { 19 /** 20 * Selectors for how getRange() should report value ranges overlapping with surrogates. 21 * Most users should use NORMAL. 22 * 23 * @see #getRange 24 * @hide exposed on OHOS 25 */ 26 public enum RangeOption { 27 /** 28 * getRange() enumerates all same-value ranges as stored in the map. 29 * Most users should use this option. 30 */ 31 NORMAL, 32 /** 33 * getRange() enumerates all same-value ranges as stored in the map, 34 * except that lead surrogates (U+D800..U+DBFF) are treated as having the 35 * surrogateValue, which is passed to getRange() as a separate parameter. 36 * The surrogateValue is not transformed via filter(). 37 * See {@link Character#isHighSurrogate}. 38 * 39 * <p>Most users should use NORMAL instead. 40 * 41 * <p>This option is useful for maps that map surrogate code *units* to 42 * special values optimized for UTF-16 string processing 43 * or for special error behavior for unpaired surrogates, 44 * but those values are not to be associated with the lead surrogate code *points*. 45 */ 46 FIXED_LEAD_SURROGATES, 47 /** 48 * getRange() enumerates all same-value ranges as stored in the map, 49 * except that all surrogates (U+D800..U+DFFF) are treated as having the 50 * surrogateValue, which is passed to getRange() as a separate parameter. 51 * The surrogateValue is not transformed via filter(). 52 * See {@link Character#isSurrogate}. 53 * 54 * <p>Most users should use NORMAL instead. 55 * 56 * <p>This option is useful for maps that map surrogate code *units* to 57 * special values optimized for UTF-16 string processing 58 * or for special error behavior for unpaired surrogates, 59 * but those values are not to be associated with the lead surrogate code *points*. 60 */ 61 FIXED_ALL_SURROGATES 62 } 63 64 /** 65 * Callback function interface: Modifies a map value. 66 * Optionally called by getRange(). 67 * The modified value will be returned by the getRange() function. 68 * 69 * <p>Can be used to ignore some of the value bits, 70 * make a filter for one of several values, 71 * return a value index computed from the map value, etc. 72 * 73 * @see #getRange 74 * @see #iterator 75 * @hide exposed on OHOS 76 */ 77 public interface ValueFilter { 78 /** 79 * Modifies the map value. 80 * 81 * @param value map value 82 * @return modified value 83 */ apply(int value)84 public int apply(int value); 85 } 86 87 /** 88 * Range iteration result data. 89 * Code points from start to end map to the same value. 90 * The value may have been modified by {@link ValueFilter#apply(int)}, 91 * or it may be the surrogateValue if a RangeOption other than "normal" was used. 92 * 93 * @see #getRange 94 * @see #iterator 95 * @hide exposed on OHOS 96 */ 97 public static final class Range { 98 private int start; 99 private int end; 100 private int value; 101 102 /** 103 * Constructor. Sets start and end to -1 and value to 0. 104 */ Range()105 public Range() { 106 start = end = -1; 107 value = 0; 108 } 109 110 /** 111 * @return the start code point 112 */ getStart()113 public int getStart() { return start; } 114 /** 115 * @return the (inclusive) end code point 116 */ getEnd()117 public int getEnd() { return end; } 118 /** 119 * @return the range value 120 */ getValue()121 public int getValue() { return value; } 122 /** 123 * Sets the range. When using {@link #iterator()}, 124 * iteration will resume after the newly set end. 125 * 126 * @param start new start code point 127 * @param end new end code point 128 * @param value new value 129 */ set(int start, int end, int value)130 public void set(int start, int end, int value) { 131 this.start = start; 132 this.end = end; 133 this.value = value; 134 } 135 } 136 137 private final class RangeIterator implements Iterator<Range> { 138 private Range range = new Range(); 139 140 @Override hasNext()141 public boolean hasNext() { 142 return -1 <= range.end && range.end < 0x10ffff; 143 } 144 145 @Override next()146 public Range next() { 147 if (getRange(range.end + 1, null, range)) { 148 return range; 149 } else { 150 throw new NoSuchElementException(); 151 } 152 } 153 154 @Override remove()155 public final void remove() { 156 throw new UnsupportedOperationException(); 157 } 158 } 159 160 /** 161 * Iterates over code points of a string and fetches map values. 162 * This does not implement java.util.Iterator. 163 * 164 * <pre> 165 * void onString(CodePointMap map, CharSequence s, int start) { 166 * CodePointMap.StringIterator iter = map.stringIterator(s, start); 167 * while (iter.next()) { 168 * int end = iter.getIndex(); // code point from between start and end 169 * useValue(s, start, end, iter.getCodePoint(), iter.getValue()); 170 * start = end; 171 * } 172 * } 173 * </pre> 174 * 175 * <p>This class is not intended for public subclassing. 176 * 177 * @hide exposed on OHOS 178 */ 179 public class StringIterator { 180 /** 181 * @deprecated This API is ICU internal only. 182 * @hide draft / provisional / internal are hidden on OHOS 183 */ 184 @Deprecated 185 protected CharSequence s; 186 /** 187 * @deprecated This API is ICU internal only. 188 * @hide draft / provisional / internal are hidden on OHOS 189 */ 190 @Deprecated 191 protected int sIndex; 192 /** 193 * @deprecated This API is ICU internal only. 194 * @hide draft / provisional / internal are hidden on OHOS 195 */ 196 @Deprecated 197 protected int c; 198 /** 199 * @deprecated This API is ICU internal only. 200 * @hide draft / provisional / internal are hidden on OHOS 201 */ 202 @Deprecated 203 protected int value; 204 205 /** 206 * @deprecated This API is ICU internal only. 207 * @hide draft / provisional / internal are hidden on OHOS 208 */ 209 @Deprecated StringIterator(CharSequence s, int sIndex)210 protected StringIterator(CharSequence s, int sIndex) { 211 this.s = s; 212 this.sIndex = sIndex; 213 c = -1; 214 value = 0; 215 } 216 217 /** 218 * Resets the iterator to a new string and/or a new string index. 219 * 220 * @param s string to iterate over 221 * @param sIndex string index where the iteration will start 222 */ reset(CharSequence s, int sIndex)223 public void reset(CharSequence s, int sIndex) { 224 this.s = s; 225 this.sIndex = sIndex; 226 c = -1; 227 value = 0; 228 } 229 230 /** 231 * Reads the next code point, post-increments the string index, 232 * and gets a value from the map. 233 * Sets an implementation-defined error value if the code point is an unpaired surrogate. 234 * 235 * @return true if the string index was not yet at the end of the string; 236 * otherwise the iterator did not advance 237 */ next()238 public boolean next() { 239 if (sIndex >= s.length()) { 240 return false; 241 } 242 c = Character.codePointAt(s, sIndex); 243 sIndex += Character.charCount(c); 244 value = get(c); 245 return true; 246 } 247 248 /** 249 * Reads the previous code point, pre-decrements the string index, 250 * and gets a value from the map. 251 * Sets an implementation-defined error value if the code point is an unpaired surrogate. 252 * 253 * @return true if the string index was not yet at the start of the string; 254 * otherwise the iterator did not advance 255 */ previous()256 public boolean previous() { 257 if (sIndex <= 0) { 258 return false; 259 } 260 c = Character.codePointBefore(s, sIndex); 261 sIndex -= Character.charCount(c); 262 value = get(c); 263 return true; 264 } 265 /** 266 * @return the string index 267 */ getIndex()268 public final int getIndex() { return sIndex; } 269 /** 270 * @return the code point 271 */ getCodePoint()272 public final int getCodePoint() { return c; } 273 /** 274 * @return the map value, 275 * or an implementation-defined error value if 276 * the code point is an unpaired surrogate 277 */ getValue()278 public final int getValue() { return value; } 279 } 280 281 /** 282 * Protected no-args constructor. 283 */ CodePointMap()284 protected CodePointMap() { 285 } 286 287 /** 288 * Returns the value for a code point as stored in the map, with range checking. 289 * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF. 290 * 291 * @param c the code point 292 * @return the map value, 293 * or an implementation-defined error value if 294 * the code point is not in the range 0..U+10FFFF 295 */ get(int c)296 public abstract int get(int c); 297 298 /** 299 * Sets the range object to a range of code points beginning with the start parameter. 300 * The range start is the same as the start input parameter 301 * (even if there are preceding code points that have the same value). 302 * The range end is the last code point such that 303 * all those from start to there have the same value. 304 * Returns false if start is not 0..U+10FFFF. 305 * Can be used to efficiently iterate over all same-value ranges in a map. 306 * (This is normally faster than iterating over code points and get()ting each value, 307 * but may be much slower than a data structure that stores ranges directly.) 308 * 309 * <p>If the {@link ValueFilter} parameter is not null, then 310 * the value to be delivered is passed through that filter, and the return value is the end 311 * of the range where all values are modified to the same actual value. 312 * The value is unchanged if that parameter is null. 313 * 314 * <p>Example: 315 * <pre> 316 * int start = 0; 317 * CodePointMap.Range range = new CodePointMap.Range(); 318 * while (map.getRange(start, null, range)) { 319 * int end = range.getEnd(); 320 * int value = range.getValue(); 321 * // Work with the range start..end and its value. 322 * start = end + 1; 323 * } 324 * </pre> 325 * 326 * @param start range start 327 * @param filter an object that may modify the map data value, 328 * or null if the values from the map are to be used unmodified 329 * @param range the range object that will be set to the code point range and value 330 * @return true if start is 0..U+10FFFF; otherwise no new range is fetched 331 */ getRange(int start, ValueFilter filter, Range range)332 public abstract boolean getRange(int start, ValueFilter filter, Range range); 333 334 /** 335 * Sets the range object to a range of code points beginning with the start parameter. 336 * The range start is the same as the start input parameter 337 * (even if there are preceding code points that have the same value). 338 * The range end is the last code point such that 339 * all those from start to there have the same value. 340 * Returns false if start is not 0..U+10FFFF. 341 * 342 * <p>Same as the simpler {@link #getRange(int, ValueFilter, Range)} but optionally 343 * modifies the range if it overlaps with surrogate code points. 344 * 345 * @param start range start 346 * @param option defines whether surrogates are treated normally, 347 * or as having the surrogateValue; usually {@link RangeOption#NORMAL} 348 * @param surrogateValue value for surrogates; ignored if option=={@link RangeOption#NORMAL} 349 * @param filter an object that may modify the map data value, 350 * or null if the values from the map are to be used unmodified 351 * @param range the range object that will be set to the code point range and value 352 * @return true if start is 0..U+10FFFF; otherwise no new range is fetched 353 */ getRange(int start, RangeOption option, int surrogateValue, ValueFilter filter, Range range)354 public boolean getRange(int start, RangeOption option, int surrogateValue, 355 ValueFilter filter, Range range) { 356 assert option != null; 357 if (!getRange(start, filter, range)) { 358 return false; 359 } 360 if (option == RangeOption.NORMAL) { 361 return true; 362 } 363 int surrEnd = option == RangeOption.FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; 364 int end = range.end; 365 if (end < 0xd7ff || start > surrEnd) { 366 return true; 367 } 368 // The range overlaps with surrogates, or ends just before the first one. 369 if (range.value == surrogateValue) { 370 if (end >= surrEnd) { 371 // Surrogates followed by a non-surrValue range, 372 // or surrogates are part of a larger surrValue range. 373 return true; 374 } 375 } else { 376 if (start <= 0xd7ff) { 377 range.end = 0xd7ff; // Non-surrValue range ends before surrValue surrogates. 378 return true; 379 } 380 // Start is a surrogate with a non-surrValue code *unit* value. 381 // Return a surrValue code *point* range. 382 range.value = surrogateValue; 383 if (end > surrEnd) { 384 range.end = surrEnd; // Surrogate range ends before non-surrValue rest of range. 385 return true; 386 } 387 } 388 // See if the surrValue surrogate range can be merged with 389 // an immediately following range. 390 if (getRange(surrEnd + 1, filter, range) && range.value == surrogateValue) { 391 range.start = start; 392 return true; 393 } 394 range.start = start; 395 range.end = surrEnd; 396 range.value = surrogateValue; 397 return true; 398 } 399 400 /** 401 * Convenience iterator over same-map-value code point ranges. 402 * Same as looping over all ranges with {@link #getRange(int, ValueFilter, Range)} 403 * without filtering. 404 * Adjacent ranges have different map values. 405 * 406 * <p>The iterator always returns the same Range object. 407 * 408 * @return a Range iterator 409 */ 410 @Override iterator()411 public Iterator<Range> iterator() { 412 return new RangeIterator(); 413 } 414 415 /** 416 * Returns an iterator (not a java.util.Iterator) over code points of a string 417 * for fetching map values. 418 * 419 * @param s string to iterate over 420 * @param sIndex string index where the iteration will start 421 * @return the iterator 422 */ stringIterator(CharSequence s, int sIndex)423 public StringIterator stringIterator(CharSequence s, int sIndex) { 424 return new StringIterator(s, sIndex); 425 } 426 } 427