/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 **********************************************************************
 * Copyright (c) 2003-2011, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
 * Created: September 23 2003
 * Since: ICU 2.8
 **********************************************************************
 */
package ohos.global.icu.impl;

import java.text.ParsePosition;

import ohos.global.icu.text.SymbolTable;
import ohos.global.icu.text.UTF16;

/**
 * An iterator that returns 32-bit code points.  This class is deliberately
 * <em>not</em> related to any of the JDK or ICU4J character iterator classes
 * in order to minimize complexity.
 *
 * <p>The iterator reads from one of two sources: the rule text itself
 * (tracked by the caller-supplied {@link ParsePosition}, which is shared with
 * the caller and updated in place), or the value of a variable that is
 * currently being expanded (tracked by {@code buf}/{@code bufPos}).
 *
 * @author Alan Liu
 * @hide exposed on OHOS
 */
public class RuleCharacterIterator {

    // TODO: Ideas for later.  (Do not implement if not needed, lest the
    // code coverage numbers go down due to unused methods.)
    // 1. Add a copy constructor, equals() method, clone() method.
    // 2. Rather than return DONE, throw an exception if the end
    // is reached -- this is an alternate usage model, probably not useful.
    // 3. Return isEscaped from next().  If this happens,
    // don't keep an isEscaped member variable.

    /**
     * Text being iterated.
     */
    private final String text;

    /**
     * Position of iterator.  This is the caller's object; it is updated in
     * place as the iterator advances through {@code text}.
     */
    private final ParsePosition pos;

    /**
     * Symbol table used to parse and dereference variables.  May be null.
     */
    private final SymbolTable sym;

    /**
     * Current variable expansion, or null if none.
     */
    private char[] buf;

    /**
     * Position within buf[].  Meaningless if buf == null.
     */
    private int bufPos;

    /**
     * Flag indicating whether the last character was parsed from an escape.
     */
    private boolean isEscaped;

    /**
     * Value returned when there are no more characters to iterate.
     */
    public static final int DONE = -1;

    /**
     * Bitmask option to enable parsing of variable names.  If
     * {@code (options & PARSE_VARIABLES) != 0}, then an embedded variable
     * will be expanded to its value.  Variables are parsed using the
     * SymbolTable API.
     */
    public static final int PARSE_VARIABLES = 1;

    /**
     * Bitmask option to enable parsing of escape sequences.  If
     * {@code (options & PARSE_ESCAPES) != 0}, then an embedded escape
     * sequence will be expanded to its value.  Escapes are parsed using
     * Utility.unescapeAt().
     */
    public static final int PARSE_ESCAPES = 2;

    /**
     * Bitmask option to enable skipping of whitespace.  If
     * {@code (options & SKIP_WHITESPACE) != 0}, then Unicode
     * Pattern_White_Space characters will be silently skipped, as if they
     * were not present in the input.
     */
    public static final int SKIP_WHITESPACE = 4;

    /**
     * Constructs an iterator over the given text, starting at the given
     * position.
     * @param text the text to be iterated
     * @param sym the symbol table, or null if there is none.  If sym is null,
     * then variables will not be dereferenced, even if the PARSE_VARIABLES
     * option is set.
     * @param pos upon input, the index of the next character to return.  If a
     * variable has been dereferenced, then pos will <em>not</em> increment as
     * characters of the variable value are iterated.
     * @throws IllegalArgumentException if text or pos is null, or if the
     * index held by pos lies outside {@code [0, text.length()]}
     */
    public RuleCharacterIterator(String text, SymbolTable sym,
                                 ParsePosition pos) {
        if (text == null) {
            throw new IllegalArgumentException("text must not be null");
        }
        if (pos == null) {
            throw new IllegalArgumentException("pos must not be null");
        }
        int start = pos.getIndex();
        // Fail fast on a start index that could never be read from text.
        if (start < 0 || start > text.length()) {
            throw new IllegalArgumentException(
                    "Start index out of range: " + start);
        }
        this.text = text;
        this.sym = sym;
        this.pos = pos;
        this.buf = null;
    }

    /**
     * Returns true if this iterator has no more characters to return.
     * @return true if no variable expansion is in progress and the text
     * position is at the end of the text
     */
    public boolean atEnd() {
        return buf == null && pos.getIndex() == text.length();
    }

    /**
     * Returns the next character using the given options, or DONE if there
     * are no more characters, and advance the position to the next
     * character.
     * @param options one or more of the following options, bitwise-OR-ed
     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
     * @return the current 32-bit code point, or DONE
     * @throws IllegalArgumentException if a referenced variable is not
     * defined in the symbol table, or if an escape sequence is invalid
     */
    public int next(int options) {
        int c;
        isEscaped = false;

        for (;;) {
            c = _current();
            _advance(UTF16.getCharCount(c));

            // Variable references are only recognized in the rule text
            // itself (buf == null), never inside an expansion.
            if (c == SymbolTable.SYMBOL_REF && buf == null &&
                (options & PARSE_VARIABLES) != 0 && sym != null) {
                String name = sym.parseReference(text, pos, text.length());
                // If name == null there was an isolated SYMBOL_REF;
                // return it.  Caller must be prepared for this.
                if (name == null) {
                    break;
                }
                bufPos = 0;
                buf = sym.lookup(name);
                if (buf == null) {
                    throw new IllegalArgumentException(
                                "Undefined variable: " + name);
                }
                // Handle empty variable value
                if (buf.length == 0) {
                    buf = null;
                }
                // Loop again to read the first character of the expansion
                // (or the next text character if the value was empty).
                continue;
            }

            if ((options & SKIP_WHITESPACE) != 0 &&
                PatternProps.isWhiteSpace(c)) {
                continue;
            }

            if (c == '\\' && (options & PARSE_ESCAPES) != 0) {
                int[] offset = { 0 };
                int unescaped = Utility.unescapeAt(lookahead(), offset);
                // Reject a malformed escape before consuming anything or
                // flagging the character as escaped.
                if (unescaped < 0) {
                    throw new IllegalArgumentException("Invalid escape");
                }
                c = unescaped;
                jumpahead(offset[0]);
                isEscaped = true;
            }

            break;
        }

        return c;
    }

    /**
     * Returns true if the last character returned by next() was
     * escaped.  This will only be the case if the option passed in to
     * next() included PARSE_ESCAPES and the next character was an
     * escape sequence.
     * @return true if the last character came from an escape sequence
     */
    public boolean isEscaped() {
        return isEscaped;
    }

    /**
     * Returns true if this iterator is currently within a variable expansion.
     * @return true if a variable value is currently being iterated
     */
    public boolean inVariable() {
        return buf != null;
    }

    /**
     * Returns an object which, when later passed to setPos(), will
     * restore this iterator's position.  Usage idiom:
     *
     * <pre>
     * RuleCharacterIterator iterator = ...;
     * Object pos = iterator.getPos(null); // allocate position object
     * for (;;) {
     *   pos = iterator.getPos(pos); // reuse position object
     *   int c = iterator.next(...);
     *   ...
     * }
     * iterator.setPos(pos);
     * </pre>
     *
     * @param p a position object previously returned by getPos(),
     * or null.  If not null, it will be updated and returned.  If
     * null, a new position object will be allocated and returned.
     * @return a position object which may be passed to setPos(),
     * either `p,' or if `p' == null, a newly-allocated object
     */
    public Object getPos(Object p) {
        // Layout: { buf, int[] { text index, bufPos } }
        if (p == null) {
            return new Object[] {buf, new int[] {pos.getIndex(), bufPos}};
        }
        Object[] a = (Object[]) p;
        a[0] = buf;
        int[] v = (int[]) a[1];
        v[0] = pos.getIndex();
        v[1] = bufPos;
        return p;
    }

    /**
     * Restores this iterator to the position it had when getPos()
     * returned the given object.
     * @param p a position object previously returned by getPos()
     */
    public void setPos(Object p) {
        Object[] a = (Object[]) p;
        buf = (char[]) a[0];
        int[] v = (int[]) a[1];
        pos.setIndex(v[0]);
        bufPos = v[1];
    }

    /**
     * Skips ahead past any ignored characters, as indicated by the given
     * options.  This is useful in conjunction with the lookahead() method.
     *
     * Currently, this only has an effect for SKIP_WHITESPACE.
     * @param options one or more of the following options, bitwise-OR-ed
     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
     */
    public void skipIgnored(int options) {
        if ((options & SKIP_WHITESPACE) != 0) {
            for (;;) {
                int a = _current();
                if (!PatternProps.isWhiteSpace(a)) {
                    break;
                }
                _advance(UTF16.getCharCount(a));
            }
        }
    }

    /**
     * Returns a string containing the remainder of the characters to be
     * returned by this iterator, without any option processing.  If the
     * iterator is currently within a variable expansion, this will only
     * extend to the end of the variable expansion.  This method is provided
     * so that iterators may interoperate with string-based APIs.  The typical
     * sequence of calls is to call skipIgnored(), then call lookahead(), then
     * parse the string returned by lookahead(), then call jumpahead() to
     * resynchronize the iterator.
     * @return a string containing the characters to be returned by future
     * calls to next()
     */
    public String lookahead() {
        if (buf != null) {
            return new String(buf, bufPos, buf.length - bufPos);
        } else {
            return text.substring(pos.getIndex());
        }
    }

    /**
     * Advances the position by the given number of 16-bit code units.
     * This is useful in conjunction with the lookahead() method.
     * If the request is rejected, the iterator's position is left unchanged.
     * @param count the number of 16-bit code units to jump over
     * @throws IllegalArgumentException if count is negative or would move
     * past the end of the current variable expansion or of the text
     */
    public void jumpahead(int count) {
        if (count < 0) {
            throw new IllegalArgumentException(
                    "Negative jump count: " + count);
        }
        if (buf != null) {
            // Compare against the remaining length rather than computing
            // bufPos + count, so a very large count cannot overflow, and
            // validate before mutating so a failure leaves state intact.
            if (count > buf.length - bufPos) {
                throw new IllegalArgumentException(
                        "Jump past end of variable expansion: " + count);
            }
            bufPos += count;
            if (bufPos == buf.length) {
                // Expansion exhausted; resume reading from the text.
                buf = null;
            }
        } else {
            int start = pos.getIndex();
            // Same overflow-safe, validate-first check for the text source.
            if (count > text.length() - start) {
                throw new IllegalArgumentException(
                        "Jump past end of text: " + count);
            }
            pos.setIndex(start + count);
        }
    }

    /**
     * Returns a string representation of this object, consisting of the
     * characters being iterated, with a '|' marking the current position.
     * Position within an expanded variable is <em>not</em> indicated.
     * @return a string representation of this object
     */
    @Override
    public String toString() {
        int b = pos.getIndex();
        return text.substring(0, b) + '|' + text.substring(b);
    }

    /**
     * Returns the current 32-bit code point without parsing escapes, parsing
     * variables, or skipping whitespace.
     * @return the current 32-bit code point, or DONE if reading from the
     * text and the position is at or past its end
     */
    private int _current() {
        if (buf != null) {
            return UTF16.charAt(buf, 0, buf.length, bufPos);
        } else {
            int i = pos.getIndex();
            return (i < text.length()) ? UTF16.charAt(text, i) : DONE;
        }
    }

    /**
     * Advances the position by the given amount.  Within a variable
     * expansion, the expansion is dropped once its end is reached; within
     * the text, the position is clamped to the end of the text.
     * @param count the number of 16-bit code units to advance past
     */
    private void _advance(int count) {
        if (buf != null) {
            bufPos += count;
            if (bufPos == buf.length) {
                buf = null;
            }
        } else {
            pos.setIndex(pos.getIndex() + count);
            if (pos.getIndex() > text.length()) {
                pos.setIndex(text.length());
            }
        }
    }
}