• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 **********************************************************************
6 * Copyright (c) 2003-2011, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 **********************************************************************
9 * Author: Alan Liu
10 * Created: September 23 2003
11 * Since: ICU 2.8
12 **********************************************************************
13 */
14 package ohos.global.icu.impl;
15 
16 import java.text.ParsePosition;
17 
18 import ohos.global.icu.text.SymbolTable;
19 import ohos.global.icu.text.UTF16;
20 
21 /**
22  * An iterator that returns 32-bit code points.  This class is deliberately
23  * <em>not</em> related to any of the JDK or ICU4J character iterator classes
24  * in order to minimize complexity.
25  * @author Alan Liu
26  * @hide exposed on OHOS
27  */
28 public class RuleCharacterIterator {
29 
30     // TODO: Ideas for later.  (Do not implement if not needed, lest the
31     // code coverage numbers go down due to unused methods.)
32     // 1. Add a copy constructor, equals() method, clone() method.
33     // 2. Rather than return DONE, throw an exception if the end
34     // is reached -- this is an alternate usage model, probably not useful.
35     // 3. Return isEscaped from next().  If this happens,
36     // don't keep an isEscaped member variable.
37 
38     /**
39      * Text being iterated.
40      */
41     private String text;
42 
43     /**
44      * Position of iterator.
45      */
46     private ParsePosition pos;
47 
48     /**
49      * Symbol table used to parse and dereference variables.  May be null.
50      */
51     private SymbolTable sym;
52 
53     /**
54      * Current variable expansion, or null if none.
55      */
56     private char[] buf;
57 
58     /**
59      * Position within buf[].  Meaningless if buf == null.
60      */
61     private int bufPos;
62 
63     /**
64      * Flag indicating whether the last character was parsed from an escape.
65      */
66     private boolean isEscaped;
67 
68     /**
69      * Value returned when there are no more characters to iterate.
70      */
71     public static final int DONE = -1;
72 
73     /**
74      * Bitmask option to enable parsing of variable names.  If (options &
75      * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
76      * its value.  Variables are parsed using the SymbolTable API.
77      */
78     public static final int PARSE_VARIABLES = 1;
79 
80     /**
81      * Bitmask option to enable parsing of escape sequences.  If (options &
82      * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
83      * to its value.  Escapes are parsed using Utility.unescapeAt().
84      */
85     public static final int PARSE_ESCAPES   = 2;
86 
87     /**
88      * Bitmask option to enable skipping of whitespace.  If (options &
89      * SKIP_WHITESPACE) != 0, then Unicode Pattern_White_Space characters will be silently
90      * skipped, as if they were not present in the input.
91      */
92     public static final int SKIP_WHITESPACE = 4;
93 
94     /**
95      * Constructs an iterator over the given text, starting at the given
96      * position.
97      * @param text the text to be iterated
98      * @param sym the symbol table, or null if there is none.  If sym is null,
99      * then variables will not be deferenced, even if the PARSE_VARIABLES
100      * option is set.
101      * @param pos upon input, the index of the next character to return.  If a
102      * variable has been dereferenced, then pos will <em>not</em> increment as
103      * characters of the variable value are iterated.
104      */
RuleCharacterIterator(String text, SymbolTable sym, ParsePosition pos)105     public RuleCharacterIterator(String text, SymbolTable sym,
106                                  ParsePosition pos) {
107         if (text == null || pos.getIndex() > text.length()) {
108             throw new IllegalArgumentException();
109         }
110         this.text = text;
111         this.sym = sym;
112         this.pos = pos;
113         buf = null;
114     }
115 
116     /**
117      * Returns true if this iterator has no more characters to return.
118      */
atEnd()119     public boolean atEnd() {
120         return buf == null && pos.getIndex() == text.length();
121     }
122 
123     /**
124      * Returns the next character using the given options, or DONE if there
125      * are no more characters, and advance the position to the next
126      * character.
127      * @param options one or more of the following options, bitwise-OR-ed
128      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
129      * @return the current 32-bit code point, or DONE
130      */
next(int options)131     public int next(int options) {
132         int c = DONE;
133         isEscaped = false;
134 
135         for (;;) {
136             c = _current();
137             _advance(UTF16.getCharCount(c));
138 
139             if (c == SymbolTable.SYMBOL_REF && buf == null &&
140                 (options & PARSE_VARIABLES) != 0 && sym != null) {
141                 String name = sym.parseReference(text, pos, text.length());
142                 // If name == null there was an isolated SYMBOL_REF;
143                 // return it.  Caller must be prepared for this.
144                 if (name == null) {
145                     break;
146                 }
147                 bufPos = 0;
148                 buf = sym.lookup(name);
149                 if (buf == null) {
150                     throw new IllegalArgumentException(
151                                 "Undefined variable: " + name);
152                 }
153                 // Handle empty variable value
154                 if (buf.length == 0) {
155                     buf = null;
156                 }
157                 continue;
158             }
159 
160             if ((options & SKIP_WHITESPACE) != 0 &&
161                 PatternProps.isWhiteSpace(c)) {
162                 continue;
163             }
164 
165             if (c == '\\' && (options & PARSE_ESCAPES) != 0) {
166                 int offset[] = new int[] { 0 };
167                 c = Utility.unescapeAt(lookahead(), offset);
168                 jumpahead(offset[0]);
169                 isEscaped = true;
170                 if (c < 0) {
171                     throw new IllegalArgumentException("Invalid escape");
172                 }
173             }
174 
175             break;
176         }
177 
178         return c;
179     }
180 
181     /**
182      * Returns true if the last character returned by next() was
183      * escaped.  This will only be the case if the option passed in to
184      * next() included PARSE_ESCAPED and the next character was an
185      * escape sequence.
186      */
isEscaped()187     public boolean isEscaped() {
188         return isEscaped;
189     }
190 
191     /**
192      * Returns true if this iterator is currently within a variable expansion.
193      */
inVariable()194     public boolean inVariable() {
195         return buf != null;
196     }
197 
198     /**
199      * Returns an object which, when later passed to setPos(), will
200      * restore this iterator's position.  Usage idiom:
201      *
202      * RuleCharacterIterator iterator = ...;
203      * Object pos = iterator.getPos(null); // allocate position object
204      * for (;;) {
205      *   pos = iterator.getPos(pos); // reuse position object
206      *   int c = iterator.next(...);
207      *   ...
208      * }
209      * iterator.setPos(pos);
210      *
211      * @param p a position object previously returned by getPos(),
212      * or null.  If not null, it will be updated and returned.  If
213      * null, a new position object will be allocated and returned.
214      * @return a position object which may be passed to setPos(),
215      * either `p,' or if `p' == null, a newly-allocated object
216      */
getPos(Object p)217     public Object getPos(Object p) {
218         if (p == null) {
219             return new Object[] {buf, new int[] {pos.getIndex(), bufPos}};
220         }
221         Object[] a = (Object[]) p;
222         a[0] = buf;
223         int[] v = (int[]) a[1];
224         v[0] = pos.getIndex();
225         v[1] = bufPos;
226         return p;
227     }
228 
229     /**
230      * Restores this iterator to the position it had when getPos()
231      * returned the given object.
232      * @param p a position object previously returned by getPos()
233      */
setPos(Object p)234     public void setPos(Object p) {
235         Object[] a = (Object[]) p;
236         buf = (char[]) a[0];
237         int[] v = (int[]) a[1];
238         pos.setIndex(v[0]);
239         bufPos = v[1];
240     }
241 
242     /**
243      * Skips ahead past any ignored characters, as indicated by the given
244      * options.  This is useful in conjunction with the lookahead() method.
245      *
246      * Currently, this only has an effect for SKIP_WHITESPACE.
247      * @param options one or more of the following options, bitwise-OR-ed
248      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
249      */
skipIgnored(int options)250     public void skipIgnored(int options) {
251         if ((options & SKIP_WHITESPACE) != 0) {
252             for (;;) {
253                 int a = _current();
254                 if (!PatternProps.isWhiteSpace(a)) break;
255                 _advance(UTF16.getCharCount(a));
256             }
257         }
258     }
259 
260     /**
261      * Returns a string containing the remainder of the characters to be
262      * returned by this iterator, without any option processing.  If the
263      * iterator is currently within a variable expansion, this will only
264      * extend to the end of the variable expansion.  This method is provided
265      * so that iterators may interoperate with string-based APIs.  The typical
266      * sequence of calls is to call skipIgnored(), then call lookahead(), then
267      * parse the string returned by lookahead(), then call jumpahead() to
268      * resynchronize the iterator.
269      * @return a string containing the characters to be returned by future
270      * calls to next()
271      */
lookahead()272     public String lookahead() {
273         if (buf != null) {
274             return new String(buf, bufPos, buf.length - bufPos);
275         } else {
276             return text.substring(pos.getIndex());
277         }
278     }
279 
280     /**
281      * Advances the position by the given number of 16-bit code units.
282      * This is useful in conjunction with the lookahead() method.
283      * @param count the number of 16-bit code units to jump over
284      */
jumpahead(int count)285     public void jumpahead(int count) {
286         if (count < 0) {
287             throw new IllegalArgumentException();
288         }
289         if (buf != null) {
290             bufPos += count;
291             if (bufPos > buf.length) {
292                 throw new IllegalArgumentException();
293             }
294             if (bufPos == buf.length) {
295                 buf = null;
296             }
297         } else {
298             int i = pos.getIndex() + count;
299             pos.setIndex(i);
300             if (i > text.length()) {
301                 throw new IllegalArgumentException();
302             }
303         }
304     }
305 
306     /**
307      * Returns a string representation of this object, consisting of the
308      * characters being iterated, with a '|' marking the current position.
309      * Position within an expanded variable is <em>not</em> indicated.
310      * @return a string representation of this object
311      */
312     @Override
toString()313     public String toString() {
314         int b = pos.getIndex();
315         return text.substring(0, b) + '|' + text.substring(b);
316     }
317 
318     /**
319      * Returns the current 32-bit code point without parsing escapes, parsing
320      * variables, or skipping whitespace.
321      * @return the current 32-bit code point
322      */
_current()323     private int _current() {
324         if (buf != null) {
325             return UTF16.charAt(buf, 0, buf.length, bufPos);
326         } else {
327             int i = pos.getIndex();
328             return (i < text.length()) ? UTF16.charAt(text, i) : DONE;
329         }
330     }
331 
332     /**
333      * Advances the position by the given amount.
334      * @param count the number of 16-bit code units to advance past
335      */
_advance(int count)336     private void _advance(int count) {
337         if (buf != null) {
338             bufPos += count;
339             if (bufPos == buf.length) {
340                 buf = null;
341             }
342         } else {
343             pos.setIndex(pos.getIndex() + count);
344             if (pos.getIndex() > text.length()) {
345                 pos.setIndex(text.length());
346             }
347         }
348     }
349 }