• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (c) 2003-2011, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: September 24 2003
8 * Since: ICU 2.8
9 **********************************************************************
10 */
11 #ifndef _RULEITER_H_
12 #define _RULEITER_H_
13 
14 #include "unicode/uobject.h"
15 
16 U_NAMESPACE_BEGIN
17 
18 class UnicodeString;  // forward declaration: only used by reference/pointer in this header
19 class ParsePosition;  // forward declaration: only used by reference in this header
20 class SymbolTable;    // forward declaration: only used by pointer in this header
21 
22 /**
23  * An iterator that returns 32-bit code points.  This class is deliberately
24  * <em>not</em> related to any of the ICU character iterator classes
25  * in order to minimize complexity.
26  * @author Alan Liu
27  * @since ICU 2.8
28  */
29 class RuleCharacterIterator : public UMemory {
30 
31     // TODO: Ideas for later.  (Do not implement if not needed, lest the
32     // code coverage numbers go down due to unused methods.)
33     // 1. Add a copy constructor, operator==() method.
34     // 2. Rather than return DONE, throw an exception if the end
35     // is reached -- this is an alternate usage model, probably not useful.
36 
37 private:
38     /**
39      * Text being iterated.
40      */
41     const UnicodeString& text;
42 
43     /**
44      * Position of iterator.
45      */
46     ParsePosition& pos;
47 
48     /**
49      * Symbol table used to parse and dereference variables.  May be 0.
50      */
51     const SymbolTable* sym;
52 
53     /**
54      * Current variable expansion, or 0 if none.
55      */
56     const UnicodeString* buf;
57 
58     /**
59      * Position within buf.  Meaningless if buf == 0.
60      */
61     int32_t bufPos;
62 
63 public:
64     /**
65      * Value returned when there are no more characters to iterate.
66      */
67     enum { DONE = -1 };
68 
69     /**
70      * Bitmask option to enable parsing of variable names.  If (options &
71      * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
72      * its value.  Variables are parsed using the SymbolTable API.
73      */
74     enum { PARSE_VARIABLES = 1 };
75 
76     /**
77      * Bitmask option to enable parsing of escape sequences.  If (options &
78      * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
79      * to its value.  Escapes are parsed using Utility.unescapeAt().
80      */
81     enum { PARSE_ESCAPES   = 2 };
82 
83     /**
84      * Bitmask option to enable skipping of whitespace.  If (options &
85      * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently
86      * skipped, as if they were not present in the input.
87      */
88     enum { SKIP_WHITESPACE = 4 };
89 
90     /**
91      * Constructs an iterator over the given text, starting at the given
92      * position.
93      * @param text the text to be iterated
94      * @param sym the symbol table, or null if there is none.  If sym is null,
95      * then variables will not be dereferenced, even if the PARSE_VARIABLES
96      * option is set.
97      * @param pos upon input, the index of the next character to return.  If a
98      * variable has been dereferenced, then pos will <em>not</em> increment as
99      * characters of the variable value are iterated.
100      */
101     RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
102                           ParsePosition& pos);
103 
104     /**
105      * Returns true if this iterator has no more characters to return.
106      */
107     UBool atEnd() const;
108 
109     /**
110      * Returns the next character using the given options, or DONE if there
111      * are no more characters, and advances the position to the next
112      * character.
113      * @param options one or more of the following options, bitwise-OR-ed
114      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
115      * @param isEscaped output parameter set to TRUE if the character
116      * was escaped
117      * @param ec input-output error code.  An error will only be set by
118      * this routine if options includes PARSE_VARIABLES and an unknown
119      * variable name is seen, or if options includes PARSE_ESCAPES and
120      * an invalid escape sequence is seen.
121      * @return the current 32-bit code point, or DONE
122      */
123     UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
124 
125     /**
126      * Returns true if this iterator is currently within a variable expansion.
127      */
128     inline UBool inVariable() const;
129 
130     /**
131      * An opaque object representing the position of a RuleCharacterIterator.
132      */
133     struct Pos : public UMemory {
134     private:
135         const UnicodeString* buf; // presumably a snapshot of the iterator's buf -- confirm in getPos()
136         int32_t pos;              // presumably the saved index of the iterator's ParsePosition -- confirm in getPos()
137         int32_t bufPos;           // presumably a snapshot of the iterator's bufPos -- confirm in getPos()
138         friend class RuleCharacterIterator;
139     };
140 
141     /**
142      * Sets an object which, when later passed to setPos(), will
143      * restore this iterator's position.  Usage idiom:
144      *
145      * RuleCharacterIterator iterator = ...;
146      * RuleCharacterIterator::Pos pos;
147      * iterator.getPos(pos);
148      * for (;;) {
149      *   iterator.getPos(pos);
150      *   int c = iterator.next(...);
151      *   ...
152      * }
153      * iterator.setPos(pos);
154      *
155      * @param p a position object to be set to this iterator's
156      * current position.
157      */
158     void getPos(Pos& p) const;
159 
160     /**
161      * Restores this iterator to the position it had when getPos()
162      * set the given object.
163      * @param p a position object previously set by getPos()
164      */
165     void setPos(const Pos& p);
166 
167     /**
168      * Skips ahead past any ignored characters, as indicated by the given
169      * options.  This is useful in conjunction with the lookahead() method.
170      *
171      * Currently, this only has an effect for SKIP_WHITESPACE.
172      * @param options one or more of the following options, bitwise-OR-ed
173      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
174      */
175     void skipIgnored(int32_t options);
176 
177     /**
178      * Returns a string containing the remainder of the characters to be
179      * returned by this iterator, without any option processing.  If the
180      * iterator is currently within a variable expansion, this will only
181      * extend to the end of the variable expansion.  This method is provided
182      * so that iterators may interoperate with string-based APIs.  The typical
183      * sequence of calls is to call skipIgnored(), then call lookahead(), then
184      * parse the string returned by lookahead(), then call jumpahead() to
185      * resynchronize the iterator.
186      * @param result a string to receive the characters to be returned
187      * by future calls to next()
188      * @param maxLookAhead The maximum to copy into the result; defaults to -1.
189      * @return a reference to result
190      */
191     UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
192 
193     /**
194      * Advances the position by the given number of 16-bit code units.
195      * This is useful in conjunction with the lookahead() method.
196      * @param count the number of 16-bit code units to jump over
197      */
198     void jumpahead(int32_t count);
199 
200     /**
201      * Returns a string representation of this object, consisting of the
202      * characters being iterated, with a '|' marking the current position.
203      * Position within an expanded variable is <em>not</em> indicated.
204      * @param result output parameter to receive a string
205      * representation of this object (declaration is currently commented out)
206      */
207 //    UnicodeString& toString(UnicodeString& result) const;
208 
209 private:
210     /**
211      * Returns the current 32-bit code point without parsing escapes, parsing
212      * variables, or skipping whitespace.
213      * @return the current 32-bit code point
214      */
215     UChar32 _current() const;
216 
217     /**
218      * Advances the position by the given amount.
219      * @param count the number of 16-bit code units to advance past
220      */
221     void _advance(int32_t count);
222 };
223 
inVariable()224 inline UBool RuleCharacterIterator::inVariable() const { // NOTE(review): the leading "inVariable()" text looks like code-browser residue, not source -- confirm and strip
225     return buf != 0; // buf is 0 when there is no current variable expansion
226 }
227 
228 U_NAMESPACE_END
229 
230 #endif // _RULEITER_H_
231 //eof
232