• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (c) 2003-2007, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: September 24 2003
8 * Since: ICU 2.8
9 **********************************************************************
10 */
11 #ifndef _RULEITER_H_
12 #define _RULEITER_H_
13 
14 #include "unicode/uobject.h"
15 
16 U_NAMESPACE_BEGIN
17 
18 class UnicodeString;
19 class ParsePosition;
20 class SymbolTable;
21 
22 /**
23  * An iterator that returns 32-bit code points.  This class is deliberately
24  * <em>not</em> related to any of the ICU character iterator classes
25  * in order to minimize complexity.
26  * @author Alan Liu
27  * @since ICU 2.8
28  */
29 class RuleCharacterIterator : public UMemory {
30 
31     // TODO: Ideas for later.  (Do not implement if not needed, lest the
32     // code coverage numbers go down due to unused methods.)
33     // 1. Add a copy constructor and an operator==() method.
34     // 2. Rather than return DONE, throw an exception if the end
35     // is reached -- this is an alternate usage model, probably not useful.
36 
37 private:
38     /**
39      * Text being iterated (held by reference).
40      */
41     const UnicodeString& text;
42 
43     /**
44      * Position of iterator (held by reference).
45      */
46     ParsePosition& pos;
47 
48     /**
49      * Symbol table used to parse and dereference variables.  May be 0.
50      */
51     const SymbolTable* sym;
52 
53     /**
54      * Current variable expansion, or 0 if none.
55      */
56     const UnicodeString* buf;
57 
58     /**
59      * Position within buf.  Meaningless if buf == 0.
60      */
61     int32_t bufPos;
62 
63 public:
64     /**
65      * Value returned when there are no more characters to iterate.
66      */
67     enum { DONE = -1 };
68 
69     /**
70      * Bitmask option to enable parsing of variable names.  If (options &
71      * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
72      * its value.  Variables are parsed using the SymbolTable API.
73      */
74     enum { PARSE_VARIABLES = 1 };
75 
76     /**
77      * Bitmask option to enable parsing of escape sequences.  If (options &
78      * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
79      * to its value.  Escapes are parsed using Utility.unescapeAt().
80      */
81     enum { PARSE_ESCAPES   = 2 };
82 
83     /**
84      * Bitmask option to enable skipping of whitespace.  If (options &
85      * SKIP_WHITESPACE) != 0, then whitespace characters will be silently
86      * skipped, as if they were not present in the input.  Whitespace
87      * characters are defined by UCharacterProperty.isRuleWhiteSpace().
88      */
89     enum { SKIP_WHITESPACE = 4 };
90 
91     /**
92      * Constructs an iterator over the given text, starting at the given
93      * position.
94      * @param text the text to be iterated
95      * @param sym the symbol table, or null if there is none.  If sym is null,
96      * then variables will not be dereferenced, even if the PARSE_VARIABLES
97      * option is set.
98      * @param pos upon input, the index of the next character to return.  If a
99      * variable has been dereferenced, then pos will <em>not</em> increment as
100      * characters of the variable value are iterated.
101      */
102     RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
103                           ParsePosition& pos);
104 
105     /**
106      * Returns true if this iterator has no more characters to return.
107      */
108     UBool atEnd() const;
109 
110     /**
111      * Returns the next character using the given options, or DONE if there
112      * are no more characters, and advances the position to the next
113      * character.
114      * @param options one or more of the following options, bitwise-OR-ed
115      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
116      * @param isEscaped output parameter set to TRUE if the character
117      * was escaped
118      * @param ec input-output error code.  An error will only be set by
119      * this routine if options includes PARSE_VARIABLES and an unknown
120      * variable name is seen, or if options includes PARSE_ESCAPES and
121      * an invalid escape sequence is seen.
122      * @return the current 32-bit code point, or DONE
123      */
124     UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
125 
126     /**
127      * Returns true if this iterator is currently within a variable expansion.
128      */
129     inline UBool inVariable() const;
130 
131     /**
132      * An opaque object representing the position of a RuleCharacterIterator.
133      */
134     struct Pos : public UMemory {
135     private:
136         const UnicodeString* buf;  // NOTE(review): presumably a snapshot of the iterator's buf -- confirm in the implementation
137         int32_t pos;               // NOTE(review): presumably the saved index from the iterator's ParsePosition -- confirm
138         int32_t bufPos;            // NOTE(review): presumably a snapshot of the iterator's bufPos -- confirm
139         friend class RuleCharacterIterator;  // lets the iterator access the private fields above
140     };
141 
142     /**
143      * Sets an object which, when later passed to setPos(), will
144      * restore this iterator's position.  Usage idiom:
145      *
146      * RuleCharacterIterator iterator = ...;
147      * RuleCharacterIterator::Pos pos;
148      * iterator.getPos(pos);
149      * for (;;) {
150      *   iterator.getPos(pos);
151      *   int c = iterator.next(...);
152      *   ...
153      * }
154      * iterator.setPos(pos);
155      *
156      * @param p a position object to be set to this iterator's
157      * current position.
158      */
159     void getPos(Pos& p) const;
160 
161     /**
162      * Restores this iterator to the position it had when getPos()
163      * set the given object.
164      * @param p a position object previously set by getPos()
165      */
166     void setPos(const Pos& p);
167 
168     /**
169      * Skips ahead past any ignored characters, as indicated by the given
170      * options.  This is useful in conjunction with the lookahead() method.
171      *
172      * Currently, this only has an effect for SKIP_WHITESPACE.
173      * @param options one or more of the following options, bitwise-OR-ed
174      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
175      */
176     void skipIgnored(int32_t options);
177 
178     /**
179      * Returns a string containing the remainder of the characters to be
180      * returned by this iterator, without any option processing.  If the
181      * iterator is currently within a variable expansion, this will only
182      * extend to the end of the variable expansion.  This method is provided
183      * so that iterators may interoperate with string-based APIs.  The typical
184      * sequence of calls is to call skipIgnored(), then call lookahead(), then
185      * parse the string returned by lookahead(), then call jumpahead() to
186      * resynchronize the iterator.
187      * @param result a string to receive the characters to be returned
188      * by future calls to next()
189      * @param maxLookAhead The maximum to copy into the result (defaults to -1).
190      * @return a reference to result
191      */
192     UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
193 
194     /**
195      * Advances the position by the given number of 16-bit code units.
196      * This is useful in conjunction with the lookahead() method.
197      * @param count the number of 16-bit code units to jump over
198      */
199     void jumpahead(int32_t count);
200 
201     /**
202      * Returns a string representation of this object, consisting of the
203      * characters being iterated, with a '|' marking the current position.
204      * Position within an expanded variable is <em>not</em> indicated.
205      * @param result output parameter to receive a string
206      * representation of this object
207      */
208 //    UnicodeString& toString(UnicodeString& result) const;
209 
210 private:
211     /**
212      * Returns the current 32-bit code point without parsing escapes, parsing
213      * variables, or skipping whitespace.
214      * @return the current 32-bit code point
215      */
216     UChar32 _current() const;
217 
218     /**
219      * Advances the position by the given amount.
220      * @param count the number of 16-bit code units to advance past
221      */
222     void _advance(int32_t count);
223 };
224 
inVariable()225 inline UBool RuleCharacterIterator::inVariable() const {
226     return !(buf == 0);  // buf is non-null only while a variable expansion is being iterated
227 }
228 
229 U_NAMESPACE_END
230 
231 #endif // _RULEITER_H_
232 //eof
233