1 /*
2 **********************************************************************
3 * Copyright (c) 2003-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: September 24 2003
8 * Since: ICU 2.8
9 **********************************************************************
10 */
11 #include "ruleiter.h"
12 #include "unicode/parsepos.h"
13 #include "unicode/unistr.h"
14 #include "unicode/symtable.h"
15 #include "patternprops.h"
16
17 /* \U87654321 or \ud800\udc00 */
18 #define MAX_U_NOTATION_LEN 12
19
20 U_NAMESPACE_BEGIN
21
RuleCharacterIterator(const UnicodeString & theText,const SymbolTable * theSym,ParsePosition & thePos)22 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
23 ParsePosition& thePos) :
24 text(theText),
25 pos(thePos),
26 sym(theSym),
27 buf(0),
28 bufPos(0)
29 {}
30
atEnd() const31 UBool RuleCharacterIterator::atEnd() const {
32 return buf == 0 && pos.getIndex() == text.length();
33 }
34
next(int32_t options,UBool & isEscaped,UErrorCode & ec)35 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
36 if (U_FAILURE(ec)) return DONE;
37
38 UChar32 c = DONE;
39 isEscaped = FALSE;
40
41 for (;;) {
42 c = _current();
43 _advance(UTF_CHAR_LENGTH(c));
44
45 if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
46 (options & PARSE_VARIABLES) != 0 && sym != 0) {
47 UnicodeString name = sym->parseReference(text, pos, text.length());
48 // If name is empty there was an isolated SYMBOL_REF;
49 // return it. Caller must be prepared for this.
50 if (name.length() == 0) {
51 break;
52 }
53 bufPos = 0;
54 buf = sym->lookup(name);
55 if (buf == 0) {
56 ec = U_UNDEFINED_VARIABLE;
57 return DONE;
58 }
59 // Handle empty variable value
60 if (buf->length() == 0) {
61 buf = 0;
62 }
63 continue;
64 }
65
66 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
67 continue;
68 }
69
70 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
71 UnicodeString tempEscape;
72 int32_t offset = 0;
73 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
74 jumpahead(offset);
75 isEscaped = TRUE;
76 if (c < 0) {
77 ec = U_MALFORMED_UNICODE_ESCAPE;
78 return DONE;
79 }
80 }
81
82 break;
83 }
84
85 return c;
86 }
87
getPos(RuleCharacterIterator::Pos & p) const88 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
89 p.buf = buf;
90 p.pos = pos.getIndex();
91 p.bufPos = bufPos;
92 }
93
setPos(const RuleCharacterIterator::Pos & p)94 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
95 buf = p.buf;
96 pos.setIndex(p.pos);
97 bufPos = p.bufPos;
98 }
99
skipIgnored(int32_t options)100 void RuleCharacterIterator::skipIgnored(int32_t options) {
101 if ((options & SKIP_WHITESPACE) != 0) {
102 for (;;) {
103 UChar32 a = _current();
104 if (!PatternProps::isWhiteSpace(a)) break;
105 _advance(UTF_CHAR_LENGTH(a));
106 }
107 }
108 }
109
lookahead(UnicodeString & result,int32_t maxLookAhead) const110 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
111 if (maxLookAhead < 0) {
112 maxLookAhead = 0x7FFFFFFF;
113 }
114 if (buf != 0) {
115 buf->extract(bufPos, maxLookAhead, result);
116 } else {
117 text.extract(pos.getIndex(), maxLookAhead, result);
118 }
119 return result;
120 }
121
jumpahead(int32_t count)122 void RuleCharacterIterator::jumpahead(int32_t count) {
123 _advance(count);
124 }
125
126 /*
127 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
128 int32_t b = pos.getIndex();
129 text.extract(0, b, result);
130 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
131 }
132 */
133
_current() const134 UChar32 RuleCharacterIterator::_current() const {
135 if (buf != 0) {
136 return buf->char32At(bufPos);
137 } else {
138 int i = pos.getIndex();
139 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
140 }
141 }
142
_advance(int32_t count)143 void RuleCharacterIterator::_advance(int32_t count) {
144 if (buf != 0) {
145 bufPos += count;
146 if (bufPos == buf->length()) {
147 buf = 0;
148 }
149 } else {
150 pos.setIndex(pos.getIndex() + count);
151 if (pos.getIndex() > text.length()) {
152 pos.setIndex(text.length());
153 }
154 }
155 }
156
157 U_NAMESPACE_END
158
159 //eof
160