• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (c) 2003-2011, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: September 24 2003
8 * Since: ICU 2.8
9 **********************************************************************
10 */
11 #include "ruleiter.h"
12 #include "unicode/parsepos.h"
13 #include "unicode/unistr.h"
14 #include "unicode/symtable.h"
15 #include "patternprops.h"
16 
/* Longest escape we look ahead for: \U87654321 is 10 code units, and a
   surrogate pair written as \ud800\udc00 is 12 code units. */
18 #define MAX_U_NOTATION_LEN 12
19 
20 U_NAMESPACE_BEGIN
21 
RuleCharacterIterator(const UnicodeString & theText,const SymbolTable * theSym,ParsePosition & thePos)22 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
23                       ParsePosition& thePos) :
24     text(theText),
25     pos(thePos),
26     sym(theSym),
27     buf(0),
28     bufPos(0)
29 {}
30 
atEnd() const31 UBool RuleCharacterIterator::atEnd() const {
32     return buf == 0 && pos.getIndex() == text.length();
33 }
34 
next(int32_t options,UBool & isEscaped,UErrorCode & ec)35 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
36     if (U_FAILURE(ec)) return DONE;
37 
38     UChar32 c = DONE;
39     isEscaped = FALSE;
40 
41     for (;;) {
42         c = _current();
43         _advance(UTF_CHAR_LENGTH(c));
44 
45         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
46             (options & PARSE_VARIABLES) != 0 && sym != 0) {
47             UnicodeString name = sym->parseReference(text, pos, text.length());
48             // If name is empty there was an isolated SYMBOL_REF;
49             // return it.  Caller must be prepared for this.
50             if (name.length() == 0) {
51                 break;
52             }
53             bufPos = 0;
54             buf = sym->lookup(name);
55             if (buf == 0) {
56                 ec = U_UNDEFINED_VARIABLE;
57                 return DONE;
58             }
59             // Handle empty variable value
60             if (buf->length() == 0) {
61                 buf = 0;
62             }
63             continue;
64         }
65 
66         if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
67             continue;
68         }
69 
70         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
71             UnicodeString tempEscape;
72             int32_t offset = 0;
73             c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
74             jumpahead(offset);
75             isEscaped = TRUE;
76             if (c < 0) {
77                 ec = U_MALFORMED_UNICODE_ESCAPE;
78                 return DONE;
79             }
80         }
81 
82         break;
83     }
84 
85     return c;
86 }
87 
getPos(RuleCharacterIterator::Pos & p) const88 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
89     p.buf = buf;
90     p.pos = pos.getIndex();
91     p.bufPos = bufPos;
92 }
93 
setPos(const RuleCharacterIterator::Pos & p)94 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
95     buf = p.buf;
96     pos.setIndex(p.pos);
97     bufPos = p.bufPos;
98 }
99 
skipIgnored(int32_t options)100 void RuleCharacterIterator::skipIgnored(int32_t options) {
101     if ((options & SKIP_WHITESPACE) != 0) {
102         for (;;) {
103             UChar32 a = _current();
104             if (!PatternProps::isWhiteSpace(a)) break;
105             _advance(UTF_CHAR_LENGTH(a));
106         }
107     }
108 }
109 
lookahead(UnicodeString & result,int32_t maxLookAhead) const110 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
111     if (maxLookAhead < 0) {
112         maxLookAhead = 0x7FFFFFFF;
113     }
114     if (buf != 0) {
115         buf->extract(bufPos, maxLookAhead, result);
116     } else {
117         text.extract(pos.getIndex(), maxLookAhead, result);
118     }
119     return result;
120 }
121 
jumpahead(int32_t count)122 void RuleCharacterIterator::jumpahead(int32_t count) {
123     _advance(count);
124 }
125 
126 /*
127 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
128     int32_t b = pos.getIndex();
129     text.extract(0, b, result);
130     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
131 }
132 */
133 
_current() const134 UChar32 RuleCharacterIterator::_current() const {
135     if (buf != 0) {
136         return buf->char32At(bufPos);
137     } else {
138         int i = pos.getIndex();
139         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
140     }
141 }
142 
_advance(int32_t count)143 void RuleCharacterIterator::_advance(int32_t count) {
144     if (buf != 0) {
145         bufPos += count;
146         if (bufPos == buf->length()) {
147             buf = 0;
148         }
149     } else {
150         pos.setIndex(pos.getIndex() + count);
151         if (pos.getIndex() > text.length()) {
152             pos.setIndex(text.length());
153         }
154     }
155 }
156 
157 U_NAMESPACE_END
158 
159 //eof
160