• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (c) 2004-2011, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: March 22 2004
8 * Since: ICU 3.0
9 **********************************************************************
10 */
11 #include "tokiter.h"
12 #include "textfile.h"
13 #include "patternprops.h"
14 #include "util.h"
15 #include "uprops.h"
16 
TokenIterator(TextFile * r)17 TokenIterator::TokenIterator(TextFile* r) {
18     reader = r;
19     done = haveLine = FALSE;
20     pos = lastpos = -1;
21 }
22 
~TokenIterator()23 TokenIterator::~TokenIterator() {
24 }
25 
next(UnicodeString & token,UErrorCode & ec)26 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
27     if (done || U_FAILURE(ec)) {
28         return FALSE;
29     }
30     token.truncate(0);
31     for (;;) {
32         if (!haveLine) {
33             if (!reader->readLineSkippingComments(line, ec)) {
34                 done = TRUE;
35                 return FALSE;
36             }
37             haveLine = TRUE;
38             pos = 0;
39         }
40         lastpos = pos;
41         if (!nextToken(token, ec)) {
42             haveLine = FALSE;
43             if (U_FAILURE(ec)) return FALSE;
44             continue;
45         }
46         return TRUE;
47     }
48 }
49 
getLineNumber() const50 int32_t TokenIterator::getLineNumber() const {
51     return reader->getLineNumber();
52 }
53 
54 /**
55  * Read the next token from 'this->line' and append it to 'token'.
56  * Tokens are separated by Pattern_White_Space.  Tokens may also be
57  * delimited by double or single quotes.  The closing quote must match
58  * the opening quote.  If a '#' is encountered, the rest of the line
59  * is ignored, unless it is backslash-escaped or within quotes.
60  * @param token the token is appended to this StringBuffer
61  * @param ec input-output error code
62  * @return TRUE if a valid token is found, or FALSE if the end
63  * of the line is reached or an error occurs
64  */
nextToken(UnicodeString & token,UErrorCode & ec)65 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
66     ICU_Utility::skipWhitespace(line, pos, TRUE);
67     if (pos == line.length()) {
68         return FALSE;
69     }
70     UChar c = line.charAt(pos++);
71     UChar quote = 0;
72     switch (c) {
73     case 34/*'"'*/:
74     case 39/*'\\'*/:
75         quote = c;
76         break;
77     case 35/*'#'*/:
78         return FALSE;
79     default:
80         token.append(c);
81         break;
82     }
83     while (pos < line.length()) {
84         c = line.charAt(pos); // 16-bit ok
85         if (c == 92/*'\\'*/) {
86             UChar32 c32 = line.unescapeAt(pos);
87             if (c32 < 0) {
88                 ec = U_MALFORMED_UNICODE_ESCAPE;
89                 return FALSE;
90             }
91             token.append(c32);
92         } else if ((quote != 0 && c == quote) ||
93                    (quote == 0 && PatternProps::isWhiteSpace(c))) {
94             ++pos;
95             return TRUE;
96         } else if (quote == 0 && c == '#') {
97             return TRUE; // do NOT increment
98         } else {
99             token.append(c);
100             ++pos;
101         }
102     }
103     if (quote != 0) {
104         ec = U_UNTERMINATED_QUOTE;
105         return FALSE;
106     }
107     return TRUE;
108 }
109