• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (c) 2004, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: March 22 2004
8 * Since: ICU 3.0
9 **********************************************************************
10 */
11 #include "tokiter.h"
12 #include "textfile.h"
13 #include "util.h"
14 #include "uprops.h"
15 
TokenIterator(TextFile * r)16 TokenIterator::TokenIterator(TextFile* r) {
17     reader = r;
18     done = haveLine = FALSE;
19     pos = lastpos = -1;
20 }
21 
~TokenIterator()22 TokenIterator::~TokenIterator() {
23 }
24 
next(UnicodeString & token,UErrorCode & ec)25 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
26     if (done || U_FAILURE(ec)) {
27         return FALSE;
28     }
29     token.truncate(0);
30     for (;;) {
31         if (!haveLine) {
32             if (!reader->readLineSkippingComments(line, ec)) {
33                 done = TRUE;
34                 return FALSE;
35             }
36             haveLine = TRUE;
37             pos = 0;
38         }
39         lastpos = pos;
40         if (!nextToken(token, ec)) {
41             haveLine = FALSE;
42             if (U_FAILURE(ec)) return FALSE;
43             continue;
44         }
45         return TRUE;
46     }
47 }
48 
getLineNumber() const49 int32_t TokenIterator::getLineNumber() const {
50     return reader->getLineNumber();
51 }
52 
53 /**
54  * Read the next token from 'this->line' and append it to 'token'.
55  * Tokens are separated by rule white space.  Tokens may also be
56  * delimited by double or single quotes.  The closing quote must match
57  * the opening quote.  If a '#' is encountered, the rest of the line
58  * is ignored, unless it is backslash-escaped or within quotes.
59  * @param token the token is appended to this StringBuffer
60  * @param ec input-output error code
61  * @return TRUE if a valid token is found, or FALSE if the end
62  * of the line is reached or an error occurs
63  */
nextToken(UnicodeString & token,UErrorCode & ec)64 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
65     ICU_Utility::skipWhitespace(line, pos, TRUE);
66     if (pos == line.length()) {
67         return FALSE;
68     }
69     UChar c = line.charAt(pos++);
70     UChar quote = 0;
71     switch (c) {
72     case 34/*'"'*/:
73     case 39/*'\\'*/:
74         quote = c;
75         break;
76     case 35/*'#'*/:
77         return FALSE;
78     default:
79         token.append(c);
80         break;
81     }
82     while (pos < line.length()) {
83         c = line.charAt(pos); // 16-bit ok
84         if (c == 92/*'\\'*/) {
85             UChar32 c32 = line.unescapeAt(pos);
86             if (c32 < 0) {
87                 ec = U_MALFORMED_UNICODE_ESCAPE;
88                 return FALSE;
89             }
90             token.append(c32);
91         } else if ((quote != 0 && c == quote) ||
92                    (quote == 0 && uprv_isRuleWhiteSpace(c))) {
93             ++pos;
94             return TRUE;
95         } else if (quote == 0 && c == '#') {
96             return TRUE; // do NOT increment
97         } else {
98             token.append(c);
99             ++pos;
100         }
101     }
102     if (quote != 0) {
103         ec = U_UNTERMINATED_QUOTE;
104         return FALSE;
105     }
106     return TRUE;
107 }
108