• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (c) 2004-2011, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 * Author: Alan Liu
9 * Created: March 22 2004
10 * Since: ICU 3.0
11 **********************************************************************
12 */
13 #include "tokiter.h"
14 #include "textfile.h"
15 #include "patternprops.h"
16 #include "util.h"
17 #include "uprops.h"
18 
TokenIterator(TextFile * r)19 TokenIterator::TokenIterator(TextFile* r) {
20     reader = r;
21     done = haveLine = false;
22     pos = lastpos = -1;
23 }
24 
~TokenIterator()25 TokenIterator::~TokenIterator() {
26 }
27 
next(UnicodeString & token,UErrorCode & ec)28 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
29     if (done || U_FAILURE(ec)) {
30         return false;
31     }
32     token.truncate(0);
33     for (;;) {
34         if (!haveLine) {
35             if (!reader->readLineSkippingComments(line, ec)) {
36                 done = true;
37                 return false;
38             }
39             haveLine = true;
40             pos = 0;
41         }
42         lastpos = pos;
43         if (!nextToken(token, ec)) {
44             haveLine = false;
45             if (U_FAILURE(ec)) return false;
46             continue;
47         }
48         return true;
49     }
50 }
51 
getLineNumber() const52 int32_t TokenIterator::getLineNumber() const {
53     return reader->getLineNumber();
54 }
55 
56 /**
57  * Read the next token from 'this->line' and append it to 'token'.
58  * Tokens are separated by Pattern_White_Space.  Tokens may also be
59  * delimited by double or single quotes.  The closing quote must match
60  * the opening quote.  If a '#' is encountered, the rest of the line
61  * is ignored, unless it is backslash-escaped or within quotes.
62  * @param token the token is appended to this StringBuffer
63  * @param ec input-output error code
64  * @return true if a valid token is found, or false if the end
65  * of the line is reached or an error occurs
66  */
nextToken(UnicodeString & token,UErrorCode & ec)67 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
68     ICU_Utility::skipWhitespace(line, pos, true);
69     if (pos == line.length()) {
70         return false;
71     }
72     UChar c = line.charAt(pos++);
73     UChar quote = 0;
74     switch (c) {
75     case 34/*'"'*/:
76     case 39/*'\\'*/:
77         quote = c;
78         break;
79     case 35/*'#'*/:
80         return false;
81     default:
82         token.append(c);
83         break;
84     }
85     while (pos < line.length()) {
86         c = line.charAt(pos); // 16-bit ok
87         if (c == 92/*'\\'*/) {
88             UChar32 c32 = line.unescapeAt(pos);
89             if (c32 < 0) {
90                 ec = U_MALFORMED_UNICODE_ESCAPE;
91                 return false;
92             }
93             token.append(c32);
94         } else if ((quote != 0 && c == quote) ||
95                    (quote == 0 && PatternProps::isWhiteSpace(c))) {
96             ++pos;
97             return true;
98         } else if (quote == 0 && c == '#') {
99             return true; // do NOT increment
100         } else {
101             token.append(c);
102             ++pos;
103         }
104     }
105     if (quote != 0) {
106         ec = U_UNTERMINATED_QUOTE;
107         return false;
108     }
109     return true;
110 }
111