1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (c) 2004-2011, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Author: Alan Liu
9 * Created: March 22 2004
10 * Since: ICU 3.0
11 **********************************************************************
12 */
13 #include "tokiter.h"
14 #include "textfile.h"
15 #include "patternprops.h"
16 #include "util.h"
17 #include "uprops.h"
18
TokenIterator(TextFile * r)19 TokenIterator::TokenIterator(TextFile* r) {
20 reader = r;
21 done = haveLine = false;
22 pos = lastpos = -1;
23 }
24
~TokenIterator()25 TokenIterator::~TokenIterator() {
26 }
27
next(UnicodeString & token,UErrorCode & ec)28 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
29 if (done || U_FAILURE(ec)) {
30 return false;
31 }
32 token.truncate(0);
33 for (;;) {
34 if (!haveLine) {
35 if (!reader->readLineSkippingComments(line, ec)) {
36 done = true;
37 return false;
38 }
39 haveLine = true;
40 pos = 0;
41 }
42 lastpos = pos;
43 if (!nextToken(token, ec)) {
44 haveLine = false;
45 if (U_FAILURE(ec)) return false;
46 continue;
47 }
48 return true;
49 }
50 }
51
getLineNumber() const52 int32_t TokenIterator::getLineNumber() const {
53 return reader->getLineNumber();
54 }
55
56 /**
57 * Read the next token from 'this->line' and append it to 'token'.
58 * Tokens are separated by Pattern_White_Space. Tokens may also be
59 * delimited by double or single quotes. The closing quote must match
60 * the opening quote. If a '#' is encountered, the rest of the line
61 * is ignored, unless it is backslash-escaped or within quotes.
62 * @param token the token is appended to this StringBuffer
63 * @param ec input-output error code
64 * @return true if a valid token is found, or false if the end
65 * of the line is reached or an error occurs
66 */
nextToken(UnicodeString & token,UErrorCode & ec)67 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
68 ICU_Utility::skipWhitespace(line, pos, true);
69 if (pos == line.length()) {
70 return false;
71 }
72 UChar c = line.charAt(pos++);
73 UChar quote = 0;
74 switch (c) {
75 case 34/*'"'*/:
76 case 39/*'\\'*/:
77 quote = c;
78 break;
79 case 35/*'#'*/:
80 return false;
81 default:
82 token.append(c);
83 break;
84 }
85 while (pos < line.length()) {
86 c = line.charAt(pos); // 16-bit ok
87 if (c == 92/*'\\'*/) {
88 UChar32 c32 = line.unescapeAt(pos);
89 if (c32 < 0) {
90 ec = U_MALFORMED_UNICODE_ESCAPE;
91 return false;
92 }
93 token.append(c32);
94 } else if ((quote != 0 && c == quote) ||
95 (quote == 0 && PatternProps::isWhiteSpace(c))) {
96 ++pos;
97 return true;
98 } else if (quote == 0 && c == '#') {
99 return true; // do NOT increment
100 } else {
101 token.append(c);
102 ++pos;
103 }
104 }
105 if (quote != 0) {
106 ec = U_UNTERMINATED_QUOTE;
107 return false;
108 }
109 return true;
110 }
111