1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "lexer.h"
16
17 #include <stdio.h>
18
19 #include "eval_env.h"
20 #include "util.h"
21
22 using namespace std;
23
Error(const string & message,string * err)24 bool Lexer::Error(const string& message, string* err) {
25 // Compute line/column.
26 int line = 1;
27 const char* line_start = input_.str_;
28 for (const char* p = input_.str_; p < last_token_; ++p) {
29 if (*p == '\n') {
30 ++line;
31 line_start = p + 1;
32 }
33 }
34 int col = last_token_ ? (int)(last_token_ - line_start) : 0;
35
36 char buf[1024];
37 snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
38 *err = buf;
39 *err += message + "\n";
40
41 // Add some context to the message.
42 const int kTruncateColumn = 72;
43 if (col > 0 && col < kTruncateColumn) {
44 int len;
45 bool truncated = true;
46 for (len = 0; len < kTruncateColumn; ++len) {
47 if (line_start[len] == 0 || line_start[len] == '\n') {
48 truncated = false;
49 break;
50 }
51 }
52 *err += string(line_start, len);
53 if (truncated)
54 *err += "...";
55 *err += "\n";
56 *err += string(col, ' ');
57 *err += "^ near here";
58 }
59
60 return false;
61 }
62
Lexer(const char * input)63 Lexer::Lexer(const char* input) {
64 Start("input", input);
65 }
66
Start(StringPiece filename,StringPiece input)67 void Lexer::Start(StringPiece filename, StringPiece input) {
68 filename_ = filename;
69 input_ = input;
70 ofs_ = input_.str_;
71 last_token_ = NULL;
72 }
73
TokenName(Token t)74 const char* Lexer::TokenName(Token t) {
75 switch (t) {
76 case ERROR: return "lexing error";
77 case BUILD: return "'build'";
78 case COLON: return "':'";
79 case DEFAULT: return "'default'";
80 case EQUALS: return "'='";
81 case IDENT: return "identifier";
82 case INCLUDE: return "'include'";
83 case INDENT: return "indent";
84 case NEWLINE: return "newline";
85 case PIPE2: return "'||'";
86 case PIPE: return "'|'";
87 case PIPEAT: return "'|@'";
88 case POOL: return "'pool'";
89 case RULE: return "'rule'";
90 case SUBNINJA: return "'subninja'";
91 case TEOF: return "eof";
92 }
93 return NULL; // not reached
94 }
95
TokenErrorHint(Token expected)96 const char* Lexer::TokenErrorHint(Token expected) {
97 switch (expected) {
98 case COLON:
99 return " ($ also escapes ':')";
100 default:
101 return "";
102 }
103 }
104
DescribeLastError()105 string Lexer::DescribeLastError() {
106 if (last_token_) {
107 switch (last_token_[0]) {
108 case '\t':
109 return "tabs are not allowed, use spaces";
110 }
111 }
112 return "lexing error";
113 }
114
UnreadToken()115 void Lexer::UnreadToken() {
116 ofs_ = last_token_;
117 }
118
ReadToken()119 Lexer::Token Lexer::ReadToken() {
120 const char* p = ofs_;
121 const char* q;
122 const char* start;
123 Lexer::Token token;
124 for (;;) {
125 start = p;
126 /*!re2c
127 re2c:define:YYCTYPE = "unsigned char";
128 re2c:define:YYCURSOR = p;
129 re2c:define:YYMARKER = q;
130 re2c:yyfill:enable = 0;
131
132 nul = "\000";
133 simple_varname = [a-zA-Z0-9_-]+;
134 varname = [a-zA-Z0-9_.-]+;
135
136 [ ]*"#"[^\000\n]*"\n" { continue; }
137 [ ]*"\r\n" { token = NEWLINE; break; }
138 [ ]*"\n" { token = NEWLINE; break; }
139 [ ]+ { token = INDENT; break; }
140 "build" { token = BUILD; break; }
141 "pool" { token = POOL; break; }
142 "rule" { token = RULE; break; }
143 "default" { token = DEFAULT; break; }
144 "=" { token = EQUALS; break; }
145 ":" { token = COLON; break; }
146 "|@" { token = PIPEAT; break; }
147 "||" { token = PIPE2; break; }
148 "|" { token = PIPE; break; }
149 "include" { token = INCLUDE; break; }
150 "subninja" { token = SUBNINJA; break; }
151 varname { token = IDENT; break; }
152 nul { token = TEOF; break; }
153 [^] { token = ERROR; break; }
154 */
155 }
156
157 last_token_ = start;
158 ofs_ = p;
159 if (token != NEWLINE && token != TEOF)
160 EatWhitespace();
161 return token;
162 }
163
PeekToken(Token token)164 bool Lexer::PeekToken(Token token) {
165 Token t = ReadToken();
166 if (t == token)
167 return true;
168 UnreadToken();
169 return false;
170 }
171
EatWhitespace()172 void Lexer::EatWhitespace() {
173 const char* p = ofs_;
174 const char* q;
175 for (;;) {
176 ofs_ = p;
177 /*!re2c
178 [ ]+ { continue; }
179 "$\r\n" { continue; }
180 "$\n" { continue; }
181 nul { break; }
182 [^] { break; }
183 */
184 }
185 }
186
ReadIdent(string * out)187 bool Lexer::ReadIdent(string* out) {
188 const char* p = ofs_;
189 const char* start;
190 for (;;) {
191 start = p;
192 /*!re2c
193 varname {
194 out->assign(start, p - start);
195 break;
196 }
197 [^] {
198 last_token_ = start;
199 return false;
200 }
201 */
202 }
203 last_token_ = start;
204 ofs_ = p;
205 EatWhitespace();
206 return true;
207 }
208
ReadEvalString(EvalString * eval,bool path,string * err)209 bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
210 const char* p = ofs_;
211 const char* q;
212 const char* start;
213 for (;;) {
214 start = p;
215 /*!re2c
216 [^$ :\r\n|\000]+ {
217 eval->AddText(StringPiece(start, p - start));
218 continue;
219 }
220 "\r\n" {
221 if (path)
222 p = start;
223 break;
224 }
225 [ :|\n] {
226 if (path) {
227 p = start;
228 break;
229 } else {
230 if (*start == '\n')
231 break;
232 eval->AddText(StringPiece(start, 1));
233 continue;
234 }
235 }
236 "$$" {
237 eval->AddText(StringPiece("$", 1));
238 continue;
239 }
240 "$ " {
241 eval->AddText(StringPiece(" ", 1));
242 continue;
243 }
244 "$\r\n"[ ]* {
245 continue;
246 }
247 "$\n"[ ]* {
248 continue;
249 }
250 "${"varname"}" {
251 eval->AddSpecial(StringPiece(start + 2, p - start - 3));
252 continue;
253 }
254 "$"simple_varname {
255 eval->AddSpecial(StringPiece(start + 1, p - start - 1));
256 continue;
257 }
258 "$:" {
259 eval->AddText(StringPiece(":", 1));
260 continue;
261 }
262 "$". {
263 last_token_ = start;
264 return Error("bad $-escape (literal $ must be written as $$)", err);
265 }
266 nul {
267 last_token_ = start;
268 return Error("unexpected EOF", err);
269 }
270 [^] {
271 last_token_ = start;
272 return Error(DescribeLastError(), err);
273 }
274 */
275 }
276 last_token_ = start;
277 ofs_ = p;
278 if (path)
279 EatWhitespace();
280 // Non-path strings end in newlines, so there's no whitespace to eat.
281 return true;
282 }
283