// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lexer.h"

#include <stdio.h>

#include "eval_env.h"
#include "util.h"

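// Build an error message pointing at the last-read token: prefix it with
// "filename:line: ", then append the offending line (truncated at 72 columns)
// and a caret marking the column. Always returns false so callers can write
// `return Error(...)`.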
bool Lexer::Error(const string& message, string* err) {
  // Compute line/column.
  int line = 1;
  const char* line_start = input_.str_;
  for (const char* p = input_.str_; p < last_token_; ++p) {
    if (*p == '\n') {
      ++line;
      line_start = p + 1;
    }
  }
  int col = last_token_ ? (int)(last_token_ - line_start) : 0;

  char buf[1024];
  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
  *err = buf;
  *err += message + "\n";

  // Add some context to the message.
  const int kTruncateColumn = 72;
  if (col > 0 && col < kTruncateColumn) {
    int len;
    bool truncated = true;
    for (len = 0; len < kTruncateColumn; ++len) {
      if (line_start[len] == 0 || line_start[len] == '\n') {
        truncated = false;
        break;
      }
    }
    *err += string(line_start, len);
    if (truncated)
      *err += "...";
    *err += "\n";
    *err += string(col, ' ');
    *err += "^ near here";
  }

  return false;
}

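// Convenience constructor: start lexing an in-memory buffer under the
// name "input".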
Lexer::Lexer(const char* input) {
  Start("input", input);
}

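// Begin lexing `input`; `filename` is only used when reporting errors.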
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;
  last_token_ = NULL;
}

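// Return a human-readable name for a token type, used in error messages.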
const char* Lexer::TokenName(Token t) {
  switch (t) {
  case ERROR:    return "lexing error";
  case BUILD:    return "'build'";
  case COLON:    return "':'";
  case DEFAULT:  return "'default'";
  case EQUALS:   return "'='";
  case IDENT:    return "identifier";
  case INCLUDE:  return "'include'";
  case INDENT:   return "indent";
  case NEWLINE:  return "newline";
  case PIPE2:    return "'||'";
  case PIPE:     return "'|'";
  case POOL:     return "'pool'";
  case RULE:     return "'rule'";
  case SUBNINJA: return "'subninja'";
  case TEOF:     return "eof";
  }
  return NULL;  // not reached
}

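// Return an extra hint appended to "expected token" errors, or "" when there
// is nothing useful to add.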
const char* Lexer::TokenErrorHint(Token expected) {
  switch (expected) {
  case COLON:
    return " ($ also escapes ':')";
  default:
    return "";
  }
}

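// Describe the most recent lexing error; currently this only special-cases a
// leading tab, which ninja files do not allow.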
string Lexer::DescribeLastError() {
  if (last_token_) {
    switch (last_token_[0]) {
    case '\t':
      return "tabs are not allowed, use spaces";
    }
  }
  return "lexing error";
}

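// Rewind to the last-read token so it will be returned again by ReadToken().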
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}

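// Read and return the next token, advancing past it. The rules in the
// /*!re2c ... */ block below are compiled by re2c into the actual scanner.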
Lexer::Token Lexer::ReadToken() {
  const char* p = ofs_;
  const char* q;
  const char* start;
  Lexer::Token token;
  for (;;) {
    start = p;
    /*!re2c
    re2c:define:YYCTYPE = "unsigned char";
    re2c:define:YYCURSOR = p;
    re2c:define:YYMARKER = q;
    re2c:yyfill:enable = 0;

    nul = "\000";
    simple_varname = [a-zA-Z0-9_-]+;
    varname = [a-zA-Z0-9_.-]+;

    [ ]*"#"[^\000\n]*"\n" { continue; }
    [ ]*"\r\n" { token = NEWLINE;  break; }
    [ ]*"\n"   { token = NEWLINE;  break; }
    [ ]+       { token = INDENT;   break; }
    "build"    { token = BUILD;    break; }
    "pool"     { token = POOL;     break; }
    "rule"     { token = RULE;     break; }
    "default"  { token = DEFAULT;  break; }
    "="        { token = EQUALS;   break; }
    ":"        { token = COLON;    break; }
    "||"       { token = PIPE2;    break; }
    "|"        { token = PIPE;     break; }
    "include"  { token = INCLUDE;  break; }
    "subninja" { token = SUBNINJA; break; }
    varname    { token = IDENT;    break; }
    nul        { token = TEOF;     break; }
    [^]        { token = ERROR;    break; }
    */
  }

  last_token_ = start;
  ofs_ = p;
  if (token != NEWLINE && token != TEOF)
    EatWhitespace();
  return token;
}

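// If the next token is `token`, consume it and return true; otherwise leave
// the input position unchanged and return false.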
bool Lexer::PeekToken(Token token) {
  Token t = ReadToken();
  if (t == token)
    return true;
  UnreadToken();
  return false;
}

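// Skip over runs of spaces and "$"-escaped newlines (line continuations).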
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  const char* q;
  for (;;) {
    ofs_ = p;
    /*!re2c
    [ ]+    { continue; }
    "$\r\n" { continue; }
    "$\n"   { continue; }
    nul     { break; }
    [^]     { break; }
    */
  }
}

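// Read a simple identifier (a rule or variable name) into *out. Returns false
// without consuming input if no identifier starts at the current position.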
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] {
      last_token_ = start;
      return false;
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  EatWhitespace();
  return true;
}

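// Read a $-escaped string into *eval; "$var" and "${var}" become special
// pieces to be expanded later. When `path` is true, stop (without consuming)
// at characters that end a path: space, ':', '|', or newline; otherwise stop
// only at a newline. On error, fills *err and returns false.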
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    "\r\n" {
      if (path)
        p = start;
      break;
    }
    [ :|\n] {
      if (path) {
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\r\n"[ ]* {
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}