// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lexer.h"

#include <stdio.h>

#include "eval_env.h"
#include "util.h"

using namespace std;

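// Note: the /*!re2c ... */ blocks below are re2c rules; this file is the
// scanner's source form (likely lexer.in.cc in the Ninja tree), which re2c
// expands into the generated lexer.

// Report an error at the last-read token: prefix |message| with
// "filename:line: ", append the offending source line and a "^ near here"
// marker, store the result in |err|, and return false so callers can simply
// `return Error(...)`.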
bool Lexer::Error(const string& message, string* err) {
  // Compute line/column.
  int line = 1;
  const char* line_start = input_.str_;
  for (const char* p = input_.str_; p < last_token_; ++p) {
    if (*p == '\n') {
      ++line;
      line_start = p + 1;
    }
  }
  int col = last_token_ ? (int)(last_token_ - line_start) : 0;

  char buf[1024];
  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
  *err = buf;
  *err += message + "\n";

  // Add some context to the message.
  const int kTruncateColumn = 72;
  if (col > 0 && col < kTruncateColumn) {
    int len;
    bool truncated = true;
    for (len = 0; len < kTruncateColumn; ++len) {
      if (line_start[len] == 0 || line_start[len] == '\n') {
        truncated = false;
        break;
      }
    }
    *err += string(line_start, len);
    if (truncated)
      *err += "...";
    *err += "\n";
    *err += string(col, ' ');
    *err += "^ near here";
  }

  return false;
}

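// Convenience constructor: start lexing |input| under the placeholder
// filename "input" (useful when there is no real file, e.g. in tests).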
Lexer::Lexer(const char* input) {
  Start("input", input);
}

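// Begin lexing |input|, which was read from |filename|; the filename is only
// used when formatting error messages.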
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;
  last_token_ = NULL;
}

const char* Lexer::TokenName(Token t) {
  switch (t) {
  case ERROR:    return "lexing error";
  case BUILD:    return "'build'";
  case COLON:    return "':'";
  case DEFAULT:  return "'default'";
  case EQUALS:   return "'='";
  case IDENT:    return "identifier";
  case INCLUDE:  return "'include'";
  case INDENT:   return "indent";
  case NEWLINE:  return "newline";
  case PIPE2:    return "'||'";
  case PIPE:     return "'|'";
  case PIPEAT:   return "'|@'";
  case POOL:     return "'pool'";
  case RULE:     return "'rule'";
  case SUBNINJA: return "'subninja'";
  case TEOF:     return "eof";
  }
  return NULL;  // not reached
}

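// Return an extra hint to append to "expected X" error messages; currently
// only COLON gets one, reminding the user that '$' also escapes ':'.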
const char* Lexer::TokenErrorHint(Token expected) {
  switch (expected) {
  case COLON:
    return " ($ also escapes ':')";
  default:
    return "";
  }
}

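// Describe the character that triggered the last lexing error; the only
// special case is a tab, which is rejected in favor of spaces.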
string Lexer::DescribeLastError() {
  if (last_token_) {
    switch (last_token_[0]) {
    case '\t':
      return "tabs are not allowed, use spaces";
    }
  }
  return "lexing error";
}

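// Rewind so the most recently read token is returned again by the next
// ReadToken() call.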
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}

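// Read and return the next token. Comment lines loop back via `continue`
// until the re2c rules below match a real token (or EOF).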
Lexer::Token Lexer::ReadToken() {
  const char* p = ofs_;
  const char* q;
  const char* start;
  Lexer::Token token;
  for (;;) {
    start = p;
    /*!re2c
    re2c:define:YYCTYPE = "unsigned char";
    re2c:define:YYCURSOR = p;
    re2c:define:YYMARKER = q;
    re2c:yyfill:enable = 0;

    nul = "\000";
    simple_varname = [a-zA-Z0-9_-]+;
    varname = [a-zA-Z0-9_.-]+;

    [ ]*"#"[^\000\n]*"\n" { continue; }
    [ ]*"\r\n" { token = NEWLINE;  break; }
    [ ]*"\n"   { token = NEWLINE;  break; }
    [ ]+       { token = INDENT;   break; }
    "build"    { token = BUILD;    break; }
    "pool"     { token = POOL;     break; }
    "rule"     { token = RULE;     break; }
    "default"  { token = DEFAULT;  break; }
    "="        { token = EQUALS;   break; }
    ":"        { token = COLON;    break; }
    "|@"       { token = PIPEAT;   break; }
    "||"       { token = PIPE2;    break; }
    "|"        { token = PIPE;     break; }
    "include"  { token = INCLUDE;  break; }
    "subninja" { token = SUBNINJA; break; }
    varname    { token = IDENT;    break; }
    nul        { token = TEOF;     break; }
    [^]        { token = ERROR;    break; }
    */
  }

  last_token_ = start;
  ofs_ = p;
  if (token != NEWLINE && token != TEOF)
    EatWhitespace();
  return token;
}

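// If the next token is |token|, consume it and return true; otherwise unread
// it and return false.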
bool Lexer::PeekToken(Token token) {
  Token t = ReadToken();
  if (t == token)
    return true;
  UnreadToken();
  return false;
}

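// Skip over spaces and '$'-escaped newlines between tokens.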
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  const char* q;
  for (;;) {
    ofs_ = p;
    /*!re2c
    [ ]+    { continue; }
    "$\r\n" { continue; }
    "$\n"   { continue; }
    nul     { break; }
    [^]     { break; }
    */
  }
}

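// Read an identifier (a variable or rule name) into |out|. Returns false,
// without advancing ofs_, if the input does not start with an identifier
// character.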
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] {
      last_token_ = start;
      return false;
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  EatWhitespace();
  return true;
}

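// Read a possibly $-escaped string into |eval|. When |path| is true the
// string is a path, so an unescaped space, ':', '|', or newline terminates
// it; otherwise it runs to the end of the line. Returns false and fills
// |err| on a bad $-escape or unexpected EOF.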
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    "\r\n" {
      if (path)
        p = start;
      break;
    }
    [ :|\n] {
      if (path) {
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\r\n"[ ]* {
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}