// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lexer.h"

#include <stdio.h>

#include "eval_env.h"
#include "util.h"

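// Report an error at the position of the last-read token: writes
// "<filename>:<line>: <message>" plus the offending source line and a
// caret marking the column into |err|, and always returns false so
// callers can simply "return Error(...)".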
bool Lexer::Error(const string& message, string* err) {
  // Compute line/column.
  int line = 1;
  const char* line_start = input_.str_;
  for (const char* p = input_.str_; p < last_token_; ++p) {
    if (*p == '\n') {
      ++line;
      line_start = p + 1;
    }
  }
  int col = last_token_ ? (int)(last_token_ - line_start) : 0;

  char buf[1024];
  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
  *err = buf;
  *err += message + "\n";

  // Add some context to the message.
  const int kTruncateColumn = 72;
  if (col > 0 && col < kTruncateColumn) {
    int len;
    bool truncated = true;
    for (len = 0; len < kTruncateColumn; ++len) {
      if (line_start[len] == 0 || line_start[len] == '\n') {
        truncated = false;
        break;
      }
    }
    *err += string(line_start, len);
    if (truncated)
      *err += "...";
    *err += "\n";
    *err += string(col, ' ');
    *err += "^ near here";
  }

  return false;
}

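// Convenience constructor: immediately start lexing |input| under the
// placeholder filename "input".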
Lexer::Lexer(const char* input) {
  Start("input", input);
}

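// Begin lexing |input|; |filename| is only used when formatting error
// messages.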
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;
  last_token_ = NULL;
}

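// Return a human-readable name for a token type, for use in diagnostics.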
const char* Lexer::TokenName(Token t) {
  switch (t) {
  case ERROR:    return "lexing error";
  case BUILD:    return "'build'";
  case COLON:    return "':'";
  case DEFAULT:  return "'default'";
  case EQUALS:   return "'='";
  case IDENT:    return "identifier";
  case INCLUDE:  return "'include'";
  case INDENT:   return "indent";
  case NEWLINE:  return "newline";
  case PIPE2:    return "'||'";
  case PIPE:     return "'|'";
  case POOL:     return "'pool'";
  case RULE:     return "'rule'";
  case SUBNINJA: return "'subninja'";
  case TEOF:     return "eof";
  }
  return NULL;  // not reached
}

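// Return an extra hint to append to an "expected <token>" error message,
// e.g. how to escape ':'; empty for tokens with no hint.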
const char* Lexer::TokenErrorHint(Token expected) {
  switch (expected) {
  case COLON:
    return " ($ also escapes ':')";
  default:
    return "";
  }
}

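// Describe the most recent lexing error; currently only tabs get a
// dedicated message.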
string Lexer::DescribeLastError() {
  if (last_token_) {
    switch (last_token_[0]) {
    case '\t':
      return "tabs are not allowed, use spaces";
    }
  }
  return "lexing error";
}

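// Rewind so the most recently read token is returned again by the next
// ReadToken() call.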
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}

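// Read and return the next token. The /*!re2c ... */ block below is a
// re2c rule specification; the re2c tool expands it into the scanner's
// state machine when this file is processed.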
Lexer::Token Lexer::ReadToken() {
  const char* p = ofs_;
  const char* q;
  const char* start;
  Lexer::Token token;
  for (;;) {
    start = p;
    /*!re2c
    re2c:define:YYCTYPE = "unsigned char";
    re2c:define:YYCURSOR = p;
    re2c:define:YYMARKER = q;
    re2c:yyfill:enable = 0;

    nul = "\000";
    simple_varname = [a-zA-Z0-9_-]+;
    varname = [a-zA-Z0-9_.-]+;

    [ ]*"#"[^\000\n]*"\n" { continue; }
    [ ]*"\r\n" { token = NEWLINE;  break; }
    [ ]*"\n"   { token = NEWLINE;  break; }
    [ ]+       { token = INDENT;   break; }
    "build"    { token = BUILD;    break; }
    "pool"     { token = POOL;     break; }
    "rule"     { token = RULE;     break; }
    "default"  { token = DEFAULT;  break; }
    "="        { token = EQUALS;   break; }
    ":"        { token = COLON;    break; }
    "||"       { token = PIPE2;    break; }
    "|"        { token = PIPE;     break; }
    "include"  { token = INCLUDE;  break; }
    "subninja" { token = SUBNINJA; break; }
    varname    { token = IDENT;    break; }
    nul        { token = TEOF;     break; }
    [^]        { token = ERROR;    break; }
    */
  }

  last_token_ = start;
  ofs_ = p;
  if (token != NEWLINE && token != TEOF)
    EatWhitespace();
  return token;
}

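// If the next token is |token|, consume it and return true; otherwise put
// it back and return false.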
bool Lexer::PeekToken(Token token) {
  Token t = ReadToken();
  if (t == token)
    return true;
  UnreadToken();
  return false;
}

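// Skip over spaces and $-escaped newlines.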
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  const char* q;
  for (;;) {
    ofs_ = p;
    /*!re2c
    [ ]+    { continue; }
    "$\r\n" { continue; }
    "$\n"   { continue; }
    nul     { break; }
    [^]     { break; }
    */
  }
}

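// Read a single identifier (e.g. a rule or variable name) into |out|;
// returns false if the input does not start with a valid identifier.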
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] {
      last_token_ = start;
      return false;
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  EatWhitespace();
  return true;
}

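// Read an unevaluated string (a path or a variable value, depending on
// |path|) into |eval|, handling $-escapes and $variable references;
// returns false and fills |err| on a lexing error.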
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    "\r\n" {
      if (path)
        p = start;
      break;
    }
    [ :|\n] {
      if (path) {
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\r\n"[ ]* {
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}