1 #include <cstdio>
2 #include <cstdlib>
3 #include <string>
4 #include <map>
5 #include <vector>
6
7 //===----------------------------------------------------------------------===//
8 // Lexer
9 //===----------------------------------------------------------------------===//
10
11 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
12 // of these for known things.
13 enum Token {
14 tok_eof = -1,
15
16 // commands
17 tok_def = -2, tok_extern = -3,
18
19 // primary
20 tok_identifier = -4, tok_number = -5
21 };
22
23 static std::string IdentifierStr; // Filled in if tok_identifier
24 static double NumVal; // Filled in if tok_number
25
26 /// gettok - Return the next token from standard input.
gettok()27 static int gettok() {
28 static int LastChar = ' ';
29
30 // Skip any whitespace.
31 while (isspace(LastChar))
32 LastChar = getchar();
33
34 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
35 IdentifierStr = LastChar;
36 while (isalnum((LastChar = getchar())))
37 IdentifierStr += LastChar;
38
39 if (IdentifierStr == "def") return tok_def;
40 if (IdentifierStr == "extern") return tok_extern;
41 return tok_identifier;
42 }
43
44 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
45 std::string NumStr;
46 do {
47 NumStr += LastChar;
48 LastChar = getchar();
49 } while (isdigit(LastChar) || LastChar == '.');
50
51 NumVal = strtod(NumStr.c_str(), 0);
52 return tok_number;
53 }
54
55 if (LastChar == '#') {
56 // Comment until end of line.
57 do LastChar = getchar();
58 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
59
60 if (LastChar != EOF)
61 return gettok();
62 }
63
64 // Check for end of file. Don't eat the EOF.
65 if (LastChar == EOF)
66 return tok_eof;
67
68 // Otherwise, just return the character as its ascii value.
69 int ThisChar = LastChar;
70 LastChar = getchar();
71 return ThisChar;
72 }
73
74 //===----------------------------------------------------------------------===//
75 // Abstract Syntax Tree (aka Parse Tree)
76 //===----------------------------------------------------------------------===//
77
78 /// ExprAST - Base class for all expression nodes.
79 class ExprAST {
80 public:
~ExprAST()81 virtual ~ExprAST() {}
82 };
83
84 /// NumberExprAST - Expression class for numeric literals like "1.0".
85 class NumberExprAST : public ExprAST {
86 double Val;
87 public:
NumberExprAST(double val)88 NumberExprAST(double val) : Val(val) {}
89 };
90
91 /// VariableExprAST - Expression class for referencing a variable, like "a".
92 class VariableExprAST : public ExprAST {
93 std::string Name;
94 public:
VariableExprAST(const std::string & name)95 VariableExprAST(const std::string &name) : Name(name) {}
96 };
97
98 /// BinaryExprAST - Expression class for a binary operator.
99 class BinaryExprAST : public ExprAST {
100 char Op;
101 ExprAST *LHS, *RHS;
102 public:
BinaryExprAST(char op,ExprAST * lhs,ExprAST * rhs)103 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
104 : Op(op), LHS(lhs), RHS(rhs) {}
105 };
106
107 /// CallExprAST - Expression class for function calls.
108 class CallExprAST : public ExprAST {
109 std::string Callee;
110 std::vector<ExprAST*> Args;
111 public:
CallExprAST(const std::string & callee,std::vector<ExprAST * > & args)112 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
113 : Callee(callee), Args(args) {}
114 };
115
116 /// PrototypeAST - This class represents the "prototype" for a function,
117 /// which captures its name, and its argument names (thus implicitly the number
118 /// of arguments the function takes).
119 class PrototypeAST {
120 std::string Name;
121 std::vector<std::string> Args;
122 public:
PrototypeAST(const std::string & name,const std::vector<std::string> & args)123 PrototypeAST(const std::string &name, const std::vector<std::string> &args)
124 : Name(name), Args(args) {}
125
126 };
127
128 /// FunctionAST - This class represents a function definition itself.
129 class FunctionAST {
130 PrototypeAST *Proto;
131 ExprAST *Body;
132 public:
FunctionAST(PrototypeAST * proto,ExprAST * body)133 FunctionAST(PrototypeAST *proto, ExprAST *body)
134 : Proto(proto), Body(body) {}
135
136 };
137
138 //===----------------------------------------------------------------------===//
139 // Parser
140 //===----------------------------------------------------------------------===//
141
142 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
143 /// token the parser is looking at. getNextToken reads another token from the
144 /// lexer and updates CurTok with its results.
145 static int CurTok;
getNextToken()146 static int getNextToken() {
147 return CurTok = gettok();
148 }
149
150 /// BinopPrecedence - This holds the precedence for each binary operator that is
151 /// defined.
152 static std::map<char, int> BinopPrecedence;
153
154 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()155 static int GetTokPrecedence() {
156 if (!isascii(CurTok))
157 return -1;
158
159 // Make sure it's a declared binop.
160 int TokPrec = BinopPrecedence[CurTok];
161 if (TokPrec <= 0) return -1;
162 return TokPrec;
163 }
164
165 /// Error* - These are little helper functions for error handling.
Error(const char * Str)166 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
ErrorP(const char * Str)167 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
ErrorF(const char * Str)168 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
169
170 static ExprAST *ParseExpression();
171
172 /// identifierexpr
173 /// ::= identifier
174 /// ::= identifier '(' expression* ')'
ParseIdentifierExpr()175 static ExprAST *ParseIdentifierExpr() {
176 std::string IdName = IdentifierStr;
177
178 getNextToken(); // eat identifier.
179
180 if (CurTok != '(') // Simple variable ref.
181 return new VariableExprAST(IdName);
182
183 // Call.
184 getNextToken(); // eat (
185 std::vector<ExprAST*> Args;
186 if (CurTok != ')') {
187 while (1) {
188 ExprAST *Arg = ParseExpression();
189 if (!Arg) return 0;
190 Args.push_back(Arg);
191
192 if (CurTok == ')') break;
193
194 if (CurTok != ',')
195 return Error("Expected ')' or ',' in argument list");
196 getNextToken();
197 }
198 }
199
200 // Eat the ')'.
201 getNextToken();
202
203 return new CallExprAST(IdName, Args);
204 }
205
206 /// numberexpr ::= number
ParseNumberExpr()207 static ExprAST *ParseNumberExpr() {
208 ExprAST *Result = new NumberExprAST(NumVal);
209 getNextToken(); // consume the number
210 return Result;
211 }
212
213 /// parenexpr ::= '(' expression ')'
ParseParenExpr()214 static ExprAST *ParseParenExpr() {
215 getNextToken(); // eat (.
216 ExprAST *V = ParseExpression();
217 if (!V) return 0;
218
219 if (CurTok != ')')
220 return Error("expected ')'");
221 getNextToken(); // eat ).
222 return V;
223 }
224
225 /// primary
226 /// ::= identifierexpr
227 /// ::= numberexpr
228 /// ::= parenexpr
ParsePrimary()229 static ExprAST *ParsePrimary() {
230 switch (CurTok) {
231 default: return Error("unknown token when expecting an expression");
232 case tok_identifier: return ParseIdentifierExpr();
233 case tok_number: return ParseNumberExpr();
234 case '(': return ParseParenExpr();
235 }
236 }
237
238 /// binoprhs
239 /// ::= ('+' primary)*
ParseBinOpRHS(int ExprPrec,ExprAST * LHS)240 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
241 // If this is a binop, find its precedence.
242 while (1) {
243 int TokPrec = GetTokPrecedence();
244
245 // If this is a binop that binds at least as tightly as the current binop,
246 // consume it, otherwise we are done.
247 if (TokPrec < ExprPrec)
248 return LHS;
249
250 // Okay, we know this is a binop.
251 int BinOp = CurTok;
252 getNextToken(); // eat binop
253
254 // Parse the primary expression after the binary operator.
255 ExprAST *RHS = ParsePrimary();
256 if (!RHS) return 0;
257
258 // If BinOp binds less tightly with RHS than the operator after RHS, let
259 // the pending operator take RHS as its LHS.
260 int NextPrec = GetTokPrecedence();
261 if (TokPrec < NextPrec) {
262 RHS = ParseBinOpRHS(TokPrec+1, RHS);
263 if (RHS == 0) return 0;
264 }
265
266 // Merge LHS/RHS.
267 LHS = new BinaryExprAST(BinOp, LHS, RHS);
268 }
269 }
270
271 /// expression
272 /// ::= primary binoprhs
273 ///
ParseExpression()274 static ExprAST *ParseExpression() {
275 ExprAST *LHS = ParsePrimary();
276 if (!LHS) return 0;
277
278 return ParseBinOpRHS(0, LHS);
279 }
280
281 /// prototype
282 /// ::= id '(' id* ')'
ParsePrototype()283 static PrototypeAST *ParsePrototype() {
284 if (CurTok != tok_identifier)
285 return ErrorP("Expected function name in prototype");
286
287 std::string FnName = IdentifierStr;
288 getNextToken();
289
290 if (CurTok != '(')
291 return ErrorP("Expected '(' in prototype");
292
293 std::vector<std::string> ArgNames;
294 while (getNextToken() == tok_identifier)
295 ArgNames.push_back(IdentifierStr);
296 if (CurTok != ')')
297 return ErrorP("Expected ')' in prototype");
298
299 // success.
300 getNextToken(); // eat ')'.
301
302 return new PrototypeAST(FnName, ArgNames);
303 }
304
305 /// definition ::= 'def' prototype expression
ParseDefinition()306 static FunctionAST *ParseDefinition() {
307 getNextToken(); // eat def.
308 PrototypeAST *Proto = ParsePrototype();
309 if (Proto == 0) return 0;
310
311 if (ExprAST *E = ParseExpression())
312 return new FunctionAST(Proto, E);
313 return 0;
314 }
315
316 /// toplevelexpr ::= expression
ParseTopLevelExpr()317 static FunctionAST *ParseTopLevelExpr() {
318 if (ExprAST *E = ParseExpression()) {
319 // Make an anonymous proto.
320 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
321 return new FunctionAST(Proto, E);
322 }
323 return 0;
324 }
325
326 /// external ::= 'extern' prototype
ParseExtern()327 static PrototypeAST *ParseExtern() {
328 getNextToken(); // eat extern.
329 return ParsePrototype();
330 }
331
332 //===----------------------------------------------------------------------===//
333 // Top-Level parsing
334 //===----------------------------------------------------------------------===//
335
HandleDefinition()336 static void HandleDefinition() {
337 if (ParseDefinition()) {
338 fprintf(stderr, "Parsed a function definition.\n");
339 } else {
340 // Skip token for error recovery.
341 getNextToken();
342 }
343 }
344
HandleExtern()345 static void HandleExtern() {
346 if (ParseExtern()) {
347 fprintf(stderr, "Parsed an extern\n");
348 } else {
349 // Skip token for error recovery.
350 getNextToken();
351 }
352 }
353
HandleTopLevelExpression()354 static void HandleTopLevelExpression() {
355 // Evaluate a top-level expression into an anonymous function.
356 if (ParseTopLevelExpr()) {
357 fprintf(stderr, "Parsed a top-level expr\n");
358 } else {
359 // Skip token for error recovery.
360 getNextToken();
361 }
362 }
363
364 /// top ::= definition | external | expression | ';'
MainLoop()365 static void MainLoop() {
366 while (1) {
367 fprintf(stderr, "ready> ");
368 switch (CurTok) {
369 case tok_eof: return;
370 case ';': getNextToken(); break; // ignore top-level semicolons.
371 case tok_def: HandleDefinition(); break;
372 case tok_extern: HandleExtern(); break;
373 default: HandleTopLevelExpression(); break;
374 }
375 }
376 }
377
378 //===----------------------------------------------------------------------===//
379 // Main driver code.
380 //===----------------------------------------------------------------------===//
381
main()382 int main() {
383 // Install standard binary operators.
384 // 1 is lowest precedence.
385 BinopPrecedence['<'] = 10;
386 BinopPrecedence['+'] = 20;
387 BinopPrecedence['-'] = 20;
388 BinopPrecedence['*'] = 40; // highest.
389
390 // Prime the first token.
391 fprintf(stderr, "ready> ");
392 getNextToken();
393
394 // Run the main "interpreter loop" now.
395 MainLoop();
396
397 return 0;
398 }
399