1 #include <cctype>
2 #include <cstdio>
3 #include <cstdlib>
4 #include <map>
5 #include <string>
6 #include <vector>
7
8 //===----------------------------------------------------------------------===//
9 // Lexer
10 //===----------------------------------------------------------------------===//
11
12 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
13 // of these for known things.
14 enum Token {
15 tok_eof = -1,
16
17 // commands
18 tok_def = -2, tok_extern = -3,
19
20 // primary
21 tok_identifier = -4, tok_number = -5
22 };
23
24 static std::string IdentifierStr; // Filled in if tok_identifier
25 static double NumVal; // Filled in if tok_number
26
27 /// gettok - Return the next token from standard input.
gettok()28 static int gettok() {
29 static int LastChar = ' ';
30
31 // Skip any whitespace.
32 while (isspace(LastChar))
33 LastChar = getchar();
34
35 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
36 IdentifierStr = LastChar;
37 while (isalnum((LastChar = getchar())))
38 IdentifierStr += LastChar;
39
40 if (IdentifierStr == "def") return tok_def;
41 if (IdentifierStr == "extern") return tok_extern;
42 return tok_identifier;
43 }
44
45 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
46 std::string NumStr;
47 do {
48 NumStr += LastChar;
49 LastChar = getchar();
50 } while (isdigit(LastChar) || LastChar == '.');
51
52 NumVal = strtod(NumStr.c_str(), 0);
53 return tok_number;
54 }
55
56 if (LastChar == '#') {
57 // Comment until end of line.
58 do LastChar = getchar();
59 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
60
61 if (LastChar != EOF)
62 return gettok();
63 }
64
65 // Check for end of file. Don't eat the EOF.
66 if (LastChar == EOF)
67 return tok_eof;
68
69 // Otherwise, just return the character as its ascii value.
70 int ThisChar = LastChar;
71 LastChar = getchar();
72 return ThisChar;
73 }
74
75 //===----------------------------------------------------------------------===//
76 // Abstract Syntax Tree (aka Parse Tree)
77 //===----------------------------------------------------------------------===//
78 namespace {
79 /// ExprAST - Base class for all expression nodes.
80 class ExprAST {
81 public:
~ExprAST()82 virtual ~ExprAST() {}
83 };
84
85 /// NumberExprAST - Expression class for numeric literals like "1.0".
86 class NumberExprAST : public ExprAST {
87 public:
NumberExprAST(double val)88 NumberExprAST(double val) {}
89 };
90
91 /// VariableExprAST - Expression class for referencing a variable, like "a".
92 class VariableExprAST : public ExprAST {
93 std::string Name;
94 public:
VariableExprAST(const std::string & name)95 VariableExprAST(const std::string &name) : Name(name) {}
96 };
97
98 /// BinaryExprAST - Expression class for a binary operator.
99 class BinaryExprAST : public ExprAST {
100 public:
BinaryExprAST(char op,ExprAST * lhs,ExprAST * rhs)101 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {}
102 };
103
104 /// CallExprAST - Expression class for function calls.
105 class CallExprAST : public ExprAST {
106 std::string Callee;
107 std::vector<ExprAST*> Args;
108 public:
CallExprAST(const std::string & callee,std::vector<ExprAST * > & args)109 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
110 : Callee(callee), Args(args) {}
111 };
112
113 /// PrototypeAST - This class represents the "prototype" for a function,
114 /// which captures its name, and its argument names (thus implicitly the number
115 /// of arguments the function takes).
116 class PrototypeAST {
117 std::string Name;
118 std::vector<std::string> Args;
119 public:
PrototypeAST(const std::string & name,const std::vector<std::string> & args)120 PrototypeAST(const std::string &name, const std::vector<std::string> &args)
121 : Name(name), Args(args) {}
122
123 };
124
125 /// FunctionAST - This class represents a function definition itself.
126 class FunctionAST {
127 public:
FunctionAST(PrototypeAST * proto,ExprAST * body)128 FunctionAST(PrototypeAST *proto, ExprAST *body) {}
129 };
130 } // end anonymous namespace
131
132 //===----------------------------------------------------------------------===//
133 // Parser
134 //===----------------------------------------------------------------------===//
135
136 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
137 /// token the parser is looking at. getNextToken reads another token from the
138 /// lexer and updates CurTok with its results.
139 static int CurTok;
getNextToken()140 static int getNextToken() {
141 return CurTok = gettok();
142 }
143
144 /// BinopPrecedence - This holds the precedence for each binary operator that is
145 /// defined.
146 static std::map<char, int> BinopPrecedence;
147
148 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()149 static int GetTokPrecedence() {
150 if (!isascii(CurTok))
151 return -1;
152
153 // Make sure it's a declared binop.
154 int TokPrec = BinopPrecedence[CurTok];
155 if (TokPrec <= 0) return -1;
156 return TokPrec;
157 }
158
159 /// Error* - These are little helper functions for error handling.
Error(const char * Str)160 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
ErrorP(const char * Str)161 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
162
163 static ExprAST *ParseExpression();
164
165 /// identifierexpr
166 /// ::= identifier
167 /// ::= identifier '(' expression* ')'
ParseIdentifierExpr()168 static ExprAST *ParseIdentifierExpr() {
169 std::string IdName = IdentifierStr;
170
171 getNextToken(); // eat identifier.
172
173 if (CurTok != '(') // Simple variable ref.
174 return new VariableExprAST(IdName);
175
176 // Call.
177 getNextToken(); // eat (
178 std::vector<ExprAST*> Args;
179 if (CurTok != ')') {
180 while (1) {
181 ExprAST *Arg = ParseExpression();
182 if (!Arg) return 0;
183 Args.push_back(Arg);
184
185 if (CurTok == ')') break;
186
187 if (CurTok != ',')
188 return Error("Expected ')' or ',' in argument list");
189 getNextToken();
190 }
191 }
192
193 // Eat the ')'.
194 getNextToken();
195
196 return new CallExprAST(IdName, Args);
197 }
198
199 /// numberexpr ::= number
ParseNumberExpr()200 static ExprAST *ParseNumberExpr() {
201 ExprAST *Result = new NumberExprAST(NumVal);
202 getNextToken(); // consume the number
203 return Result;
204 }
205
206 /// parenexpr ::= '(' expression ')'
ParseParenExpr()207 static ExprAST *ParseParenExpr() {
208 getNextToken(); // eat (.
209 ExprAST *V = ParseExpression();
210 if (!V) return 0;
211
212 if (CurTok != ')')
213 return Error("expected ')'");
214 getNextToken(); // eat ).
215 return V;
216 }
217
218 /// primary
219 /// ::= identifierexpr
220 /// ::= numberexpr
221 /// ::= parenexpr
ParsePrimary()222 static ExprAST *ParsePrimary() {
223 switch (CurTok) {
224 default: return Error("unknown token when expecting an expression");
225 case tok_identifier: return ParseIdentifierExpr();
226 case tok_number: return ParseNumberExpr();
227 case '(': return ParseParenExpr();
228 }
229 }
230
231 /// binoprhs
232 /// ::= ('+' primary)*
ParseBinOpRHS(int ExprPrec,ExprAST * LHS)233 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
234 // If this is a binop, find its precedence.
235 while (1) {
236 int TokPrec = GetTokPrecedence();
237
238 // If this is a binop that binds at least as tightly as the current binop,
239 // consume it, otherwise we are done.
240 if (TokPrec < ExprPrec)
241 return LHS;
242
243 // Okay, we know this is a binop.
244 int BinOp = CurTok;
245 getNextToken(); // eat binop
246
247 // Parse the primary expression after the binary operator.
248 ExprAST *RHS = ParsePrimary();
249 if (!RHS) return 0;
250
251 // If BinOp binds less tightly with RHS than the operator after RHS, let
252 // the pending operator take RHS as its LHS.
253 int NextPrec = GetTokPrecedence();
254 if (TokPrec < NextPrec) {
255 RHS = ParseBinOpRHS(TokPrec+1, RHS);
256 if (RHS == 0) return 0;
257 }
258
259 // Merge LHS/RHS.
260 LHS = new BinaryExprAST(BinOp, LHS, RHS);
261 }
262 }
263
264 /// expression
265 /// ::= primary binoprhs
266 ///
ParseExpression()267 static ExprAST *ParseExpression() {
268 ExprAST *LHS = ParsePrimary();
269 if (!LHS) return 0;
270
271 return ParseBinOpRHS(0, LHS);
272 }
273
274 /// prototype
275 /// ::= id '(' id* ')'
ParsePrototype()276 static PrototypeAST *ParsePrototype() {
277 if (CurTok != tok_identifier)
278 return ErrorP("Expected function name in prototype");
279
280 std::string FnName = IdentifierStr;
281 getNextToken();
282
283 if (CurTok != '(')
284 return ErrorP("Expected '(' in prototype");
285
286 std::vector<std::string> ArgNames;
287 while (getNextToken() == tok_identifier)
288 ArgNames.push_back(IdentifierStr);
289 if (CurTok != ')')
290 return ErrorP("Expected ')' in prototype");
291
292 // success.
293 getNextToken(); // eat ')'.
294
295 return new PrototypeAST(FnName, ArgNames);
296 }
297
298 /// definition ::= 'def' prototype expression
ParseDefinition()299 static FunctionAST *ParseDefinition() {
300 getNextToken(); // eat def.
301 PrototypeAST *Proto = ParsePrototype();
302 if (Proto == 0) return 0;
303
304 if (ExprAST *E = ParseExpression())
305 return new FunctionAST(Proto, E);
306 return 0;
307 }
308
309 /// toplevelexpr ::= expression
ParseTopLevelExpr()310 static FunctionAST *ParseTopLevelExpr() {
311 if (ExprAST *E = ParseExpression()) {
312 // Make an anonymous proto.
313 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
314 return new FunctionAST(Proto, E);
315 }
316 return 0;
317 }
318
319 /// external ::= 'extern' prototype
ParseExtern()320 static PrototypeAST *ParseExtern() {
321 getNextToken(); // eat extern.
322 return ParsePrototype();
323 }
324
325 //===----------------------------------------------------------------------===//
326 // Top-Level parsing
327 //===----------------------------------------------------------------------===//
328
HandleDefinition()329 static void HandleDefinition() {
330 if (ParseDefinition()) {
331 fprintf(stderr, "Parsed a function definition.\n");
332 } else {
333 // Skip token for error recovery.
334 getNextToken();
335 }
336 }
337
HandleExtern()338 static void HandleExtern() {
339 if (ParseExtern()) {
340 fprintf(stderr, "Parsed an extern\n");
341 } else {
342 // Skip token for error recovery.
343 getNextToken();
344 }
345 }
346
HandleTopLevelExpression()347 static void HandleTopLevelExpression() {
348 // Evaluate a top-level expression into an anonymous function.
349 if (ParseTopLevelExpr()) {
350 fprintf(stderr, "Parsed a top-level expr\n");
351 } else {
352 // Skip token for error recovery.
353 getNextToken();
354 }
355 }
356
357 /// top ::= definition | external | expression | ';'
MainLoop()358 static void MainLoop() {
359 while (1) {
360 fprintf(stderr, "ready> ");
361 switch (CurTok) {
362 case tok_eof: return;
363 case ';': getNextToken(); break; // ignore top-level semicolons.
364 case tok_def: HandleDefinition(); break;
365 case tok_extern: HandleExtern(); break;
366 default: HandleTopLevelExpression(); break;
367 }
368 }
369 }
370
371 //===----------------------------------------------------------------------===//
372 // Main driver code.
373 //===----------------------------------------------------------------------===//
374
main()375 int main() {
376 // Install standard binary operators.
377 // 1 is lowest precedence.
378 BinopPrecedence['<'] = 10;
379 BinopPrecedence['+'] = 20;
380 BinopPrecedence['-'] = 20;
381 BinopPrecedence['*'] = 40; // highest.
382
383 // Prime the first token.
384 fprintf(stderr, "ready> ");
385 getNextToken();
386
387 // Run the main "interpreter loop" now.
388 MainLoop();
389
390 return 0;
391 }
392