1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
15
16 #include <assert.h>
17
18 /* Data structure used internally */
19 struct compiling {
20 char *c_encoding; /* source encoding; NULL when none was declared */
21 int c_future_unicode; /* __future__ unicode literals flag */
22 PyArena *c_arena; /* arena for allocating memory */
23 const char *c_filename; /* filename, passed along when issuing warnings */
24 };
25
26 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
27 static expr_ty ast_for_expr(struct compiling *, const node *);
28 static stmt_ty ast_for_stmt(struct compiling *, const node *);
29 static asdl_seq *ast_for_suite(struct compiling *, const node *);
30 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
31 expr_context_ty);
32 static expr_ty ast_for_testlist(struct compiling *, const node *);
33 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
34 static expr_ty ast_for_testlist_comp(struct compiling *, const node *);
35
36 /* Note different signature for ast_for_call */
37 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
38
39 static PyObject *parsenumber(struct compiling *, const char *);
40 static PyObject *parsestr(struct compiling *, const node *n, const char *);
41 static PyObject *parsestrplus(struct compiling *, const node *n);
42
/* Line number stored on a parse-tree node.  Only defined here when no
   earlier definition is in effect. */
43 #ifndef LINENO
44 #define LINENO(n) ((n)->n_lineno)
45 #endif
46
/* Comprehension flavours.  NOTE(review): presumably the values accepted by
   the `type` argument of ast_for_itercomp() -- confirm against its body. */
47 #define COMP_GENEXP 0
48 #define COMP_SETCOMP 1
49
50 static identifier
new_identifier(const char * n,PyArena * arena)51 new_identifier(const char* n, PyArena *arena) {
52 PyObject* id = PyString_InternFromString(n);
53 if (id != NULL)
54 PyArena_AddPyObject(arena, id);
55 return id;
56 }
57
58 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
59
60 /* This routine provides an invalid object for the syntax error.
61 The outermost routine must unpack this error and create the
62 proper object. We do this so that we don't have to pass
63 the filename to everything function.
64
65 XXX Maybe we should just pass the filename...
66 */
67
68 static int
ast_error(const node * n,const char * errstr)69 ast_error(const node *n, const char *errstr)
70 {
71 PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
72 if (!u)
73 return 0;
74 PyErr_SetObject(PyExc_SyntaxError, u);
75 Py_DECREF(u);
76 return 0;
77 }
78
79 static void
ast_error_finish(const char * filename)80 ast_error_finish(const char *filename)
81 {
82 PyObject *type, *value, *tback, *errstr, *loc, *tmp;
83 long lineno;
84
85 assert(PyErr_Occurred());
86 if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
87 return;
88
89 PyErr_Fetch(&type, &value, &tback);
90 errstr = PyTuple_GetItem(value, 0);
91 if (!errstr)
92 return;
93 Py_INCREF(errstr);
94 lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
95 if (lineno == -1) {
96 Py_DECREF(errstr);
97 return;
98 }
99 Py_DECREF(value);
100
101 loc = PyErr_ProgramText(filename, lineno);
102 if (!loc) {
103 Py_INCREF(Py_None);
104 loc = Py_None;
105 }
106 tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
107 Py_DECREF(loc);
108 if (!tmp) {
109 Py_DECREF(errstr);
110 return;
111 }
112 value = PyTuple_Pack(2, errstr, tmp);
113 Py_DECREF(errstr);
114 Py_DECREF(tmp);
115 if (!value)
116 return;
117 PyErr_Restore(type, value, tback);
118 }
119
120 static int
ast_warn(struct compiling * c,const node * n,char * msg)121 ast_warn(struct compiling *c, const node *n, char *msg)
122 {
123 if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, c->c_filename, LINENO(n),
124 NULL, NULL) < 0) {
125 /* if -Werr, change it to a SyntaxError */
126 if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxWarning))
127 ast_error(n, msg);
128 return 0;
129 }
130 return 1;
131 }
132
133 static int
forbidden_check(struct compiling * c,const node * n,const char * x)134 forbidden_check(struct compiling *c, const node *n, const char *x)
135 {
136 if (!strcmp(x, "None"))
137 return ast_error(n, "cannot assign to None");
138 if (!strcmp(x, "__debug__"))
139 return ast_error(n, "cannot assign to __debug__");
140 if (Py_Py3kWarningFlag) {
141 if (!(strcmp(x, "True") && strcmp(x, "False")) &&
142 !ast_warn(c, n, "assignment to True or False is forbidden in 3.x"))
143 return 0;
144 if (!strcmp(x, "nonlocal") &&
145 !ast_warn(c, n, "nonlocal is a keyword in 3.x"))
146 return 0;
147 }
148 return 1;
149 }
150
151 /* num_stmts() returns number of contained statements.
152
153 Use this routine to determine how big a sequence is needed for
154 the statements in a parse tree. Its raison d'etre is this bit of
155 grammar:
156
157 stmt: simple_stmt | compound_stmt
158 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
159
160 A simple_stmt can contain multiple small_stmt elements joined
161 by semicolons. If the arg is a simple_stmt, the number of
162 small_stmt elements is returned.
163 */
164
165 static int
num_stmts(const node * n)166 num_stmts(const node *n)
167 {
168 int i, l;
169 node *ch;
170
171 switch (TYPE(n)) {
172 case single_input:
173 if (TYPE(CHILD(n, 0)) == NEWLINE)
174 return 0;
175 else
176 return num_stmts(CHILD(n, 0));
177 case file_input:
178 l = 0;
179 for (i = 0; i < NCH(n); i++) {
180 ch = CHILD(n, i);
181 if (TYPE(ch) == stmt)
182 l += num_stmts(ch);
183 }
184 return l;
185 case stmt:
186 return num_stmts(CHILD(n, 0));
187 case compound_stmt:
188 return 1;
189 case simple_stmt:
190 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
191 case suite:
192 if (NCH(n) == 1)
193 return num_stmts(CHILD(n, 0));
194 else {
195 l = 0;
196 for (i = 2; i < (NCH(n) - 1); i++)
197 l += num_stmts(CHILD(n, i));
198 return l;
199 }
200 default: {
201 char buf[128];
202
203 sprintf(buf, "Non-statement found: %d %d",
204 TYPE(n), NCH(n));
205 Py_FatalError(buf);
206 }
207 }
208 assert(0);
209 return 0;
210 }
211
212 /* Transform the CST rooted at node * to the appropriate AST
213 */
214
215 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename,PyArena * arena)216 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
217 PyArena *arena)
218 {
219 int i, j, k, num;
220 asdl_seq *stmts = NULL;
221 stmt_ty s;
222 node *ch;
223 struct compiling c;
224
225 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
226 c.c_encoding = "utf-8";
227 if (TYPE(n) == encoding_decl) {
228 ast_error(n, "encoding declaration in Unicode string");
229 goto error;
230 }
231 } else if (TYPE(n) == encoding_decl) {
232 c.c_encoding = STR(n);
233 n = CHILD(n, 0);
234 } else {
235 c.c_encoding = NULL;
236 }
237 c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
238 c.c_arena = arena;
239 c.c_filename = filename;
240
241 k = 0;
242 switch (TYPE(n)) {
243 case file_input:
244 stmts = asdl_seq_new(num_stmts(n), arena);
245 if (!stmts)
246 return NULL;
247 for (i = 0; i < NCH(n) - 1; i++) {
248 ch = CHILD(n, i);
249 if (TYPE(ch) == NEWLINE)
250 continue;
251 REQ(ch, stmt);
252 num = num_stmts(ch);
253 if (num == 1) {
254 s = ast_for_stmt(&c, ch);
255 if (!s)
256 goto error;
257 asdl_seq_SET(stmts, k++, s);
258 }
259 else {
260 ch = CHILD(ch, 0);
261 REQ(ch, simple_stmt);
262 for (j = 0; j < num; j++) {
263 s = ast_for_stmt(&c, CHILD(ch, j * 2));
264 if (!s)
265 goto error;
266 asdl_seq_SET(stmts, k++, s);
267 }
268 }
269 }
270 return Module(stmts, arena);
271 case eval_input: {
272 expr_ty testlist_ast;
273
274 /* XXX Why not comp_for here? */
275 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
276 if (!testlist_ast)
277 goto error;
278 return Expression(testlist_ast, arena);
279 }
280 case single_input:
281 if (TYPE(CHILD(n, 0)) == NEWLINE) {
282 stmts = asdl_seq_new(1, arena);
283 if (!stmts)
284 goto error;
285 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
286 arena));
287 if (!asdl_seq_GET(stmts, 0))
288 goto error;
289 return Interactive(stmts, arena);
290 }
291 else {
292 n = CHILD(n, 0);
293 num = num_stmts(n);
294 stmts = asdl_seq_new(num, arena);
295 if (!stmts)
296 goto error;
297 if (num == 1) {
298 s = ast_for_stmt(&c, n);
299 if (!s)
300 goto error;
301 asdl_seq_SET(stmts, 0, s);
302 }
303 else {
304 /* Only a simple_stmt can contain multiple statements. */
305 REQ(n, simple_stmt);
306 for (i = 0; i < NCH(n); i += 2) {
307 if (TYPE(CHILD(n, i)) == NEWLINE)
308 break;
309 s = ast_for_stmt(&c, CHILD(n, i));
310 if (!s)
311 goto error;
312 asdl_seq_SET(stmts, i / 2, s);
313 }
314 }
315
316 return Interactive(stmts, arena);
317 }
318 default:
319 PyErr_Format(PyExc_SystemError,
320 "invalid node %d for PyAST_FromNode", TYPE(n));
321 goto error;
322 }
323 error:
324 ast_error_finish(filename);
325 return NULL;
326 }
327
328 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
329 */
330
331 static operator_ty
get_operator(const node * n)332 get_operator(const node *n)
333 {
334 switch (TYPE(n)) {
335 case VBAR:
336 return BitOr;
337 case CIRCUMFLEX:
338 return BitXor;
339 case AMPER:
340 return BitAnd;
341 case LEFTSHIFT:
342 return LShift;
343 case RIGHTSHIFT:
344 return RShift;
345 case PLUS:
346 return Add;
347 case MINUS:
348 return Sub;
349 case STAR:
350 return Mult;
351 case SLASH:
352 return Div;
353 case DOUBLESLASH:
354 return FloorDiv;
355 case PERCENT:
356 return Mod;
357 default:
358 return (operator_ty)0;
359 }
360 }
361
362 /* Set the context ctx for expr_ty e, recursively traversing e.
363
364 Only sets context for expr kinds that "can appear in assignment context"
365 (according to ../Parser/Python.asdl). For other expr kinds, it sets
366 an appropriate syntax error and returns false.
367 */
368
369 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)370 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
371 {
372 asdl_seq *s = NULL;
373 /* If a particular expression type can't be used for assign / delete,
374 set expr_name to its name and an error message will be generated.
375 */
376 const char* expr_name = NULL;
377
378 /* The ast defines augmented store and load contexts, but the
379 implementation here doesn't actually use them. The code may be
380 a little more complex than necessary as a result. It also means
381 that expressions in an augmented assignment have a Store context.
382 Consider restructuring so that augmented assignment uses
383 set_context(), too.
384 */
385 assert(ctx != AugStore && ctx != AugLoad);
386
387 switch (e->kind) {
388 case Attribute_kind:
389 if (ctx == Store && !forbidden_check(c, n,
390 PyBytes_AS_STRING(e->v.Attribute.attr)))
391 return 0;
392 e->v.Attribute.ctx = ctx;
393 break;
394 case Subscript_kind:
395 e->v.Subscript.ctx = ctx;
396 break;
397 case Name_kind:
398 if (ctx == Store && !forbidden_check(c, n,
399 PyBytes_AS_STRING(e->v.Name.id)))
400 return 0;
401 e->v.Name.ctx = ctx;
402 break;
403 case List_kind:
404 e->v.List.ctx = ctx;
405 s = e->v.List.elts;
406 break;
407 case Tuple_kind:
408 if (asdl_seq_LEN(e->v.Tuple.elts)) {
409 e->v.Tuple.ctx = ctx;
410 s = e->v.Tuple.elts;
411 }
412 else {
413 expr_name = "()";
414 }
415 break;
416 case Lambda_kind:
417 expr_name = "lambda";
418 break;
419 case Call_kind:
420 expr_name = "function call";
421 break;
422 case BoolOp_kind:
423 case BinOp_kind:
424 case UnaryOp_kind:
425 expr_name = "operator";
426 break;
427 case GeneratorExp_kind:
428 expr_name = "generator expression";
429 break;
430 case Yield_kind:
431 expr_name = "yield expression";
432 break;
433 case ListComp_kind:
434 expr_name = "list comprehension";
435 break;
436 case SetComp_kind:
437 expr_name = "set comprehension";
438 break;
439 case DictComp_kind:
440 expr_name = "dict comprehension";
441 break;
442 case Dict_kind:
443 case Set_kind:
444 case Num_kind:
445 case Str_kind:
446 expr_name = "literal";
447 break;
448 case Compare_kind:
449 expr_name = "comparison";
450 break;
451 case Repr_kind:
452 expr_name = "repr";
453 break;
454 case IfExp_kind:
455 expr_name = "conditional expression";
456 break;
457 default:
458 PyErr_Format(PyExc_SystemError,
459 "unexpected expression in assignment %d (line %d)",
460 e->kind, e->lineno);
461 return 0;
462 }
463 /* Check for error string set by switch */
464 if (expr_name) {
465 char buf[300];
466 PyOS_snprintf(buf, sizeof(buf),
467 "can't %s %s",
468 ctx == Store ? "assign to" : "delete",
469 expr_name);
470 return ast_error(n, buf);
471 }
472
473 /* If the LHS is a list or tuple, we need to set the assignment
474 context for all the contained elements.
475 */
476 if (s) {
477 int i;
478
479 for (i = 0; i < asdl_seq_LEN(s); i++) {
480 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
481 return 0;
482 }
483 }
484 return 1;
485 }
486
487 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)488 ast_for_augassign(struct compiling *c, const node *n)
489 {
490 REQ(n, augassign);
491 n = CHILD(n, 0);
492 switch (STR(n)[0]) {
493 case '+':
494 return Add;
495 case '-':
496 return Sub;
497 case '/':
498 if (STR(n)[1] == '/')
499 return FloorDiv;
500 else
501 return Div;
502 case '%':
503 return Mod;
504 case '<':
505 return LShift;
506 case '>':
507 return RShift;
508 case '&':
509 return BitAnd;
510 case '^':
511 return BitXor;
512 case '|':
513 return BitOr;
514 case '*':
515 if (STR(n)[1] == '*')
516 return Pow;
517 else
518 return Mult;
519 default:
520 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
521 return (operator_ty)0;
522 }
523 }
524
525 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)526 ast_for_comp_op(struct compiling *c, const node *n)
527 {
528 /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
529 |'is' 'not'
530 */
531 REQ(n, comp_op);
532 if (NCH(n) == 1) {
533 n = CHILD(n, 0);
534 switch (TYPE(n)) {
535 case LESS:
536 return Lt;
537 case GREATER:
538 return Gt;
539 case EQEQUAL: /* == */
540 return Eq;
541 case LESSEQUAL:
542 return LtE;
543 case GREATEREQUAL:
544 return GtE;
545 case NOTEQUAL:
546 return NotEq;
547 case NAME:
548 if (strcmp(STR(n), "in") == 0)
549 return In;
550 if (strcmp(STR(n), "is") == 0)
551 return Is;
552 default:
553 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
554 STR(n));
555 return (cmpop_ty)0;
556 }
557 }
558 else if (NCH(n) == 2) {
559 /* handle "not in" and "is not" */
560 switch (TYPE(CHILD(n, 0))) {
561 case NAME:
562 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
563 return NotIn;
564 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
565 return IsNot;
566 default:
567 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
568 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
569 return (cmpop_ty)0;
570 }
571 }
572 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
573 NCH(n));
574 return (cmpop_ty)0;
575 }
576
577 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)578 seq_for_testlist(struct compiling *c, const node *n)
579 {
580 /* testlist: test (',' test)* [','] */
581 asdl_seq *seq;
582 expr_ty expression;
583 int i;
584 assert(TYPE(n) == testlist ||
585 TYPE(n) == listmaker ||
586 TYPE(n) == testlist_comp ||
587 TYPE(n) == testlist_safe ||
588 TYPE(n) == testlist1);
589
590 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
591 if (!seq)
592 return NULL;
593
594 for (i = 0; i < NCH(n); i += 2) {
595 assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
596
597 expression = ast_for_expr(c, CHILD(n, i));
598 if (!expression)
599 return NULL;
600
601 assert(i / 2 < seq->size);
602 asdl_seq_SET(seq, i / 2, expression);
603 }
604 return seq;
605 }
606
607 static expr_ty
compiler_complex_args(struct compiling * c,const node * n)608 compiler_complex_args(struct compiling *c, const node *n)
609 {
610 int i, len = (NCH(n) + 1) / 2;
611 expr_ty result;
612 asdl_seq *args = asdl_seq_new(len, c->c_arena);
613 if (!args)
614 return NULL;
615
616 /* fpdef: NAME | '(' fplist ')'
617 fplist: fpdef (',' fpdef)* [',']
618 */
619 REQ(n, fplist);
620 for (i = 0; i < len; i++) {
621 PyObject *arg_id;
622 const node *fpdef_node = CHILD(n, 2*i);
623 const node *child;
624 expr_ty arg;
625 set_name:
626 /* fpdef_node is either a NAME or an fplist */
627 child = CHILD(fpdef_node, 0);
628 if (TYPE(child) == NAME) {
629 if (!forbidden_check(c, n, STR(child)))
630 return NULL;
631 arg_id = NEW_IDENTIFIER(child);
632 if (!arg_id)
633 return NULL;
634 arg = Name(arg_id, Store, LINENO(child), child->n_col_offset,
635 c->c_arena);
636 }
637 else {
638 assert(TYPE(fpdef_node) == fpdef);
639 /* fpdef_node[0] is not a name, so it must be '(', get CHILD[1] */
640 child = CHILD(fpdef_node, 1);
641 assert(TYPE(child) == fplist);
642 /* NCH == 1 means we have (x), we need to elide the extra parens */
643 if (NCH(child) == 1) {
644 fpdef_node = CHILD(child, 0);
645 assert(TYPE(fpdef_node) == fpdef);
646 goto set_name;
647 }
648 arg = compiler_complex_args(c, child);
649 }
650 asdl_seq_SET(args, i, arg);
651 }
652
653 result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
654 if (!set_context(c, result, Store, n))
655 return NULL;
656 return result;
657 }
658
659
660 /* Create AST for argument list. */
661
662 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)663 ast_for_arguments(struct compiling *c, const node *n)
664 {
665 /* parameters: '(' [varargslist] ')'
666 varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
667 | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
668 */
669 int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
670 asdl_seq *args, *defaults;
671 identifier vararg = NULL, kwarg = NULL;
672 node *ch;
673
674 if (TYPE(n) == parameters) {
675 if (NCH(n) == 2) /* () as argument list */
676 return arguments(NULL, NULL, NULL, NULL, c->c_arena);
677 n = CHILD(n, 1);
678 }
679 REQ(n, varargslist);
680
681 /* first count the number of normal args & defaults */
682 for (i = 0; i < NCH(n); i++) {
683 ch = CHILD(n, i);
684 if (TYPE(ch) == fpdef)
685 n_args++;
686 if (TYPE(ch) == EQUAL)
687 n_defaults++;
688 }
689 args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
690 if (!args && n_args)
691 return NULL;
692 defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
693 if (!defaults && n_defaults)
694 return NULL;
695
696 /* fpdef: NAME | '(' fplist ')'
697 fplist: fpdef (',' fpdef)* [',']
698 */
699 i = 0;
700 j = 0; /* index for defaults */
701 k = 0; /* index for args */
702 while (i < NCH(n)) {
703 ch = CHILD(n, i);
704 switch (TYPE(ch)) {
705 case fpdef: {
706 int complex_args = 0, parenthesized = 0;
707 handle_fpdef:
708 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
709 anything other than EQUAL or a comma? */
710 /* XXX Should NCH(n) check be made a separate check? */
711 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
712 expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
713 if (!expression)
714 return NULL;
715 assert(defaults != NULL);
716 asdl_seq_SET(defaults, j++, expression);
717 i += 2;
718 found_default = 1;
719 }
720 else if (found_default) {
721 /* def f((x)=4): pass should raise an error.
722 def f((x, (y))): pass will just incur the tuple unpacking warning. */
723 if (parenthesized && !complex_args) {
724 ast_error(n, "parenthesized arg with default");
725 return NULL;
726 }
727 ast_error(n,
728 "non-default argument follows default argument");
729 return NULL;
730 }
731 if (NCH(ch) == 3) {
732 ch = CHILD(ch, 1);
733 /* def foo((x)): is not complex, special case. */
734 if (NCH(ch) != 1) {
735 /* We have complex arguments, setup for unpacking. */
736 if (Py_Py3kWarningFlag && !ast_warn(c, ch,
737 "tuple parameter unpacking has been removed in 3.x"))
738 return NULL;
739 complex_args = 1;
740 asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
741 if (!asdl_seq_GET(args, k-1))
742 return NULL;
743 } else {
744 /* def foo((x)): setup for checking NAME below. */
745 /* Loop because there can be many parens and tuple
746 unpacking mixed in. */
747 parenthesized = 1;
748 ch = CHILD(ch, 0);
749 assert(TYPE(ch) == fpdef);
750 goto handle_fpdef;
751 }
752 }
753 if (TYPE(CHILD(ch, 0)) == NAME) {
754 PyObject *id;
755 expr_ty name;
756 if (!forbidden_check(c, n, STR(CHILD(ch, 0))))
757 return NULL;
758 id = NEW_IDENTIFIER(CHILD(ch, 0));
759 if (!id)
760 return NULL;
761 name = Name(id, Param, LINENO(ch), ch->n_col_offset,
762 c->c_arena);
763 if (!name)
764 return NULL;
765 asdl_seq_SET(args, k++, name);
766
767 }
768 i += 2; /* the name and the comma */
769 if (parenthesized && Py_Py3kWarningFlag &&
770 !ast_warn(c, ch, "parenthesized argument names "
771 "are invalid in 3.x"))
772 return NULL;
773
774 break;
775 }
776 case STAR:
777 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
778 return NULL;
779 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
780 if (!vararg)
781 return NULL;
782 i += 3;
783 break;
784 case DOUBLESTAR:
785 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
786 return NULL;
787 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
788 if (!kwarg)
789 return NULL;
790 i += 3;
791 break;
792 default:
793 PyErr_Format(PyExc_SystemError,
794 "unexpected node in varargslist: %d @ %d",
795 TYPE(ch), i);
796 return NULL;
797 }
798 }
799
800 return arguments(args, vararg, kwarg, defaults, c->c_arena);
801 }
802
803 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)804 ast_for_dotted_name(struct compiling *c, const node *n)
805 {
806 expr_ty e;
807 identifier id;
808 int lineno, col_offset;
809 int i;
810
811 REQ(n, dotted_name);
812
813 lineno = LINENO(n);
814 col_offset = n->n_col_offset;
815
816 id = NEW_IDENTIFIER(CHILD(n, 0));
817 if (!id)
818 return NULL;
819 e = Name(id, Load, lineno, col_offset, c->c_arena);
820 if (!e)
821 return NULL;
822
823 for (i = 2; i < NCH(n); i+=2) {
824 id = NEW_IDENTIFIER(CHILD(n, i));
825 if (!id)
826 return NULL;
827 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
828 if (!e)
829 return NULL;
830 }
831
832 return e;
833 }
834
835 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)836 ast_for_decorator(struct compiling *c, const node *n)
837 {
838 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
839 expr_ty d = NULL;
840 expr_ty name_expr;
841
842 REQ(n, decorator);
843 REQ(CHILD(n, 0), AT);
844 REQ(RCHILD(n, -1), NEWLINE);
845
846 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
847 if (!name_expr)
848 return NULL;
849
850 if (NCH(n) == 3) { /* No arguments */
851 d = name_expr;
852 name_expr = NULL;
853 }
854 else if (NCH(n) == 5) { /* Call with no arguments */
855 d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
856 n->n_col_offset, c->c_arena);
857 if (!d)
858 return NULL;
859 name_expr = NULL;
860 }
861 else {
862 d = ast_for_call(c, CHILD(n, 3), name_expr);
863 if (!d)
864 return NULL;
865 name_expr = NULL;
866 }
867
868 return d;
869 }
870
871 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)872 ast_for_decorators(struct compiling *c, const node *n)
873 {
874 asdl_seq* decorator_seq;
875 expr_ty d;
876 int i;
877
878 REQ(n, decorators);
879 decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
880 if (!decorator_seq)
881 return NULL;
882
883 for (i = 0; i < NCH(n); i++) {
884 d = ast_for_decorator(c, CHILD(n, i));
885 if (!d)
886 return NULL;
887 asdl_seq_SET(decorator_seq, i, d);
888 }
889 return decorator_seq;
890 }
891
892 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)893 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
894 {
895 /* funcdef: 'def' NAME parameters ':' suite */
896 identifier name;
897 arguments_ty args;
898 asdl_seq *body;
899 int name_i = 1;
900
901 REQ(n, funcdef);
902
903 name = NEW_IDENTIFIER(CHILD(n, name_i));
904 if (!name)
905 return NULL;
906 else if (!forbidden_check(c, CHILD(n, name_i), STR(CHILD(n, name_i))))
907 return NULL;
908 args = ast_for_arguments(c, CHILD(n, name_i + 1));
909 if (!args)
910 return NULL;
911 body = ast_for_suite(c, CHILD(n, name_i + 3));
912 if (!body)
913 return NULL;
914
915 return FunctionDef(name, args, body, decorator_seq, LINENO(n),
916 n->n_col_offset, c->c_arena);
917 }
918
919 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)920 ast_for_decorated(struct compiling *c, const node *n)
921 {
922 /* decorated: decorators (classdef | funcdef) */
923 stmt_ty thing = NULL;
924 asdl_seq *decorator_seq = NULL;
925
926 REQ(n, decorated);
927
928 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
929 if (!decorator_seq)
930 return NULL;
931
932 assert(TYPE(CHILD(n, 1)) == funcdef ||
933 TYPE(CHILD(n, 1)) == classdef);
934
935 if (TYPE(CHILD(n, 1)) == funcdef) {
936 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
937 } else if (TYPE(CHILD(n, 1)) == classdef) {
938 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
939 }
940 /* we count the decorators in when talking about the class' or
941 function's line number */
942 if (thing) {
943 thing->lineno = LINENO(n);
944 thing->col_offset = n->n_col_offset;
945 }
946 return thing;
947 }
948
949 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)950 ast_for_lambdef(struct compiling *c, const node *n)
951 {
952 /* lambdef: 'lambda' [varargslist] ':' test */
953 arguments_ty args;
954 expr_ty expression;
955
956 if (NCH(n) == 3) {
957 args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
958 if (!args)
959 return NULL;
960 expression = ast_for_expr(c, CHILD(n, 2));
961 if (!expression)
962 return NULL;
963 }
964 else {
965 args = ast_for_arguments(c, CHILD(n, 1));
966 if (!args)
967 return NULL;
968 expression = ast_for_expr(c, CHILD(n, 3));
969 if (!expression)
970 return NULL;
971 }
972
973 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
974 }
975
976 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)977 ast_for_ifexpr(struct compiling *c, const node *n)
978 {
979 /* test: or_test 'if' or_test 'else' test */
980 expr_ty expression, body, orelse;
981
982 assert(NCH(n) == 5);
983 body = ast_for_expr(c, CHILD(n, 0));
984 if (!body)
985 return NULL;
986 expression = ast_for_expr(c, CHILD(n, 2));
987 if (!expression)
988 return NULL;
989 orelse = ast_for_expr(c, CHILD(n, 4));
990 if (!orelse)
991 return NULL;
992 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
993 c->c_arena);
994 }
995
996 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
997 so there is only a single version. Possibly for loops can also re-use
998 the code.
999 */
1000
1001 /* Count the number of 'for' loop in a list comprehension.
1002
1003 Helper for ast_for_listcomp().
1004 */
1005
1006 static int
count_list_fors(struct compiling * c,const node * n)1007 count_list_fors(struct compiling *c, const node *n)
1008 {
1009 int n_fors = 0;
1010 node *ch = CHILD(n, 1);
1011
1012 count_list_for:
1013 n_fors++;
1014 REQ(ch, list_for);
1015 if (NCH(ch) == 5)
1016 ch = CHILD(ch, 4);
1017 else
1018 return n_fors;
1019 count_list_iter:
1020 REQ(ch, list_iter);
1021 ch = CHILD(ch, 0);
1022 if (TYPE(ch) == list_for)
1023 goto count_list_for;
1024 else if (TYPE(ch) == list_if) {
1025 if (NCH(ch) == 3) {
1026 ch = CHILD(ch, 2);
1027 goto count_list_iter;
1028 }
1029 else
1030 return n_fors;
1031 }
1032
1033 /* Should never be reached */
1034 PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
1035 return -1;
1036 }
1037
1038 /* Count the number of 'if' statements in a list comprehension.
1039
1040 Helper for ast_for_listcomp().
1041 */
1042
1043 static int
count_list_ifs(struct compiling * c,const node * n)1044 count_list_ifs(struct compiling *c, const node *n)
1045 {
1046 int n_ifs = 0;
1047
1048 count_list_iter:
1049 REQ(n, list_iter);
1050 if (TYPE(CHILD(n, 0)) == list_for)
1051 return n_ifs;
1052 n = CHILD(n, 0);
1053 REQ(n, list_if);
1054 n_ifs++;
1055 if (NCH(n) == 2)
1056 return n_ifs;
1057 n = CHILD(n, 2);
1058 goto count_list_iter;
1059 }
1060
1061 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)1062 ast_for_listcomp(struct compiling *c, const node *n)
1063 {
1064 /* listmaker: test ( list_for | (',' test)* [','] )
1065 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
1066 list_iter: list_for | list_if
1067 list_if: 'if' test [list_iter]
1068 testlist_safe: test [(',' test)+ [',']]
1069 */
1070 expr_ty elt, first;
1071 asdl_seq *listcomps;
1072 int i, n_fors;
1073 node *ch;
1074
1075 REQ(n, listmaker);
1076 assert(NCH(n) > 1);
1077
1078 elt = ast_for_expr(c, CHILD(n, 0));
1079 if (!elt)
1080 return NULL;
1081
1082 n_fors = count_list_fors(c, n);
1083 if (n_fors == -1)
1084 return NULL;
1085
1086 listcomps = asdl_seq_new(n_fors, c->c_arena);
1087 if (!listcomps)
1088 return NULL;
1089
1090 ch = CHILD(n, 1);
1091 for (i = 0; i < n_fors; i++) {
1092 comprehension_ty lc;
1093 asdl_seq *t;
1094 expr_ty expression;
1095 node *for_ch;
1096
1097 REQ(ch, list_for);
1098
1099 for_ch = CHILD(ch, 1);
1100 t = ast_for_exprlist(c, for_ch, Store);
1101 if (!t)
1102 return NULL;
1103 expression = ast_for_testlist(c, CHILD(ch, 3));
1104 if (!expression)
1105 return NULL;
1106
1107 /* Check the # of children rather than the length of t, since
1108 [x for x, in ... ] has 1 element in t, but still requires a Tuple.
1109 */
1110 first = (expr_ty)asdl_seq_GET(t, 0);
1111 if (NCH(for_ch) == 1)
1112 lc = comprehension(first, expression, NULL, c->c_arena);
1113 else
1114 lc = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1115 c->c_arena),
1116 expression, NULL, c->c_arena);
1117 if (!lc)
1118 return NULL;
1119
1120 if (NCH(ch) == 5) {
1121 int j, n_ifs;
1122 asdl_seq *ifs;
1123 expr_ty list_for_expr;
1124
1125 ch = CHILD(ch, 4);
1126 n_ifs = count_list_ifs(c, ch);
1127 if (n_ifs == -1)
1128 return NULL;
1129
1130 ifs = asdl_seq_new(n_ifs, c->c_arena);
1131 if (!ifs)
1132 return NULL;
1133
1134 for (j = 0; j < n_ifs; j++) {
1135 REQ(ch, list_iter);
1136 ch = CHILD(ch, 0);
1137 REQ(ch, list_if);
1138
1139 list_for_expr = ast_for_expr(c, CHILD(ch, 1));
1140 if (!list_for_expr)
1141 return NULL;
1142
1143 asdl_seq_SET(ifs, j, list_for_expr);
1144 if (NCH(ch) == 3)
1145 ch = CHILD(ch, 2);
1146 }
1147 /* on exit, must guarantee that ch is a list_for */
1148 if (TYPE(ch) == list_iter)
1149 ch = CHILD(ch, 0);
1150 lc->ifs = ifs;
1151 }
1152 asdl_seq_SET(listcomps, i, lc);
1153 }
1154
1155 return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1156 }
1157
1158 /*
1159 Count the number of 'for' loops in a comprehension.
1160
1161 Helper for ast_for_comprehension().
1162 */
1163
1164 static int
count_comp_fors(struct compiling * c,const node * n)1165 count_comp_fors(struct compiling *c, const node *n)
1166 {
1167 int n_fors = 0;
1168
1169 count_comp_for:
1170 n_fors++;
1171 REQ(n, comp_for);
1172 if (NCH(n) == 5)
1173 n = CHILD(n, 4);
1174 else
1175 return n_fors;
1176 count_comp_iter:
1177 REQ(n, comp_iter);
1178 n = CHILD(n, 0);
1179 if (TYPE(n) == comp_for)
1180 goto count_comp_for;
1181 else if (TYPE(n) == comp_if) {
1182 if (NCH(n) == 3) {
1183 n = CHILD(n, 2);
1184 goto count_comp_iter;
1185 }
1186 else
1187 return n_fors;
1188 }
1189
1190 /* Should never be reached */
1191 PyErr_SetString(PyExc_SystemError,
1192 "logic error in count_comp_fors");
1193 return -1;
1194 }
1195
1196 /* Count the number of 'if' statements in a comprehension.
1197
1198 Helper for ast_for_comprehension().
1199 */
1200
1201 static int
count_comp_ifs(struct compiling * c,const node * n)1202 count_comp_ifs(struct compiling *c, const node *n)
1203 {
1204 int n_ifs = 0;
1205
1206 while (1) {
1207 REQ(n, comp_iter);
1208 if (TYPE(CHILD(n, 0)) == comp_for)
1209 return n_ifs;
1210 n = CHILD(n, 0);
1211 REQ(n, comp_if);
1212 n_ifs++;
1213 if (NCH(n) == 2)
1214 return n_ifs;
1215 n = CHILD(n, 2);
1216 }
1217 }
1218
1219 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1220 ast_for_comprehension(struct compiling *c, const node *n)
1221 {
1222 int i, n_fors;
1223 asdl_seq *comps;
1224
1225 n_fors = count_comp_fors(c, n);
1226 if (n_fors == -1)
1227 return NULL;
1228
1229 comps = asdl_seq_new(n_fors, c->c_arena);
1230 if (!comps)
1231 return NULL;
1232
1233 for (i = 0; i < n_fors; i++) {
1234 comprehension_ty comp;
1235 asdl_seq *t;
1236 expr_ty expression, first;
1237 node *for_ch;
1238
1239 REQ(n, comp_for);
1240
1241 for_ch = CHILD(n, 1);
1242 t = ast_for_exprlist(c, for_ch, Store);
1243 if (!t)
1244 return NULL;
1245 expression = ast_for_expr(c, CHILD(n, 3));
1246 if (!expression)
1247 return NULL;
1248
1249 /* Check the # of children rather than the length of t, since
1250 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1251 first = (expr_ty)asdl_seq_GET(t, 0);
1252 if (NCH(for_ch) == 1)
1253 comp = comprehension(first, expression, NULL, c->c_arena);
1254 else
1255 comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1256 c->c_arena),
1257 expression, NULL, c->c_arena);
1258 if (!comp)
1259 return NULL;
1260
1261 if (NCH(n) == 5) {
1262 int j, n_ifs;
1263 asdl_seq *ifs;
1264
1265 n = CHILD(n, 4);
1266 n_ifs = count_comp_ifs(c, n);
1267 if (n_ifs == -1)
1268 return NULL;
1269
1270 ifs = asdl_seq_new(n_ifs, c->c_arena);
1271 if (!ifs)
1272 return NULL;
1273
1274 for (j = 0; j < n_ifs; j++) {
1275 REQ(n, comp_iter);
1276 n = CHILD(n, 0);
1277 REQ(n, comp_if);
1278
1279 expression = ast_for_expr(c, CHILD(n, 1));
1280 if (!expression)
1281 return NULL;
1282 asdl_seq_SET(ifs, j, expression);
1283 if (NCH(n) == 3)
1284 n = CHILD(n, 2);
1285 }
1286 /* on exit, must guarantee that n is a comp_for */
1287 if (TYPE(n) == comp_iter)
1288 n = CHILD(n, 0);
1289 comp->ifs = ifs;
1290 }
1291 asdl_seq_SET(comps, i, comp);
1292 }
1293 return comps;
1294 }
1295
1296 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1297 ast_for_itercomp(struct compiling *c, const node *n, int type)
1298 {
1299 expr_ty elt;
1300 asdl_seq *comps;
1301
1302 assert(NCH(n) > 1);
1303
1304 elt = ast_for_expr(c, CHILD(n, 0));
1305 if (!elt)
1306 return NULL;
1307
1308 comps = ast_for_comprehension(c, CHILD(n, 1));
1309 if (!comps)
1310 return NULL;
1311
1312 if (type == COMP_GENEXP)
1313 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1314 else if (type == COMP_SETCOMP)
1315 return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1316 else
1317 /* Should never happen */
1318 return NULL;
1319 }
1320
1321 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1322 ast_for_dictcomp(struct compiling *c, const node *n)
1323 {
1324 expr_ty key, value;
1325 asdl_seq *comps;
1326
1327 assert(NCH(n) > 3);
1328 REQ(CHILD(n, 1), COLON);
1329
1330 key = ast_for_expr(c, CHILD(n, 0));
1331 if (!key)
1332 return NULL;
1333
1334 value = ast_for_expr(c, CHILD(n, 2));
1335 if (!value)
1336 return NULL;
1337
1338 comps = ast_for_comprehension(c, CHILD(n, 3));
1339 if (!comps)
1340 return NULL;
1341
1342 return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1343 }
1344
1345 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)1346 ast_for_genexp(struct compiling *c, const node *n)
1347 {
1348 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1349 return ast_for_itercomp(c, n, COMP_GENEXP);
1350 }
1351
1352 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)1353 ast_for_setcomp(struct compiling *c, const node *n)
1354 {
1355 assert(TYPE(n) == (dictorsetmaker));
1356 return ast_for_itercomp(c, n, COMP_SETCOMP);
1357 }
1358
1359 static expr_ty
ast_for_atom(struct compiling * c,const node * n)1360 ast_for_atom(struct compiling *c, const node *n)
1361 {
1362 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']'
1363 | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1364 */
1365 node *ch = CHILD(n, 0);
1366
1367 switch (TYPE(ch)) {
1368 case NAME: {
1369 /* All names start in Load context, but may later be
1370 changed. */
1371 PyObject *name = NEW_IDENTIFIER(ch);
1372 if (!name)
1373 return NULL;
1374 return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1375 }
1376 case STRING: {
1377 PyObject *str = parsestrplus(c, n);
1378 if (!str) {
1379 #ifdef Py_USING_UNICODE
1380 if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
1381 PyObject *type, *value, *tback, *errstr;
1382 PyErr_Fetch(&type, &value, &tback);
1383 errstr = PyObject_Str(value);
1384 if (errstr) {
1385 char *s = "";
1386 char buf[128];
1387 s = PyString_AsString(errstr);
1388 PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1389 ast_error(n, buf);
1390 Py_DECREF(errstr);
1391 } else {
1392 ast_error(n, "(unicode error) unknown error");
1393 }
1394 Py_DECREF(type);
1395 Py_DECREF(value);
1396 Py_XDECREF(tback);
1397 }
1398 #endif
1399 return NULL;
1400 }
1401 PyArena_AddPyObject(c->c_arena, str);
1402 return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1403 }
1404 case NUMBER: {
1405 PyObject *pynum = parsenumber(c, STR(ch));
1406 if (!pynum)
1407 return NULL;
1408
1409 PyArena_AddPyObject(c->c_arena, pynum);
1410 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1411 }
1412 case LPAR: /* some parenthesized expressions */
1413 ch = CHILD(n, 1);
1414
1415 if (TYPE(ch) == RPAR)
1416 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1417
1418 if (TYPE(ch) == yield_expr)
1419 return ast_for_expr(c, ch);
1420
1421 return ast_for_testlist_comp(c, ch);
1422 case LSQB: /* list (or list comprehension) */
1423 ch = CHILD(n, 1);
1424
1425 if (TYPE(ch) == RSQB)
1426 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1427
1428 REQ(ch, listmaker);
1429 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1430 asdl_seq *elts = seq_for_testlist(c, ch);
1431 if (!elts)
1432 return NULL;
1433
1434 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1435 }
1436 else
1437 return ast_for_listcomp(c, ch);
1438 case LBRACE: {
1439 /* dictorsetmaker:
1440 * (test ':' test (comp_for | (',' test ':' test)* [','])) |
1441 * (test (comp_for | (',' test)* [',']))
1442 */
1443 int i, size;
1444 asdl_seq *keys, *values;
1445
1446 ch = CHILD(n, 1);
1447 if (TYPE(ch) == RBRACE) {
1448 /* it's an empty dict */
1449 return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1450 } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1451 /* it's a simple set */
1452 asdl_seq *elts;
1453 size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1454 elts = asdl_seq_new(size, c->c_arena);
1455 if (!elts)
1456 return NULL;
1457 for (i = 0; i < NCH(ch); i += 2) {
1458 expr_ty expression;
1459 expression = ast_for_expr(c, CHILD(ch, i));
1460 if (!expression)
1461 return NULL;
1462 asdl_seq_SET(elts, i / 2, expression);
1463 }
1464 return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1465 } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1466 /* it's a set comprehension */
1467 return ast_for_setcomp(c, ch);
1468 } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1469 return ast_for_dictcomp(c, ch);
1470 } else {
1471 /* it's a dict */
1472 size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1473 keys = asdl_seq_new(size, c->c_arena);
1474 if (!keys)
1475 return NULL;
1476
1477 values = asdl_seq_new(size, c->c_arena);
1478 if (!values)
1479 return NULL;
1480
1481 for (i = 0; i < NCH(ch); i += 4) {
1482 expr_ty expression;
1483
1484 expression = ast_for_expr(c, CHILD(ch, i));
1485 if (!expression)
1486 return NULL;
1487
1488 asdl_seq_SET(keys, i / 4, expression);
1489
1490 expression = ast_for_expr(c, CHILD(ch, i + 2));
1491 if (!expression)
1492 return NULL;
1493
1494 asdl_seq_SET(values, i / 4, expression);
1495 }
1496 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1497 }
1498 }
1499 case BACKQUOTE: { /* repr */
1500 expr_ty expression;
1501 if (Py_Py3kWarningFlag &&
1502 !ast_warn(c, n, "backquote not supported in 3.x; use repr()"))
1503 return NULL;
1504 expression = ast_for_testlist(c, CHILD(n, 1));
1505 if (!expression)
1506 return NULL;
1507
1508 return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1509 }
1510 default:
1511 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1512 return NULL;
1513 }
1514 }
1515
1516 static slice_ty
ast_for_slice(struct compiling * c,const node * n)1517 ast_for_slice(struct compiling *c, const node *n)
1518 {
1519 node *ch;
1520 expr_ty lower = NULL, upper = NULL, step = NULL;
1521
1522 REQ(n, subscript);
1523
1524 /*
1525 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1526 sliceop: ':' [test]
1527 */
1528 ch = CHILD(n, 0);
1529 if (TYPE(ch) == DOT)
1530 return Ellipsis(c->c_arena);
1531
1532 if (NCH(n) == 1 && TYPE(ch) == test) {
1533 /* 'step' variable hold no significance in terms of being used over
1534 other vars */
1535 step = ast_for_expr(c, ch);
1536 if (!step)
1537 return NULL;
1538
1539 return Index(step, c->c_arena);
1540 }
1541
1542 if (TYPE(ch) == test) {
1543 lower = ast_for_expr(c, ch);
1544 if (!lower)
1545 return NULL;
1546 }
1547
1548 /* If there's an upper bound it's in the second or third position. */
1549 if (TYPE(ch) == COLON) {
1550 if (NCH(n) > 1) {
1551 node *n2 = CHILD(n, 1);
1552
1553 if (TYPE(n2) == test) {
1554 upper = ast_for_expr(c, n2);
1555 if (!upper)
1556 return NULL;
1557 }
1558 }
1559 } else if (NCH(n) > 2) {
1560 node *n2 = CHILD(n, 2);
1561
1562 if (TYPE(n2) == test) {
1563 upper = ast_for_expr(c, n2);
1564 if (!upper)
1565 return NULL;
1566 }
1567 }
1568
1569 ch = CHILD(n, NCH(n) - 1);
1570 if (TYPE(ch) == sliceop) {
1571 if (NCH(ch) == 1) {
1572 /*
1573 This is an extended slice (ie "x[::]") with no expression in the
1574 step field. We set this literally to "None" in order to
1575 disambiguate it from x[:]. (The interpreter might have to call
1576 __getslice__ for x[:], but it must call __getitem__ for x[::].)
1577 */
1578 identifier none = new_identifier("None", c->c_arena);
1579 if (!none)
1580 return NULL;
1581 ch = CHILD(ch, 0);
1582 step = Name(none, Load, LINENO(ch), ch->n_col_offset, c->c_arena);
1583 if (!step)
1584 return NULL;
1585 } else {
1586 ch = CHILD(ch, 1);
1587 if (TYPE(ch) == test) {
1588 step = ast_for_expr(c, ch);
1589 if (!step)
1590 return NULL;
1591 }
1592 }
1593 }
1594
1595 return Slice(lower, upper, step, c->c_arena);
1596 }
1597
1598 static expr_ty
ast_for_binop(struct compiling * c,const node * n)1599 ast_for_binop(struct compiling *c, const node *n)
1600 {
1601 /* Must account for a sequence of expressions.
1602 How should A op B op C by represented?
1603 BinOp(BinOp(A, op, B), op, C).
1604 */
1605
1606 int i, nops;
1607 expr_ty expr1, expr2, result;
1608 operator_ty newoperator;
1609
1610 expr1 = ast_for_expr(c, CHILD(n, 0));
1611 if (!expr1)
1612 return NULL;
1613
1614 expr2 = ast_for_expr(c, CHILD(n, 2));
1615 if (!expr2)
1616 return NULL;
1617
1618 newoperator = get_operator(CHILD(n, 1));
1619 if (!newoperator)
1620 return NULL;
1621
1622 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1623 c->c_arena);
1624 if (!result)
1625 return NULL;
1626
1627 nops = (NCH(n) - 1) / 2;
1628 for (i = 1; i < nops; i++) {
1629 expr_ty tmp_result, tmp;
1630 const node* next_oper = CHILD(n, i * 2 + 1);
1631
1632 newoperator = get_operator(next_oper);
1633 if (!newoperator)
1634 return NULL;
1635
1636 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1637 if (!tmp)
1638 return NULL;
1639
1640 tmp_result = BinOp(result, newoperator, tmp,
1641 LINENO(next_oper), next_oper->n_col_offset,
1642 c->c_arena);
1643 if (!tmp_result)
1644 return NULL;
1645 result = tmp_result;
1646 }
1647 return result;
1648 }
1649
1650 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)1651 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1652 {
1653 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1654 subscriptlist: subscript (',' subscript)* [',']
1655 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1656 */
1657 REQ(n, trailer);
1658 if (TYPE(CHILD(n, 0)) == LPAR) {
1659 if (NCH(n) == 2)
1660 return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1661 n->n_col_offset, c->c_arena);
1662 else
1663 return ast_for_call(c, CHILD(n, 1), left_expr);
1664 }
1665 else if (TYPE(CHILD(n, 0)) == DOT ) {
1666 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1667 if (!attr_id)
1668 return NULL;
1669 return Attribute(left_expr, attr_id, Load,
1670 LINENO(n), n->n_col_offset, c->c_arena);
1671 }
1672 else {
1673 REQ(CHILD(n, 0), LSQB);
1674 REQ(CHILD(n, 2), RSQB);
1675 n = CHILD(n, 1);
1676 if (NCH(n) == 1) {
1677 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1678 if (!slc)
1679 return NULL;
1680 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1681 c->c_arena);
1682 }
1683 else {
1684 /* The grammar is ambiguous here. The ambiguity is resolved
1685 by treating the sequence as a tuple literal if there are
1686 no slice features.
1687 */
1688 int j;
1689 slice_ty slc;
1690 expr_ty e;
1691 bool simple = true;
1692 asdl_seq *slices, *elts;
1693 slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1694 if (!slices)
1695 return NULL;
1696 for (j = 0; j < NCH(n); j += 2) {
1697 slc = ast_for_slice(c, CHILD(n, j));
1698 if (!slc)
1699 return NULL;
1700 if (slc->kind != Index_kind)
1701 simple = false;
1702 asdl_seq_SET(slices, j / 2, slc);
1703 }
1704 if (!simple) {
1705 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1706 Load, LINENO(n), n->n_col_offset, c->c_arena);
1707 }
1708 /* extract Index values and put them in a Tuple */
1709 elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1710 if (!elts)
1711 return NULL;
1712 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1713 slc = (slice_ty)asdl_seq_GET(slices, j);
1714 assert(slc->kind == Index_kind && slc->v.Index.value);
1715 asdl_seq_SET(elts, j, slc->v.Index.value);
1716 }
1717 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1718 if (!e)
1719 return NULL;
1720 return Subscript(left_expr, Index(e, c->c_arena),
1721 Load, LINENO(n), n->n_col_offset, c->c_arena);
1722 }
1723 }
1724 }
1725
1726 static expr_ty
ast_for_factor(struct compiling * c,const node * n)1727 ast_for_factor(struct compiling *c, const node *n)
1728 {
1729 node *pfactor, *ppower, *patom, *pnum;
1730 expr_ty expression;
1731
1732 /* If the unary - operator is applied to a constant, don't generate
1733 a UNARY_NEGATIVE opcode. Just store the approriate value as a
1734 constant. The peephole optimizer already does something like
1735 this but it doesn't handle the case where the constant is
1736 (sys.maxint - 1). In that case, we want a PyIntObject, not a
1737 PyLongObject.
1738 */
1739 if (TYPE(CHILD(n, 0)) == MINUS &&
1740 NCH(n) == 2 &&
1741 TYPE((pfactor = CHILD(n, 1))) == factor &&
1742 NCH(pfactor) == 1 &&
1743 TYPE((ppower = CHILD(pfactor, 0))) == power &&
1744 NCH(ppower) == 1 &&
1745 TYPE((patom = CHILD(ppower, 0))) == atom &&
1746 TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1747 PyObject *pynum;
1748 char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1749 if (s == NULL)
1750 return NULL;
1751 s[0] = '-';
1752 strcpy(s + 1, STR(pnum));
1753 pynum = parsenumber(c, s);
1754 PyObject_FREE(s);
1755 if (!pynum)
1756 return NULL;
1757
1758 PyArena_AddPyObject(c->c_arena, pynum);
1759 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1760 }
1761
1762 expression = ast_for_expr(c, CHILD(n, 1));
1763 if (!expression)
1764 return NULL;
1765
1766 switch (TYPE(CHILD(n, 0))) {
1767 case PLUS:
1768 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1769 c->c_arena);
1770 case MINUS:
1771 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1772 c->c_arena);
1773 case TILDE:
1774 return UnaryOp(Invert, expression, LINENO(n),
1775 n->n_col_offset, c->c_arena);
1776 }
1777 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1778 TYPE(CHILD(n, 0)));
1779 return NULL;
1780 }
1781
1782 static expr_ty
ast_for_power(struct compiling * c,const node * n)1783 ast_for_power(struct compiling *c, const node *n)
1784 {
1785 /* power: atom trailer* ('**' factor)*
1786 */
1787 int i;
1788 expr_ty e, tmp;
1789 REQ(n, power);
1790 e = ast_for_atom(c, CHILD(n, 0));
1791 if (!e)
1792 return NULL;
1793 if (NCH(n) == 1)
1794 return e;
1795 for (i = 1; i < NCH(n); i++) {
1796 node *ch = CHILD(n, i);
1797 if (TYPE(ch) != trailer)
1798 break;
1799 tmp = ast_for_trailer(c, ch, e);
1800 if (!tmp)
1801 return NULL;
1802 tmp->lineno = e->lineno;
1803 tmp->col_offset = e->col_offset;
1804 e = tmp;
1805 }
1806 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1807 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1808 if (!f)
1809 return NULL;
1810 tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1811 if (!tmp)
1812 return NULL;
1813 e = tmp;
1814 }
1815 return e;
1816 }
1817
1818 /* Do not name a variable 'expr'! Will cause a compile error.
1819 */
1820
1821 static expr_ty
ast_for_expr(struct compiling * c,const node * n)1822 ast_for_expr(struct compiling *c, const node *n)
1823 {
1824 /* handle the full range of simple expressions
1825 test: or_test ['if' or_test 'else' test] | lambdef
1826 or_test: and_test ('or' and_test)*
1827 and_test: not_test ('and' not_test)*
1828 not_test: 'not' not_test | comparison
1829 comparison: expr (comp_op expr)*
1830 expr: xor_expr ('|' xor_expr)*
1831 xor_expr: and_expr ('^' and_expr)*
1832 and_expr: shift_expr ('&' shift_expr)*
1833 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1834 arith_expr: term (('+'|'-') term)*
1835 term: factor (('*'|'/'|'%'|'//') factor)*
1836 factor: ('+'|'-'|'~') factor | power
1837 power: atom trailer* ('**' factor)*
1838
1839 As well as modified versions that exist for backward compatibility,
1840 to explicitly allow:
1841 [ x for x in lambda: 0, lambda: 1 ]
1842 (which would be ambiguous without these extra rules)
1843
1844 old_test: or_test | old_lambdef
1845 old_lambdef: 'lambda' [vararglist] ':' old_test
1846
1847 */
1848
1849 asdl_seq *seq;
1850 int i;
1851
1852 loop:
1853 switch (TYPE(n)) {
1854 case test:
1855 case old_test:
1856 if (TYPE(CHILD(n, 0)) == lambdef ||
1857 TYPE(CHILD(n, 0)) == old_lambdef)
1858 return ast_for_lambdef(c, CHILD(n, 0));
1859 else if (NCH(n) > 1)
1860 return ast_for_ifexpr(c, n);
1861 /* Fallthrough */
1862 case or_test:
1863 case and_test:
1864 if (NCH(n) == 1) {
1865 n = CHILD(n, 0);
1866 goto loop;
1867 }
1868 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1869 if (!seq)
1870 return NULL;
1871 for (i = 0; i < NCH(n); i += 2) {
1872 expr_ty e = ast_for_expr(c, CHILD(n, i));
1873 if (!e)
1874 return NULL;
1875 asdl_seq_SET(seq, i / 2, e);
1876 }
1877 if (!strcmp(STR(CHILD(n, 1)), "and"))
1878 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1879 c->c_arena);
1880 assert(!strcmp(STR(CHILD(n, 1)), "or"));
1881 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1882 case not_test:
1883 if (NCH(n) == 1) {
1884 n = CHILD(n, 0);
1885 goto loop;
1886 }
1887 else {
1888 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1889 if (!expression)
1890 return NULL;
1891
1892 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1893 c->c_arena);
1894 }
1895 case comparison:
1896 if (NCH(n) == 1) {
1897 n = CHILD(n, 0);
1898 goto loop;
1899 }
1900 else {
1901 expr_ty expression;
1902 asdl_int_seq *ops;
1903 asdl_seq *cmps;
1904 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1905 if (!ops)
1906 return NULL;
1907 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1908 if (!cmps) {
1909 return NULL;
1910 }
1911 for (i = 1; i < NCH(n); i += 2) {
1912 cmpop_ty newoperator;
1913
1914 newoperator = ast_for_comp_op(c, CHILD(n, i));
1915 if (!newoperator) {
1916 return NULL;
1917 }
1918
1919 expression = ast_for_expr(c, CHILD(n, i + 1));
1920 if (!expression) {
1921 return NULL;
1922 }
1923
1924 asdl_seq_SET(ops, i / 2, newoperator);
1925 asdl_seq_SET(cmps, i / 2, expression);
1926 }
1927 expression = ast_for_expr(c, CHILD(n, 0));
1928 if (!expression) {
1929 return NULL;
1930 }
1931
1932 return Compare(expression, ops, cmps, LINENO(n),
1933 n->n_col_offset, c->c_arena);
1934 }
1935 break;
1936
1937 /* The next five cases all handle BinOps. The main body of code
1938 is the same in each case, but the switch turned inside out to
1939 reuse the code for each type of operator.
1940 */
1941 case expr:
1942 case xor_expr:
1943 case and_expr:
1944 case shift_expr:
1945 case arith_expr:
1946 case term:
1947 if (NCH(n) == 1) {
1948 n = CHILD(n, 0);
1949 goto loop;
1950 }
1951 return ast_for_binop(c, n);
1952 case yield_expr: {
1953 expr_ty exp = NULL;
1954 if (NCH(n) == 2) {
1955 exp = ast_for_testlist(c, CHILD(n, 1));
1956 if (!exp)
1957 return NULL;
1958 }
1959 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1960 }
1961 case factor:
1962 if (NCH(n) == 1) {
1963 n = CHILD(n, 0);
1964 goto loop;
1965 }
1966 return ast_for_factor(c, n);
1967 case power:
1968 return ast_for_power(c, n);
1969 default:
1970 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1971 return NULL;
1972 }
1973 /* should never get here unless if error is set */
1974 return NULL;
1975 }
1976
1977 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func)1978 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1979 {
1980 /*
1981 arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1982 | '**' test)
1983 argument: [test '='] test [comp_for] # Really [keyword '='] test
1984 */
1985
1986 int i, nargs, nkeywords, ngens;
1987 asdl_seq *args;
1988 asdl_seq *keywords;
1989 expr_ty vararg = NULL, kwarg = NULL;
1990
1991 REQ(n, arglist);
1992
1993 nargs = 0;
1994 nkeywords = 0;
1995 ngens = 0;
1996 for (i = 0; i < NCH(n); i++) {
1997 node *ch = CHILD(n, i);
1998 if (TYPE(ch) == argument) {
1999 if (NCH(ch) == 1)
2000 nargs++;
2001 else if (TYPE(CHILD(ch, 1)) == comp_for)
2002 ngens++;
2003 else
2004 nkeywords++;
2005 }
2006 }
2007 if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2008 ast_error(n, "Generator expression must be parenthesized "
2009 "if not sole argument");
2010 return NULL;
2011 }
2012
2013 if (nargs + nkeywords + ngens > 255) {
2014 ast_error(n, "more than 255 arguments");
2015 return NULL;
2016 }
2017
2018 args = asdl_seq_new(nargs + ngens, c->c_arena);
2019 if (!args)
2020 return NULL;
2021 keywords = asdl_seq_new(nkeywords, c->c_arena);
2022 if (!keywords)
2023 return NULL;
2024 nargs = 0;
2025 nkeywords = 0;
2026 for (i = 0; i < NCH(n); i++) {
2027 node *ch = CHILD(n, i);
2028 if (TYPE(ch) == argument) {
2029 expr_ty e;
2030 if (NCH(ch) == 1) {
2031 if (nkeywords) {
2032 ast_error(CHILD(ch, 0),
2033 "non-keyword arg after keyword arg");
2034 return NULL;
2035 }
2036 if (vararg) {
2037 ast_error(CHILD(ch, 0),
2038 "only named arguments may follow *expression");
2039 return NULL;
2040 }
2041 e = ast_for_expr(c, CHILD(ch, 0));
2042 if (!e)
2043 return NULL;
2044 asdl_seq_SET(args, nargs++, e);
2045 }
2046 else if (TYPE(CHILD(ch, 1)) == comp_for) {
2047 e = ast_for_genexp(c, ch);
2048 if (!e)
2049 return NULL;
2050 asdl_seq_SET(args, nargs++, e);
2051 }
2052 else {
2053 keyword_ty kw;
2054 identifier key;
2055 int k;
2056 char *tmp;
2057
2058 /* CHILD(ch, 0) is test, but must be an identifier? */
2059 e = ast_for_expr(c, CHILD(ch, 0));
2060 if (!e)
2061 return NULL;
2062 /* f(lambda x: x[0] = 3) ends up getting parsed with
2063 * LHS test = lambda x: x[0], and RHS test = 3.
2064 * SF bug 132313 points out that complaining about a keyword
2065 * then is very confusing.
2066 */
2067 if (e->kind == Lambda_kind) {
2068 ast_error(CHILD(ch, 0),
2069 "lambda cannot contain assignment");
2070 return NULL;
2071 } else if (e->kind != Name_kind) {
2072 ast_error(CHILD(ch, 0), "keyword can't be an expression");
2073 return NULL;
2074 }
2075 key = e->v.Name.id;
2076 if (!forbidden_check(c, CHILD(ch, 0), PyBytes_AS_STRING(key)))
2077 return NULL;
2078 for (k = 0; k < nkeywords; k++) {
2079 tmp = PyString_AS_STRING(
2080 ((keyword_ty)asdl_seq_GET(keywords, k))->arg);
2081 if (!strcmp(tmp, PyString_AS_STRING(key))) {
2082 ast_error(CHILD(ch, 0), "keyword argument repeated");
2083 return NULL;
2084 }
2085 }
2086 e = ast_for_expr(c, CHILD(ch, 2));
2087 if (!e)
2088 return NULL;
2089 kw = keyword(key, e, c->c_arena);
2090 if (!kw)
2091 return NULL;
2092 asdl_seq_SET(keywords, nkeywords++, kw);
2093 }
2094 }
2095 else if (TYPE(ch) == STAR) {
2096 vararg = ast_for_expr(c, CHILD(n, i+1));
2097 if (!vararg)
2098 return NULL;
2099 i++;
2100 }
2101 else if (TYPE(ch) == DOUBLESTAR) {
2102 kwarg = ast_for_expr(c, CHILD(n, i+1));
2103 if (!kwarg)
2104 return NULL;
2105 i++;
2106 }
2107 }
2108
2109 return Call(func, args, keywords, vararg, kwarg, func->lineno,
2110 func->col_offset, c->c_arena);
2111 }
2112
2113 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2114 ast_for_testlist(struct compiling *c, const node* n)
2115 {
2116 /* testlist_comp: test (',' test)* [','] */
2117 /* testlist: test (',' test)* [','] */
2118 /* testlist_safe: test (',' test)+ [','] */
2119 /* testlist1: test (',' test)* */
2120 assert(NCH(n) > 0);
2121 if (TYPE(n) == testlist_comp) {
2122 if (NCH(n) > 1)
2123 assert(TYPE(CHILD(n, 1)) != comp_for);
2124 }
2125 else {
2126 assert(TYPE(n) == testlist ||
2127 TYPE(n) == testlist_safe ||
2128 TYPE(n) == testlist1);
2129 }
2130 if (NCH(n) == 1)
2131 return ast_for_expr(c, CHILD(n, 0));
2132 else {
2133 asdl_seq *tmp = seq_for_testlist(c, n);
2134 if (!tmp)
2135 return NULL;
2136 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2137 }
2138 }
2139
2140 static expr_ty
ast_for_testlist_comp(struct compiling * c,const node * n)2141 ast_for_testlist_comp(struct compiling *c, const node* n)
2142 {
2143 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2144 /* argument: test [ comp_for ] */
2145 assert(TYPE(n) == testlist_comp || TYPE(n) == argument);
2146 if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == comp_for)
2147 return ast_for_genexp(c, n);
2148 return ast_for_testlist(c, n);
2149 }
2150
2151 /* like ast_for_testlist() but returns a sequence */
2152 static asdl_seq*
ast_for_class_bases(struct compiling * c,const node * n)2153 ast_for_class_bases(struct compiling *c, const node* n)
2154 {
2155 /* testlist: test (',' test)* [','] */
2156 assert(NCH(n) > 0);
2157 REQ(n, testlist);
2158 if (NCH(n) == 1) {
2159 expr_ty base;
2160 asdl_seq *bases = asdl_seq_new(1, c->c_arena);
2161 if (!bases)
2162 return NULL;
2163 base = ast_for_expr(c, CHILD(n, 0));
2164 if (!base)
2165 return NULL;
2166 asdl_seq_SET(bases, 0, base);
2167 return bases;
2168 }
2169
2170 return seq_for_testlist(c, n);
2171 }
2172
2173 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2174 ast_for_expr_stmt(struct compiling *c, const node *n)
2175 {
2176 REQ(n, expr_stmt);
2177 /* expr_stmt: testlist (augassign (yield_expr|testlist)
2178 | ('=' (yield_expr|testlist))*)
2179 testlist: test (',' test)* [',']
2180 augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2181 | '<<=' | '>>=' | '**=' | '//='
2182 test: ... here starts the operator precedence dance
2183 */
2184
2185 if (NCH(n) == 1) {
2186 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2187 if (!e)
2188 return NULL;
2189
2190 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2191 }
2192 else if (TYPE(CHILD(n, 1)) == augassign) {
2193 expr_ty expr1, expr2;
2194 operator_ty newoperator;
2195 node *ch = CHILD(n, 0);
2196
2197 expr1 = ast_for_testlist(c, ch);
2198 if (!expr1)
2199 return NULL;
2200 if(!set_context(c, expr1, Store, ch))
2201 return NULL;
2202 /* set_context checks that most expressions are not the left side.
2203 Augmented assignments can only have a name, a subscript, or an
2204 attribute on the left, though, so we have to explicitly check for
2205 those. */
2206 switch (expr1->kind) {
2207 case Name_kind:
2208 case Attribute_kind:
2209 case Subscript_kind:
2210 break;
2211 default:
2212 ast_error(ch, "illegal expression for augmented assignment");
2213 return NULL;
2214 }
2215
2216 ch = CHILD(n, 2);
2217 if (TYPE(ch) == testlist)
2218 expr2 = ast_for_testlist(c, ch);
2219 else
2220 expr2 = ast_for_expr(c, ch);
2221 if (!expr2)
2222 return NULL;
2223
2224 newoperator = ast_for_augassign(c, CHILD(n, 1));
2225 if (!newoperator)
2226 return NULL;
2227
2228 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2229 c->c_arena);
2230 }
2231 else {
2232 int i;
2233 asdl_seq *targets;
2234 node *value;
2235 expr_ty expression;
2236
2237 /* a normal assignment */
2238 REQ(CHILD(n, 1), EQUAL);
2239 targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2240 if (!targets)
2241 return NULL;
2242 for (i = 0; i < NCH(n) - 2; i += 2) {
2243 expr_ty e;
2244 node *ch = CHILD(n, i);
2245 if (TYPE(ch) == yield_expr) {
2246 ast_error(ch, "assignment to yield expression not possible");
2247 return NULL;
2248 }
2249 e = ast_for_testlist(c, ch);
2250 if (!e)
2251 return NULL;
2252
2253 /* set context to assign */
2254 if (!set_context(c, e, Store, CHILD(n, i)))
2255 return NULL;
2256
2257 asdl_seq_SET(targets, i / 2, e);
2258 }
2259 value = CHILD(n, NCH(n) - 1);
2260 if (TYPE(value) == testlist)
2261 expression = ast_for_testlist(c, value);
2262 else
2263 expression = ast_for_expr(c, value);
2264 if (!expression)
2265 return NULL;
2266 return Assign(targets, expression, LINENO(n), n->n_col_offset,
2267 c->c_arena);
2268 }
2269 }
2270
2271 static stmt_ty
ast_for_print_stmt(struct compiling * c,const node * n)2272 ast_for_print_stmt(struct compiling *c, const node *n)
2273 {
2274 /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2275 | '>>' test [ (',' test)+ [','] ] )
2276 */
2277 expr_ty dest = NULL, expression;
2278 asdl_seq *seq = NULL;
2279 bool nl;
2280 int i, j, values_count, start = 1;
2281
2282 REQ(n, print_stmt);
2283 if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2284 dest = ast_for_expr(c, CHILD(n, 2));
2285 if (!dest)
2286 return NULL;
2287 start = 4;
2288 }
2289 values_count = (NCH(n) + 1 - start) / 2;
2290 if (values_count) {
2291 seq = asdl_seq_new(values_count, c->c_arena);
2292 if (!seq)
2293 return NULL;
2294 for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2295 expression = ast_for_expr(c, CHILD(n, i));
2296 if (!expression)
2297 return NULL;
2298 asdl_seq_SET(seq, j, expression);
2299 }
2300 }
2301 nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2302 return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2303 }
2304
2305 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)2306 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2307 {
2308 asdl_seq *seq;
2309 int i;
2310 expr_ty e;
2311
2312 REQ(n, exprlist);
2313
2314 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2315 if (!seq)
2316 return NULL;
2317 for (i = 0; i < NCH(n); i += 2) {
2318 e = ast_for_expr(c, CHILD(n, i));
2319 if (!e)
2320 return NULL;
2321 asdl_seq_SET(seq, i / 2, e);
2322 if (context && !set_context(c, e, context, CHILD(n, i)))
2323 return NULL;
2324 }
2325 return seq;
2326 }
2327
2328 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)2329 ast_for_del_stmt(struct compiling *c, const node *n)
2330 {
2331 asdl_seq *expr_list;
2332
2333 /* del_stmt: 'del' exprlist */
2334 REQ(n, del_stmt);
2335
2336 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2337 if (!expr_list)
2338 return NULL;
2339 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2340 }
2341
2342 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)2343 ast_for_flow_stmt(struct compiling *c, const node *n)
2344 {
2345 /*
2346 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2347 | yield_stmt
2348 break_stmt: 'break'
2349 continue_stmt: 'continue'
2350 return_stmt: 'return' [testlist]
2351 yield_stmt: yield_expr
2352 yield_expr: 'yield' testlist
2353 raise_stmt: 'raise' [test [',' test [',' test]]]
2354 */
2355 node *ch;
2356
2357 REQ(n, flow_stmt);
2358 ch = CHILD(n, 0);
2359 switch (TYPE(ch)) {
2360 case break_stmt:
2361 return Break(LINENO(n), n->n_col_offset, c->c_arena);
2362 case continue_stmt:
2363 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2364 case yield_stmt: { /* will reduce to yield_expr */
2365 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2366 if (!exp)
2367 return NULL;
2368 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2369 }
2370 case return_stmt:
2371 if (NCH(ch) == 1)
2372 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2373 else {
2374 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2375 if (!expression)
2376 return NULL;
2377 return Return(expression, LINENO(n), n->n_col_offset,
2378 c->c_arena);
2379 }
2380 case raise_stmt:
2381 if (NCH(ch) == 1)
2382 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset,
2383 c->c_arena);
2384 else if (NCH(ch) == 2) {
2385 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2386 if (!expression)
2387 return NULL;
2388 return Raise(expression, NULL, NULL, LINENO(n),
2389 n->n_col_offset, c->c_arena);
2390 }
2391 else if (NCH(ch) == 4) {
2392 expr_ty expr1, expr2;
2393
2394 expr1 = ast_for_expr(c, CHILD(ch, 1));
2395 if (!expr1)
2396 return NULL;
2397 expr2 = ast_for_expr(c, CHILD(ch, 3));
2398 if (!expr2)
2399 return NULL;
2400
2401 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset,
2402 c->c_arena);
2403 }
2404 else if (NCH(ch) == 6) {
2405 expr_ty expr1, expr2, expr3;
2406
2407 expr1 = ast_for_expr(c, CHILD(ch, 1));
2408 if (!expr1)
2409 return NULL;
2410 expr2 = ast_for_expr(c, CHILD(ch, 3));
2411 if (!expr2)
2412 return NULL;
2413 expr3 = ast_for_expr(c, CHILD(ch, 5));
2414 if (!expr3)
2415 return NULL;
2416
2417 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset,
2418 c->c_arena);
2419 }
2420 default:
2421 PyErr_Format(PyExc_SystemError,
2422 "unexpected flow_stmt: %d", TYPE(ch));
2423 return NULL;
2424 }
2425
2426 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2427 return NULL;
2428 }
2429
2430 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)2431 alias_for_import_name(struct compiling *c, const node *n, int store)
2432 {
2433 /*
2434 import_as_name: NAME ['as' NAME]
2435 dotted_as_name: dotted_name ['as' NAME]
2436 dotted_name: NAME ('.' NAME)*
2437 */
2438 PyObject *str, *name;
2439
2440 loop:
2441 switch (TYPE(n)) {
2442 case import_as_name: {
2443 node *name_node = CHILD(n, 0);
2444 str = NULL;
2445 if (NCH(n) == 3) {
2446 node *str_node = CHILD(n, 2);
2447 if (store && !forbidden_check(c, str_node, STR(str_node)))
2448 return NULL;
2449 str = NEW_IDENTIFIER(str_node);
2450 if (!str)
2451 return NULL;
2452 }
2453 else {
2454 if (!forbidden_check(c, name_node, STR(name_node)))
2455 return NULL;
2456 }
2457 name = NEW_IDENTIFIER(name_node);
2458 if (!name)
2459 return NULL;
2460 return alias(name, str, c->c_arena);
2461 }
2462 case dotted_as_name:
2463 if (NCH(n) == 1) {
2464 n = CHILD(n, 0);
2465 goto loop;
2466 }
2467 else {
2468 node *asname_node = CHILD(n, 2);
2469 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
2470 if (!a)
2471 return NULL;
2472 assert(!a->asname);
2473 if (!forbidden_check(c, asname_node, STR(asname_node)))
2474 return NULL;
2475 a->asname = NEW_IDENTIFIER(asname_node);
2476 if (!a->asname)
2477 return NULL;
2478 return a;
2479 }
2480 break;
2481 case dotted_name:
2482 if (NCH(n) == 1) {
2483 node *name_node = CHILD(n, 0);
2484 if (store && !forbidden_check(c, name_node, STR(name_node)))
2485 return NULL;
2486 name = NEW_IDENTIFIER(name_node);
2487 if (!name)
2488 return NULL;
2489 return alias(name, NULL, c->c_arena);
2490 }
2491 else {
2492 /* Create a string of the form "a.b.c" */
2493 int i;
2494 size_t len;
2495 char *s;
2496
2497 len = 0;
2498 for (i = 0; i < NCH(n); i += 2)
2499 /* length of string plus one for the dot */
2500 len += strlen(STR(CHILD(n, i))) + 1;
2501 len--; /* the last name doesn't have a dot */
2502 str = PyString_FromStringAndSize(NULL, len);
2503 if (!str)
2504 return NULL;
2505 s = PyString_AS_STRING(str);
2506 if (!s)
2507 return NULL;
2508 for (i = 0; i < NCH(n); i += 2) {
2509 char *sch = STR(CHILD(n, i));
2510 strcpy(s, STR(CHILD(n, i)));
2511 s += strlen(sch);
2512 *s++ = '.';
2513 }
2514 --s;
2515 *s = '\0';
2516 PyString_InternInPlace(&str);
2517 PyArena_AddPyObject(c->c_arena, str);
2518 return alias(str, NULL, c->c_arena);
2519 }
2520 break;
2521 case STAR:
2522 str = PyString_InternFromString("*");
2523 PyArena_AddPyObject(c->c_arena, str);
2524 return alias(str, NULL, c->c_arena);
2525 default:
2526 PyErr_Format(PyExc_SystemError,
2527 "unexpected import name: %d", TYPE(n));
2528 return NULL;
2529 }
2530
2531 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2532 return NULL;
2533 }
2534
/* Convert an import_stmt node into an Import or ImportFrom statement.
 *
 * c: compilation state; c->c_arena receives every allocation made here.
 * n: the import_stmt node.
 *
 * Returns the new statement, or NULL when a helper fails or the node does
 * not have one of the shapes handled below.
 */
static stmt_ty
ast_for_import_stmt(struct compiling *c, const node *n)
{
    /*
      import_stmt: import_name | import_from
      import_name: 'import' dotted_as_names
      import_from: 'from' ('.'* dotted_name | '.') 'import'
                          ('*' | '(' import_as_names ')' | import_as_names)
    */
    int lineno;
    int col_offset;
    int i;
    asdl_seq *aliases;

    REQ(n, import_stmt);
    /* record the statement's position now: n is re-pointed at child nodes
       below */
    lineno = LINENO(n);
    col_offset = n->n_col_offset;
    n = CHILD(n, 0);
    if (TYPE(n) == import_name) {
        n = CHILD(n, 1);
        REQ(n, dotted_as_names);
        /* the names sit at the even child indices, hence (NCH + 1) / 2
           aliases */
        aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
        if (!aliases)
            return NULL;
        for (i = 0; i < NCH(n); i += 2) {
            alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
            if (!import_alias)
                return NULL;
            asdl_seq_SET(aliases, i / 2, import_alias);
        }
        return Import(aliases, lineno, col_offset, c->c_arena);
    }
    else if (TYPE(n) == import_from) {
        int n_children;
        int idx, ndots = 0;
        alias_ty mod = NULL;
        identifier modname = NULL;

        /* Count the number of dots (for relative imports) and check for the
           optional module name */
        for (idx = 1; idx < NCH(n); idx++) {
            if (TYPE(CHILD(n, idx)) == dotted_name) {
                mod = alias_for_import_name(c, CHILD(n, idx), 0);
                if (!mod)
                    return NULL;
                /* step past the dotted_name before leaving the loop */
                idx++;
                break;
            } else if (TYPE(CHILD(n, idx)) != DOT) {
                /* neither a dot nor a module name: stop counting here */
                break;
            }
            ndots++;
        }
        idx++; /* skip over the 'import' keyword */
        switch (TYPE(CHILD(n, idx))) {
        case STAR:
            /* from ... import * */
            n = CHILD(n, idx);
            n_children = 1;
            break;
        case LPAR:
            /* from ... import (x, y, z) */
            n = CHILD(n, idx + 1);
            n_children = NCH(n);
            break;
        case import_as_names:
            /* from ... import x, y, z */
            n = CHILD(n, idx);
            n_children = NCH(n);
            /* an even child count means the list ends with a comma */
            if (n_children % 2 == 0) {
                ast_error(n, "trailing comma not allowed without"
                             " surrounding parentheses");
                return NULL;
            }
            break;
        default:
            ast_error(n, "Unexpected node-type in from-import");
            return NULL;
        }

        aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
        if (!aliases)
            return NULL;

        /* handle "from ... import *" special b/c there's no children */
        if (TYPE(n) == STAR) {
            alias_ty import_alias = alias_for_import_name(c, n, 1);
            if (!import_alias)
                return NULL;
            asdl_seq_SET(aliases, 0, import_alias);
        }
        else {
            /* n is the import_as_names node here; names are at the even
               child indices */
            for (i = 0; i < NCH(n); i += 2) {
                alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
                if (!import_alias)
                    return NULL;
                asdl_seq_SET(aliases, i / 2, import_alias);
            }
        }
        /* modname stays NULL when no dotted_name was found above */
        if (mod != NULL)
            modname = mod->name;
        return ImportFrom(modname, aliases, ndots, lineno, col_offset,
                          c->c_arena);
    }
    /* n is the first child of the import_stmt at this point, and it was
       neither an import_name nor an import_from */
    PyErr_Format(PyExc_SystemError,
                 "unknown import statement: starts with command '%s'",
                 STR(CHILD(n, 0)));
    return NULL;
}
2643
2644 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)2645 ast_for_global_stmt(struct compiling *c, const node *n)
2646 {
2647 /* global_stmt: 'global' NAME (',' NAME)* */
2648 identifier name;
2649 asdl_seq *s;
2650 int i;
2651
2652 REQ(n, global_stmt);
2653 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2654 if (!s)
2655 return NULL;
2656 for (i = 1; i < NCH(n); i += 2) {
2657 name = NEW_IDENTIFIER(CHILD(n, i));
2658 if (!name)
2659 return NULL;
2660 asdl_seq_SET(s, i / 2, name);
2661 }
2662 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2663 }
2664
2665 static stmt_ty
ast_for_exec_stmt(struct compiling * c,const node * n)2666 ast_for_exec_stmt(struct compiling *c, const node *n)
2667 {
2668 expr_ty expr1, globals = NULL, locals = NULL;
2669 int n_children = NCH(n);
2670 if (n_children != 2 && n_children != 4 && n_children != 6) {
2671 PyErr_Format(PyExc_SystemError,
2672 "poorly formed 'exec' statement: %d parts to statement",
2673 n_children);
2674 return NULL;
2675 }
2676
2677 /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2678 REQ(n, exec_stmt);
2679 expr1 = ast_for_expr(c, CHILD(n, 1));
2680 if (!expr1)
2681 return NULL;
2682
2683 if (expr1->kind == Tuple_kind && n_children < 4 &&
2684 (asdl_seq_LEN(expr1->v.Tuple.elts) == 2 ||
2685 asdl_seq_LEN(expr1->v.Tuple.elts) == 3)) {
2686 /* Backwards compatibility: passing exec args as a tuple */
2687 globals = asdl_seq_GET(expr1->v.Tuple.elts, 1);
2688 if (asdl_seq_LEN(expr1->v.Tuple.elts) == 3) {
2689 locals = asdl_seq_GET(expr1->v.Tuple.elts, 2);
2690 }
2691 expr1 = asdl_seq_GET(expr1->v.Tuple.elts, 0);
2692 }
2693
2694 if (n_children >= 4) {
2695 globals = ast_for_expr(c, CHILD(n, 3));
2696 if (!globals)
2697 return NULL;
2698 }
2699 if (n_children == 6) {
2700 locals = ast_for_expr(c, CHILD(n, 5));
2701 if (!locals)
2702 return NULL;
2703 }
2704
2705 return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset,
2706 c->c_arena);
2707 }
2708
2709 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)2710 ast_for_assert_stmt(struct compiling *c, const node *n)
2711 {
2712 /* assert_stmt: 'assert' test [',' test] */
2713 REQ(n, assert_stmt);
2714 if (NCH(n) == 2) {
2715 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2716 if (!expression)
2717 return NULL;
2718 return Assert(expression, NULL, LINENO(n), n->n_col_offset,
2719 c->c_arena);
2720 }
2721 else if (NCH(n) == 4) {
2722 expr_ty expr1, expr2;
2723
2724 expr1 = ast_for_expr(c, CHILD(n, 1));
2725 if (!expr1)
2726 return NULL;
2727 expr2 = ast_for_expr(c, CHILD(n, 3));
2728 if (!expr2)
2729 return NULL;
2730
2731 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2732 }
2733 PyErr_Format(PyExc_SystemError,
2734 "improper number of parts to 'assert' statement: %d",
2735 NCH(n));
2736 return NULL;
2737 }
2738
2739 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)2740 ast_for_suite(struct compiling *c, const node *n)
2741 {
2742 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2743 asdl_seq *seq;
2744 stmt_ty s;
2745 int i, total, num, end, pos = 0;
2746 node *ch;
2747
2748 REQ(n, suite);
2749
2750 total = num_stmts(n);
2751 seq = asdl_seq_new(total, c->c_arena);
2752 if (!seq)
2753 return NULL;
2754 if (TYPE(CHILD(n, 0)) == simple_stmt) {
2755 n = CHILD(n, 0);
2756 /* simple_stmt always ends with a NEWLINE,
2757 and may have a trailing SEMI
2758 */
2759 end = NCH(n) - 1;
2760 if (TYPE(CHILD(n, end - 1)) == SEMI)
2761 end--;
2762 /* loop by 2 to skip semi-colons */
2763 for (i = 0; i < end; i += 2) {
2764 ch = CHILD(n, i);
2765 s = ast_for_stmt(c, ch);
2766 if (!s)
2767 return NULL;
2768 asdl_seq_SET(seq, pos++, s);
2769 }
2770 }
2771 else {
2772 for (i = 2; i < (NCH(n) - 1); i++) {
2773 ch = CHILD(n, i);
2774 REQ(ch, stmt);
2775 num = num_stmts(ch);
2776 if (num == 1) {
2777 /* small_stmt or compound_stmt with only one child */
2778 s = ast_for_stmt(c, ch);
2779 if (!s)
2780 return NULL;
2781 asdl_seq_SET(seq, pos++, s);
2782 }
2783 else {
2784 int j;
2785 ch = CHILD(ch, 0);
2786 REQ(ch, simple_stmt);
2787 for (j = 0; j < NCH(ch); j += 2) {
2788 /* statement terminates with a semi-colon ';' */
2789 if (NCH(CHILD(ch, j)) == 0) {
2790 assert((j + 1) == NCH(ch));
2791 break;
2792 }
2793 s = ast_for_stmt(c, CHILD(ch, j));
2794 if (!s)
2795 return NULL;
2796 asdl_seq_SET(seq, pos++, s);
2797 }
2798 }
2799 }
2800 }
2801 assert(pos == seq->size);
2802 return seq;
2803 }
2804
/* Convert an if_stmt node into an If statement.
 *
 * elif clauses are represented as nested If statements: each one is the
 * single element of the orelse sequence of the If before it.
 *
 * Returns NULL with an exception set when a sub-expression or suite fails
 * to convert, or when the keyword at child 4 is neither 'else' nor 'elif'.
 */
static stmt_ty
ast_for_if_stmt(struct compiling *c, const node *n)
{
    /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
       ['else' ':' suite]
    */
    char *s;

    REQ(n, if_stmt);

    /* four children: a plain 'if' test ':' suite */
    if (NCH(n) == 4) {
        expr_ty expression;
        asdl_seq *suite_seq;

        expression = ast_for_expr(c, CHILD(n, 1));
        if (!expression)
            return NULL;
        suite_seq = ast_for_suite(c, CHILD(n, 3));
        if (!suite_seq)
            return NULL;

        return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
                  c->c_arena);
    }

    /* child 4 is the keyword that follows the first suite */
    s = STR(CHILD(n, 4));
    /* s[2], the third character in the string, will be
       's' for el_s_e, or
       'i' for el_i_f
    */
    if (s[2] == 's') {
        /* if ... else with no elif: the else suite is child 6 */
        expr_ty expression;
        asdl_seq *seq1, *seq2;

        expression = ast_for_expr(c, CHILD(n, 1));
        if (!expression)
            return NULL;
        seq1 = ast_for_suite(c, CHILD(n, 3));
        if (!seq1)
            return NULL;
        seq2 = ast_for_suite(c, CHILD(n, 6));
        if (!seq2)
            return NULL;

        return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
                  c->c_arena);
    }
    else if (s[2] == 'i') {
        int i, n_elif, has_else = 0;
        expr_ty expression;
        asdl_seq *suite_seq;
        asdl_seq *orelse = NULL;
        /* number of children after the leading 'if' test ':' suite */
        n_elif = NCH(n) - 4;
        /* must reference the child n_elif+1 since 'else' token is third,
           not fourth, child from the end. */
        if (TYPE(CHILD(n, (n_elif + 1))) == NAME
            && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
            has_else = 1;
            /* discount the three children of the else clause */
            n_elif -= 3;
        }
        /* each elif clause occupies four children */
        n_elif /= 4;

        if (has_else) {
            asdl_seq *suite_seq2;

            /* combine the last elif with the else clause: its test is at
               NCH - 6, its suite at NCH - 4, and the else suite at
               NCH - 1 */
            orelse = asdl_seq_new(1, c->c_arena);
            if (!orelse)
                return NULL;
            expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
            if (!expression)
                return NULL;
            suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
            if (!suite_seq)
                return NULL;
            suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
            if (!suite_seq2)
                return NULL;

            asdl_seq_SET(orelse, 0,
                         If(expression, suite_seq, suite_seq2,
                            LINENO(CHILD(n, NCH(n) - 6)),
                            CHILD(n, NCH(n) - 6)->n_col_offset,
                            c->c_arena));
            /* the just-created orelse handled the last elif */
            n_elif--;
        }

        /* wrap the remaining elif clauses from the last one backwards, so
           that each new If takes the chain built so far as its orelse */
        for (i = 0; i < n_elif; i++) {
            /* index of the test of the elif being processed; its suite is
               two children further on */
            int off = 5 + (n_elif - i - 1) * 4;
            asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
            if (!newobj)
                return NULL;
            expression = ast_for_expr(c, CHILD(n, off));
            if (!expression)
                return NULL;
            suite_seq = ast_for_suite(c, CHILD(n, off + 2));
            if (!suite_seq)
                return NULL;

            asdl_seq_SET(newobj, 0,
                         If(expression, suite_seq, orelse,
                            LINENO(CHILD(n, off)),
                            CHILD(n, off)->n_col_offset, c->c_arena));
            orelse = newobj;
        }
        /* finally the leading 'if' itself, with the chain as its orelse */
        expression = ast_for_expr(c, CHILD(n, 1));
        if (!expression)
            return NULL;
        suite_seq = ast_for_suite(c, CHILD(n, 3));
        if (!suite_seq)
            return NULL;
        return If(expression, suite_seq, orelse,
                  LINENO(n), n->n_col_offset, c->c_arena);
    }

    PyErr_Format(PyExc_SystemError,
                 "unexpected token in 'if' statement: %s", s);
    return NULL;
}
2924
2925 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)2926 ast_for_while_stmt(struct compiling *c, const node *n)
2927 {
2928 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2929 REQ(n, while_stmt);
2930
2931 if (NCH(n) == 4) {
2932 expr_ty expression;
2933 asdl_seq *suite_seq;
2934
2935 expression = ast_for_expr(c, CHILD(n, 1));
2936 if (!expression)
2937 return NULL;
2938 suite_seq = ast_for_suite(c, CHILD(n, 3));
2939 if (!suite_seq)
2940 return NULL;
2941 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2942 c->c_arena);
2943 }
2944 else if (NCH(n) == 7) {
2945 expr_ty expression;
2946 asdl_seq *seq1, *seq2;
2947
2948 expression = ast_for_expr(c, CHILD(n, 1));
2949 if (!expression)
2950 return NULL;
2951 seq1 = ast_for_suite(c, CHILD(n, 3));
2952 if (!seq1)
2953 return NULL;
2954 seq2 = ast_for_suite(c, CHILD(n, 6));
2955 if (!seq2)
2956 return NULL;
2957
2958 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2959 c->c_arena);
2960 }
2961
2962 PyErr_Format(PyExc_SystemError,
2963 "wrong number of tokens for 'while' statement: %d",
2964 NCH(n));
2965 return NULL;
2966 }
2967
2968 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n)2969 ast_for_for_stmt(struct compiling *c, const node *n)
2970 {
2971 asdl_seq *_target, *seq = NULL, *suite_seq;
2972 expr_ty expression;
2973 expr_ty target, first;
2974 const node *node_target;
2975 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2976 REQ(n, for_stmt);
2977
2978 if (NCH(n) == 9) {
2979 seq = ast_for_suite(c, CHILD(n, 8));
2980 if (!seq)
2981 return NULL;
2982 }
2983
2984 node_target = CHILD(n, 1);
2985 _target = ast_for_exprlist(c, node_target, Store);
2986 if (!_target)
2987 return NULL;
2988 /* Check the # of children rather than the length of _target, since
2989 for x, in ... has 1 element in _target, but still requires a Tuple. */
2990 first = (expr_ty)asdl_seq_GET(_target, 0);
2991 if (NCH(node_target) == 1)
2992 target = first;
2993 else
2994 target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
2995
2996 expression = ast_for_testlist(c, CHILD(n, 3));
2997 if (!expression)
2998 return NULL;
2999 suite_seq = ast_for_suite(c, CHILD(n, 5));
3000 if (!suite_seq)
3001 return NULL;
3002
3003 return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
3004 c->c_arena);
3005 }
3006
3007 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3008 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3009 {
3010 /* except_clause: 'except' [test [(',' | 'as') test]] */
3011 REQ(exc, except_clause);
3012 REQ(body, suite);
3013
3014 if (NCH(exc) == 1) {
3015 asdl_seq *suite_seq = ast_for_suite(c, body);
3016 if (!suite_seq)
3017 return NULL;
3018
3019 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3020 exc->n_col_offset, c->c_arena);
3021 }
3022 else if (NCH(exc) == 2) {
3023 expr_ty expression;
3024 asdl_seq *suite_seq;
3025
3026 expression = ast_for_expr(c, CHILD(exc, 1));
3027 if (!expression)
3028 return NULL;
3029 suite_seq = ast_for_suite(c, body);
3030 if (!suite_seq)
3031 return NULL;
3032
3033 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3034 exc->n_col_offset, c->c_arena);
3035 }
3036 else if (NCH(exc) == 4) {
3037 asdl_seq *suite_seq;
3038 expr_ty expression;
3039 expr_ty e = ast_for_expr(c, CHILD(exc, 3));
3040 if (!e)
3041 return NULL;
3042 if (!set_context(c, e, Store, CHILD(exc, 3)))
3043 return NULL;
3044 expression = ast_for_expr(c, CHILD(exc, 1));
3045 if (!expression)
3046 return NULL;
3047 suite_seq = ast_for_suite(c, body);
3048 if (!suite_seq)
3049 return NULL;
3050
3051 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3052 exc->n_col_offset, c->c_arena);
3053 }
3054
3055 PyErr_Format(PyExc_SystemError,
3056 "wrong number of children for 'except' clause: %d",
3057 NCH(exc));
3058 return NULL;
3059 }
3060
/* Convert a try_stmt node into a TryExcept and/or TryFinally statement.
 *
 * Layout used by the indices below: the try body is child 2, and every
 * following clause takes three children (clause or keyword, ':', suite).
 * When both except clauses and a 'finally' are present, the TryExcept
 * becomes the only statement in the body of the TryFinally.
 *
 * Returns NULL with an exception set on failure.
 */
static stmt_ty
ast_for_try_stmt(struct compiling *c, const node *n)
{
    const int nch = NCH(n);
    /* number of three-child groups after the try body; reduced below for
       each 'else' / 'finally' group found */
    int n_except = (nch - 3)/3;
    asdl_seq *body, *orelse = NULL, *finally = NULL;

    REQ(n, try_stmt);

    body = ast_for_suite(c, CHILD(n, 2));
    if (body == NULL)
        return NULL;

    /* the head of the last group tells how the statement ends */
    if (TYPE(CHILD(n, nch - 3)) == NAME) {
        if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
            if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
                /* we can assume it's an "else",
                   because nch >= 9 for try-else-finally and
                   it would otherwise have a type of except_clause */
                orelse = ast_for_suite(c, CHILD(n, nch - 4));
                if (orelse == NULL)
                    return NULL;
                n_except--;
            }

            finally = ast_for_suite(c, CHILD(n, nch - 1));
            if (finally == NULL)
                return NULL;
            n_except--;
        }
        else {
            /* we can assume it's an "else",
               otherwise it would have a type of except_clause */
            orelse = ast_for_suite(c, CHILD(n, nch - 1));
            if (orelse == NULL)
                return NULL;
            n_except--;
        }
    }
    else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
        ast_error(n, "malformed 'try' statement");
        return NULL;
    }

    if (n_except > 0) {
        int i;
        stmt_ty except_st;
        /* process except statements to create a try ... except */
        asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
        if (handlers == NULL)
            return NULL;

        /* handler i has its except_clause at child 3 + 3*i and its suite
           at child 5 + 3*i */
        for (i = 0; i < n_except; i++) {
            excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
                                                       CHILD(n, 5 + i * 3));
            if (!e)
                return NULL;
            asdl_seq_SET(handlers, i, e);
        }

        except_st = TryExcept(body, handlers, orelse, LINENO(n),
                              n->n_col_offset, c->c_arena);
        /* no 'finally': the TryExcept result is returned as is */
        if (!finally)
            return except_st;

        /* if a 'finally' is present too, we nest the TryExcept within a
           TryFinally to emulate try ... except ... finally */
        body = asdl_seq_new(1, c->c_arena);
        if (body == NULL)
            return NULL;
        asdl_seq_SET(body, 0, except_st);
    }

    /* must be a try ... finally (except clauses are in body, if any exist) */
    assert(finally != NULL);
    return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
}
3138
3139 /* with_item: test ['as' expr] */
3140 static stmt_ty
ast_for_with_item(struct compiling * c,const node * n,asdl_seq * content)3141 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
3142 {
3143 expr_ty context_expr, optional_vars = NULL;
3144
3145 REQ(n, with_item);
3146 context_expr = ast_for_expr(c, CHILD(n, 0));
3147 if (!context_expr)
3148 return NULL;
3149 if (NCH(n) == 3) {
3150 optional_vars = ast_for_expr(c, CHILD(n, 2));
3151
3152 if (!optional_vars) {
3153 return NULL;
3154 }
3155 if (!set_context(c, optional_vars, Store, n)) {
3156 return NULL;
3157 }
3158 }
3159
3160 return With(context_expr, optional_vars, content, LINENO(n),
3161 n->n_col_offset, c->c_arena);
3162 }
3163
3164 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3165 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n)3166 ast_for_with_stmt(struct compiling *c, const node *n)
3167 {
3168 int i;
3169 stmt_ty ret;
3170 asdl_seq *inner;
3171
3172 REQ(n, with_stmt);
3173
3174 /* process the with items inside-out */
3175 i = NCH(n) - 1;
3176 /* the suite of the innermost with item is the suite of the with stmt */
3177 inner = ast_for_suite(c, CHILD(n, i));
3178 if (!inner)
3179 return NULL;
3180
3181 for (;;) {
3182 i -= 2;
3183 ret = ast_for_with_item(c, CHILD(n, i), inner);
3184 if (!ret)
3185 return NULL;
3186 /* was this the last item? */
3187 if (i == 1)
3188 break;
3189 /* if not, wrap the result so far in a new sequence */
3190 inner = asdl_seq_new(1, c->c_arena);
3191 if (!inner)
3192 return NULL;
3193 asdl_seq_SET(inner, 0, ret);
3194 }
3195
3196 return ret;
3197 }
3198
3199 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3200 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3201 {
3202 /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
3203 PyObject *classname;
3204 asdl_seq *bases, *s;
3205
3206 REQ(n, classdef);
3207
3208 if (!forbidden_check(c, n, STR(CHILD(n, 1))))
3209 return NULL;
3210
3211 if (NCH(n) == 4) {
3212 s = ast_for_suite(c, CHILD(n, 3));
3213 if (!s)
3214 return NULL;
3215 classname = NEW_IDENTIFIER(CHILD(n, 1));
3216 if (!classname)
3217 return NULL;
3218 return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3219 n->n_col_offset, c->c_arena);
3220 }
3221 /* check for empty base list */
3222 if (TYPE(CHILD(n,3)) == RPAR) {
3223 s = ast_for_suite(c, CHILD(n,5));
3224 if (!s)
3225 return NULL;
3226 classname = NEW_IDENTIFIER(CHILD(n, 1));
3227 if (!classname)
3228 return NULL;
3229 return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3230 n->n_col_offset, c->c_arena);
3231 }
3232
3233 /* else handle the base class list */
3234 bases = ast_for_class_bases(c, CHILD(n, 3));
3235 if (!bases)
3236 return NULL;
3237
3238 s = ast_for_suite(c, CHILD(n, 6));
3239 if (!s)
3240 return NULL;
3241 classname = NEW_IDENTIFIER(CHILD(n, 1));
3242 if (!classname)
3243 return NULL;
3244 return ClassDef(classname, bases, s, decorator_seq,
3245 LINENO(n), n->n_col_offset, c->c_arena);
3246 }
3247
3248 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3249 ast_for_stmt(struct compiling *c, const node *n)
3250 {
3251 if (TYPE(n) == stmt) {
3252 assert(NCH(n) == 1);
3253 n = CHILD(n, 0);
3254 }
3255 if (TYPE(n) == simple_stmt) {
3256 assert(num_stmts(n) == 1);
3257 n = CHILD(n, 0);
3258 }
3259 if (TYPE(n) == small_stmt) {
3260 n = CHILD(n, 0);
3261 /* small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt
3262 | flow_stmt | import_stmt | global_stmt | exec_stmt
3263 | assert_stmt
3264 */
3265 switch (TYPE(n)) {
3266 case expr_stmt:
3267 return ast_for_expr_stmt(c, n);
3268 case print_stmt:
3269 return ast_for_print_stmt(c, n);
3270 case del_stmt:
3271 return ast_for_del_stmt(c, n);
3272 case pass_stmt:
3273 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3274 case flow_stmt:
3275 return ast_for_flow_stmt(c, n);
3276 case import_stmt:
3277 return ast_for_import_stmt(c, n);
3278 case global_stmt:
3279 return ast_for_global_stmt(c, n);
3280 case exec_stmt:
3281 return ast_for_exec_stmt(c, n);
3282 case assert_stmt:
3283 return ast_for_assert_stmt(c, n);
3284 default:
3285 PyErr_Format(PyExc_SystemError,
3286 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3287 TYPE(n), NCH(n));
3288 return NULL;
3289 }
3290 }
3291 else {
3292 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3293 | funcdef | classdef | decorated
3294 */
3295 node *ch = CHILD(n, 0);
3296 REQ(n, compound_stmt);
3297 switch (TYPE(ch)) {
3298 case if_stmt:
3299 return ast_for_if_stmt(c, ch);
3300 case while_stmt:
3301 return ast_for_while_stmt(c, ch);
3302 case for_stmt:
3303 return ast_for_for_stmt(c, ch);
3304 case try_stmt:
3305 return ast_for_try_stmt(c, ch);
3306 case with_stmt:
3307 return ast_for_with_stmt(c, ch);
3308 case funcdef:
3309 return ast_for_funcdef(c, ch, NULL);
3310 case classdef:
3311 return ast_for_classdef(c, ch, NULL);
3312 case decorated:
3313 return ast_for_decorated(c, ch);
3314 default:
3315 PyErr_Format(PyExc_SystemError,
3316 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3317 TYPE(n), NCH(n));
3318 return NULL;
3319 }
3320 }
3321 }
3322
3323 static PyObject *
parsenumber(struct compiling * c,const char * s)3324 parsenumber(struct compiling *c, const char *s)
3325 {
3326 const char *end;
3327 long x;
3328 double dx;
3329 #ifndef WITHOUT_COMPLEX
3330 Py_complex complex;
3331 int imflag;
3332 #endif
3333
3334 assert(s != NULL);
3335 errno = 0;
3336 end = s + strlen(s) - 1;
3337 #ifndef WITHOUT_COMPLEX
3338 imflag = *end == 'j' || *end == 'J';
3339 #endif
3340 if (*end == 'l' || *end == 'L')
3341 return PyLong_FromString((char *)s, (char **)0, 0);
3342 x = PyOS_strtol((char *)s, (char **)&end, 0);
3343 if (*end == '\0') {
3344 if (errno != 0)
3345 return PyLong_FromString((char *)s, (char **)0, 0);
3346 return PyInt_FromLong(x);
3347 }
3348 /* XXX Huge floats may silently fail */
3349 #ifndef WITHOUT_COMPLEX
3350 if (imflag) {
3351 complex.real = 0.;
3352 complex.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3353 if (complex.imag == -1.0 && PyErr_Occurred())
3354 return NULL;
3355 return PyComplex_FromCComplex(complex);
3356 }
3357 else
3358 #endif
3359 {
3360 dx = PyOS_string_to_double(s, NULL, NULL);
3361 if (dx == -1.0 && PyErr_Occurred())
3362 return NULL;
3363 return PyFloat_FromDouble(dx);
3364 }
3365 }
3366
3367 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end,char * encoding)3368 decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
3369 {
3370 #ifndef Py_USING_UNICODE
3371 Py_FatalError("decode_utf8 should not be called in this build.");
3372 return NULL;
3373 #else
3374 PyObject *u, *v;
3375 char *s, *t;
3376 t = s = (char *)*sPtr;
3377 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3378 while (s < end && (*s & 0x80)) s++;
3379 *sPtr = s;
3380 u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3381 if (u == NULL)
3382 return NULL;
3383 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3384 Py_DECREF(u);
3385 return v;
3386 #endif
3387 }
3388
#ifdef Py_USING_UNICODE
/* Decode the body of a unicode literal into a unicode object.
 *
 * c:        compilation state, handed on to decode_utf8().
 * s, len:   the literal's content (without quotes) and its length.
 * rawmode:  non-zero selects the raw-unicode-escape decoder, zero the
 *           unicode-escape decoder.
 * encoding: source encoding; when it is NULL or "iso-8859-1" the bytes are
 *           passed to the decoder unchanged.
 *
 * For any other encoding the content is first rewritten into a temporary
 * buffer: each run of bytes with the high bit set is replaced by
 * \UXXXXXXXX escapes (obtained through a UTF-8 -> UTF-32-BE conversion),
 * and a backslash directly followed by such a byte, or by the end of the
 * input, is spelled \u005c.
 *
 * Returns a new unicode object, or NULL with an exception set.  An input
 * too long for the temporary buffer raises MemoryError.
 */
static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
{
    PyObject *v;
    PyObject *u = NULL;
    char *buf;
    char *p;
    const char *end;
    if (encoding != NULL && strcmp(encoding, "iso-8859-1")) {
        /* check for integer overflow; the caller expects an exception to
           be set whenever NULL is returned */
        if (len > PY_SIZE_MAX / 6)
            return PyErr_NoMemory();
        /* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
           "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
        u = PyString_FromStringAndSize((char *)NULL, len * 6);
        if (u == NULL)
            return NULL;
        p = buf = PyString_AsString(u);
        end = s + len;
        while (s < end) {
            if (*s == '\\') {
                *p++ = *s++;
                /* do not look at (or copy) the byte at `end` when the
                   backslash was the last input byte */
                if (s >= end || *s & 0x80) {
                    strcpy(p, "u005c");
                    p += 5;
                    if (s >= end)
                        break;
                }
            }
            if (*s & 0x80) { /* XXX inefficient */
                PyObject *w;
                char *r;
                Py_ssize_t rn, i;
                w = decode_utf8(c, &s, end, "utf-32-be");
                if (w == NULL) {
                    Py_DECREF(u);
                    return NULL;
                }
                r = PyString_AsString(w);
                rn = PyString_Size(w);
                assert(rn % 4 == 0);
                /* one \UXXXXXXXX escape (10 characters) per code point */
                for (i = 0; i < rn; i += 4) {
                    sprintf(p, "\\U%02x%02x%02x%02x",
                            r[i + 0] & 0xFF,
                            r[i + 1] & 0xFF,
                            r[i + 2] & 0xFF,
                            r[i + 3] & 0xFF);
                    p += 10;
                }
                Py_DECREF(w);
            } else {
                *p++ = *s++;
            }
        }
        /* decode the rewritten text instead of the original input */
        len = p - buf;
        s = buf;
    }
    if (rawmode)
        v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
    else
        v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
    /* u is still NULL when no temporary buffer was needed */
    Py_XDECREF(u);
    return v;
}
#endif
3453
/* s is a Python string literal, including the bracketing quote characters,
 * and r &/or u prefixes (if any), and embedded escape sequences (if any).
 * parsestr parses it, and returns the decoded Python string object.
 *
 * c: compilation state; supplies the source encoding and the
 *    unicode-literals flag.
 * n: node handed to ast_warn() for the Py3k warning below.
 *
 * Returns a new object, or NULL with an exception set.
 */
static PyObject *
parsestr(struct compiling *c, const node *n, const char *s)
{
    size_t len, i;
    int quote = Py_CHARMASK(*s);
    int rawmode = 0;
    int need_encoding;
    /* default string type comes from the __future__ flag; a 'u' or 'b'
       prefix overrides it below */
    int unicode = c->c_future_unicode;
    int bytes = 0;

    /* consume the prefix letters; they are tested in the fixed order
       u/U, then b/B, then r/R */
    if (isalpha(quote) || quote == '_') {
        if (quote == 'u' || quote == 'U') {
            quote = *++s;
            unicode = 1;
        }
        if (quote == 'b' || quote == 'B') {
            quote = *++s;
            unicode = 0;
            bytes = 1;
        }
        if (quote == 'r' || quote == 'R') {
            quote = *++s;
            rawmode = 1;
        }
    }
    /* whatever follows the prefix must be the opening quote */
    if (quote != '\'' && quote != '\"') {
        PyErr_BadInternalCall();
        return NULL;
    }
    s++;
    len = strlen(s);
    if (len > INT_MAX) {
        PyErr_SetString(PyExc_OverflowError,
                        "string to parse is too long");
        return NULL;
    }
    /* drop the closing quote from the length and verify it is there */
    if (s[--len] != quote) {
        PyErr_BadInternalCall();
        return NULL;
    }
    /* triple-quoted literal: skip the two remaining opening quotes and
       strip the two remaining closing quotes */
    if (len >= 4 && s[0] == quote && s[1] == quote) {
        s += 2;
        len -= 2;
        if (s[--len] != quote || s[--len] != quote) {
            PyErr_BadInternalCall();
            return NULL;
        }
    }
    /* under the Py3k warning flag, warn once about the first non-ASCII
       byte in a b'' literal */
    if (Py_Py3kWarningFlag && bytes) {
        for (i = 0; i < len; i++) {
            if ((unsigned char)s[i] > 127) {
                if (!ast_warn(c, n,
                    "non-ascii bytes literals not supported in 3.x"))
                    return NULL;
                break;
            }
        }
    }
#ifdef Py_USING_UNICODE
    if (unicode || Py_UnicodeFlag) {
        return decode_unicode(c, s, len, rawmode, c->c_encoding);
    }
#endif
    /* byte strings need recoding only when the source encoding is set and
       is neither utf-8 nor iso-8859-1 */
    need_encoding = (c->c_encoding != NULL &&
                     strcmp(c->c_encoding, "utf-8") != 0 &&
                     strcmp(c->c_encoding, "iso-8859-1") != 0);
    /* no escape processing needed: raw literal, or no backslash present */
    if (rawmode || strchr(s, '\\') == NULL) {
        if (need_encoding) {
#ifndef Py_USING_UNICODE
            /* This should not happen - we never see any other
               encoding. */
            Py_FatalError(
                "cannot deal with encodings in this build.");
#else
            /* decode the bytes as UTF-8, then encode them with the
               source encoding */
            PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
            if (u == NULL)
                return NULL;
            v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
            Py_DECREF(u);
            return v;
#endif
        } else {
            return PyString_FromStringAndSize(s, len);
        }
    }

    /* escapes present: PyString_DecodeEscape does the work, and is given
       the source encoding only when recoding is needed */
    return PyString_DecodeEscape(s, len, NULL, unicode,
                                 need_encoding ? c->c_encoding : NULL);
}
3547
3548 /* Build a Python string object out of a STRING atom. This takes care of
3549 * compile-time literal catenation, calling parsestr() on each piece, and
3550 * pasting the intermediate results together.
3551 */
3552 static PyObject *
parsestrplus(struct compiling * c,const node * n)3553 parsestrplus(struct compiling *c, const node *n)
3554 {
3555 PyObject *v;
3556 int i;
3557 REQ(CHILD(n, 0), STRING);
3558 if ((v = parsestr(c, n, STR(CHILD(n, 0)))) != NULL) {
3559 /* String literal concatenation */
3560 for (i = 1; i < NCH(n); i++) {
3561 PyObject *s;
3562 s = parsestr(c, n, STR(CHILD(n, i)));
3563 if (s == NULL)
3564 goto onError;
3565 if (PyString_Check(v) && PyString_Check(s)) {
3566 PyString_ConcatAndDel(&v, s);
3567 if (v == NULL)
3568 goto onError;
3569 }
3570 #ifdef Py_USING_UNICODE
3571 else {
3572 PyObject *temp = PyUnicode_Concat(v, s);
3573 Py_DECREF(s);
3574 Py_DECREF(v);
3575 v = temp;
3576 if (v == NULL)
3577 goto onError;
3578 }
3579 #endif
3580 }
3581 }
3582 return v;
3583
3584 onError:
3585 Py_XDECREF(v);
3586 return NULL;
3587 }
3588