• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST).  The main function is PyAST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
15 
16 #include <assert.h>
17 
18 /* Data structure used internally */
19 struct compiling {
20     char *c_encoding; /* source encoding */
21     int c_future_unicode; /* __future__ unicode literals flag */
22     PyArena *c_arena; /* arena for allocating memeory */
23     const char *c_filename; /* filename */
24 };
25 
26 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
27 static expr_ty ast_for_expr(struct compiling *, const node *);
28 static stmt_ty ast_for_stmt(struct compiling *, const node *);
29 static asdl_seq *ast_for_suite(struct compiling *, const node *);
30 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
31                                   expr_context_ty);
32 static expr_ty ast_for_testlist(struct compiling *, const node *);
33 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
34 static expr_ty ast_for_testlist_comp(struct compiling *, const node *);
35 
36 /* Note different signature for ast_for_call */
37 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
38 
39 static PyObject *parsenumber(struct compiling *, const char *);
40 static PyObject *parsestr(struct compiling *, const node *n, const char *);
41 static PyObject *parsestrplus(struct compiling *, const node *n);
42 
/* Line number recorded on a CST node (only defined here if no earlier
   header already provides it). */
#ifndef LINENO
#  define LINENO(n) ((n)->n_lineno)
#endif

/* Comprehension flavours. */
#define COMP_GENEXP   0
#define COMP_SETCOMP  1
49 
50 static identifier
new_identifier(const char * n,PyArena * arena)51 new_identifier(const char* n, PyArena *arena) {
52     PyObject* id = PyString_InternFromString(n);
53     if (id != NULL)
54         PyArena_AddPyObject(arena, id);
55     return id;
56 }
57 
58 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
59 
60 /* This routine provides an invalid object for the syntax error.
61    The outermost routine must unpack this error and create the
62    proper object.  We do this so that we don't have to pass
63    the filename to everything function.
64 
65    XXX Maybe we should just pass the filename...
66 */
67 
68 static int
ast_error(const node * n,const char * errstr)69 ast_error(const node *n, const char *errstr)
70 {
71     PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
72     if (!u)
73         return 0;
74     PyErr_SetObject(PyExc_SyntaxError, u);
75     Py_DECREF(u);
76     return 0;
77 }
78 
79 static void
ast_error_finish(const char * filename)80 ast_error_finish(const char *filename)
81 {
82     PyObject *type, *value, *tback, *errstr, *loc, *tmp;
83     long lineno;
84 
85     assert(PyErr_Occurred());
86     if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
87         return;
88 
89     PyErr_Fetch(&type, &value, &tback);
90     errstr = PyTuple_GetItem(value, 0);
91     if (!errstr)
92         return;
93     Py_INCREF(errstr);
94     lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
95     if (lineno == -1) {
96         Py_DECREF(errstr);
97         return;
98     }
99     Py_DECREF(value);
100 
101     loc = PyErr_ProgramText(filename, lineno);
102     if (!loc) {
103         Py_INCREF(Py_None);
104         loc = Py_None;
105     }
106     tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
107     Py_DECREF(loc);
108     if (!tmp) {
109         Py_DECREF(errstr);
110         return;
111     }
112     value = PyTuple_Pack(2, errstr, tmp);
113     Py_DECREF(errstr);
114     Py_DECREF(tmp);
115     if (!value)
116         return;
117     PyErr_Restore(type, value, tback);
118 }
119 
120 static int
ast_warn(struct compiling * c,const node * n,char * msg)121 ast_warn(struct compiling *c, const node *n, char *msg)
122 {
123     if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, c->c_filename, LINENO(n),
124                            NULL, NULL) < 0) {
125         /* if -Werr, change it to a SyntaxError */
126         if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxWarning))
127             ast_error(n, msg);
128         return 0;
129     }
130     return 1;
131 }
132 
133 static int
forbidden_check(struct compiling * c,const node * n,const char * x)134 forbidden_check(struct compiling *c, const node *n, const char *x)
135 {
136     if (!strcmp(x, "None"))
137         return ast_error(n, "cannot assign to None");
138     if (!strcmp(x, "__debug__"))
139         return ast_error(n, "cannot assign to __debug__");
140     if (Py_Py3kWarningFlag) {
141         if (!(strcmp(x, "True") && strcmp(x, "False")) &&
142             !ast_warn(c, n, "assignment to True or False is forbidden in 3.x"))
143             return 0;
144         if (!strcmp(x, "nonlocal") &&
145             !ast_warn(c, n, "nonlocal is a keyword in 3.x"))
146             return 0;
147     }
148     return 1;
149 }
150 
151 /* num_stmts() returns number of contained statements.
152 
153    Use this routine to determine how big a sequence is needed for
154    the statements in a parse tree.  Its raison d'etre is this bit of
155    grammar:
156 
157    stmt: simple_stmt | compound_stmt
158    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
159 
160    A simple_stmt can contain multiple small_stmt elements joined
161    by semicolons.  If the arg is a simple_stmt, the number of
162    small_stmt elements is returned.
163 */
164 
165 static int
num_stmts(const node * n)166 num_stmts(const node *n)
167 {
168     int i, l;
169     node *ch;
170 
171     switch (TYPE(n)) {
172         case single_input:
173             if (TYPE(CHILD(n, 0)) == NEWLINE)
174                 return 0;
175             else
176                 return num_stmts(CHILD(n, 0));
177         case file_input:
178             l = 0;
179             for (i = 0; i < NCH(n); i++) {
180                 ch = CHILD(n, i);
181                 if (TYPE(ch) == stmt)
182                     l += num_stmts(ch);
183             }
184             return l;
185         case stmt:
186             return num_stmts(CHILD(n, 0));
187         case compound_stmt:
188             return 1;
189         case simple_stmt:
190             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
191         case suite:
192             if (NCH(n) == 1)
193                 return num_stmts(CHILD(n, 0));
194             else {
195                 l = 0;
196                 for (i = 2; i < (NCH(n) - 1); i++)
197                     l += num_stmts(CHILD(n, i));
198                 return l;
199             }
200         default: {
201             char buf[128];
202 
203             sprintf(buf, "Non-statement found: %d %d",
204                     TYPE(n), NCH(n));
205             Py_FatalError(buf);
206         }
207     }
208     assert(0);
209     return 0;
210 }
211 
212 /* Transform the CST rooted at node * to the appropriate AST
213 */
214 
215 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename,PyArena * arena)216 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
217                PyArena *arena)
218 {
219     int i, j, k, num;
220     asdl_seq *stmts = NULL;
221     stmt_ty s;
222     node *ch;
223     struct compiling c;
224 
225     if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
226         c.c_encoding = "utf-8";
227         if (TYPE(n) == encoding_decl) {
228             ast_error(n, "encoding declaration in Unicode string");
229             goto error;
230         }
231     } else if (TYPE(n) == encoding_decl) {
232         c.c_encoding = STR(n);
233         n = CHILD(n, 0);
234     } else {
235         c.c_encoding = NULL;
236     }
237     c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
238     c.c_arena = arena;
239     c.c_filename = filename;
240 
241     k = 0;
242     switch (TYPE(n)) {
243         case file_input:
244             stmts = asdl_seq_new(num_stmts(n), arena);
245             if (!stmts)
246                 return NULL;
247             for (i = 0; i < NCH(n) - 1; i++) {
248                 ch = CHILD(n, i);
249                 if (TYPE(ch) == NEWLINE)
250                     continue;
251                 REQ(ch, stmt);
252                 num = num_stmts(ch);
253                 if (num == 1) {
254                     s = ast_for_stmt(&c, ch);
255                     if (!s)
256                         goto error;
257                     asdl_seq_SET(stmts, k++, s);
258                 }
259                 else {
260                     ch = CHILD(ch, 0);
261                     REQ(ch, simple_stmt);
262                     for (j = 0; j < num; j++) {
263                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
264                         if (!s)
265                             goto error;
266                         asdl_seq_SET(stmts, k++, s);
267                     }
268                 }
269             }
270             return Module(stmts, arena);
271         case eval_input: {
272             expr_ty testlist_ast;
273 
274             /* XXX Why not comp_for here? */
275             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
276             if (!testlist_ast)
277                 goto error;
278             return Expression(testlist_ast, arena);
279         }
280         case single_input:
281             if (TYPE(CHILD(n, 0)) == NEWLINE) {
282                 stmts = asdl_seq_new(1, arena);
283                 if (!stmts)
284                     goto error;
285                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
286                                             arena));
287                 if (!asdl_seq_GET(stmts, 0))
288                     goto error;
289                 return Interactive(stmts, arena);
290             }
291             else {
292                 n = CHILD(n, 0);
293                 num = num_stmts(n);
294                 stmts = asdl_seq_new(num, arena);
295                 if (!stmts)
296                     goto error;
297                 if (num == 1) {
298                     s = ast_for_stmt(&c, n);
299                     if (!s)
300                         goto error;
301                     asdl_seq_SET(stmts, 0, s);
302                 }
303                 else {
304                     /* Only a simple_stmt can contain multiple statements. */
305                     REQ(n, simple_stmt);
306                     for (i = 0; i < NCH(n); i += 2) {
307                         if (TYPE(CHILD(n, i)) == NEWLINE)
308                             break;
309                         s = ast_for_stmt(&c, CHILD(n, i));
310                         if (!s)
311                             goto error;
312                         asdl_seq_SET(stmts, i / 2, s);
313                     }
314                 }
315 
316                 return Interactive(stmts, arena);
317             }
318         default:
319             PyErr_Format(PyExc_SystemError,
320                          "invalid node %d for PyAST_FromNode", TYPE(n));
321             goto error;
322     }
323  error:
324     ast_error_finish(filename);
325     return NULL;
326 }
327 
328 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
329 */
330 
331 static operator_ty
get_operator(const node * n)332 get_operator(const node *n)
333 {
334     switch (TYPE(n)) {
335         case VBAR:
336             return BitOr;
337         case CIRCUMFLEX:
338             return BitXor;
339         case AMPER:
340             return BitAnd;
341         case LEFTSHIFT:
342             return LShift;
343         case RIGHTSHIFT:
344             return RShift;
345         case PLUS:
346             return Add;
347         case MINUS:
348             return Sub;
349         case STAR:
350             return Mult;
351         case SLASH:
352             return Div;
353         case DOUBLESLASH:
354             return FloorDiv;
355         case PERCENT:
356             return Mod;
357         default:
358             return (operator_ty)0;
359     }
360 }
361 
362 /* Set the context ctx for expr_ty e, recursively traversing e.
363 
364    Only sets context for expr kinds that "can appear in assignment context"
365    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
366    an appropriate syntax error and returns false.
367 */
368 
369 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)370 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
371 {
372     asdl_seq *s = NULL;
373     /* If a particular expression type can't be used for assign / delete,
374        set expr_name to its name and an error message will be generated.
375     */
376     const char* expr_name = NULL;
377 
378     /* The ast defines augmented store and load contexts, but the
379        implementation here doesn't actually use them.  The code may be
380        a little more complex than necessary as a result.  It also means
381        that expressions in an augmented assignment have a Store context.
382        Consider restructuring so that augmented assignment uses
383        set_context(), too.
384     */
385     assert(ctx != AugStore && ctx != AugLoad);
386 
387     switch (e->kind) {
388         case Attribute_kind:
389             if (ctx == Store && !forbidden_check(c, n,
390                                 PyBytes_AS_STRING(e->v.Attribute.attr)))
391                     return 0;
392             e->v.Attribute.ctx = ctx;
393             break;
394         case Subscript_kind:
395             e->v.Subscript.ctx = ctx;
396             break;
397         case Name_kind:
398             if (ctx == Store && !forbidden_check(c, n,
399                                 PyBytes_AS_STRING(e->v.Name.id)))
400                     return 0;
401             e->v.Name.ctx = ctx;
402             break;
403         case List_kind:
404             e->v.List.ctx = ctx;
405             s = e->v.List.elts;
406             break;
407         case Tuple_kind:
408             if (asdl_seq_LEN(e->v.Tuple.elts))  {
409                 e->v.Tuple.ctx = ctx;
410                 s = e->v.Tuple.elts;
411             }
412             else {
413                 expr_name = "()";
414             }
415             break;
416         case Lambda_kind:
417             expr_name = "lambda";
418             break;
419         case Call_kind:
420             expr_name = "function call";
421             break;
422         case BoolOp_kind:
423         case BinOp_kind:
424         case UnaryOp_kind:
425             expr_name = "operator";
426             break;
427         case GeneratorExp_kind:
428             expr_name = "generator expression";
429             break;
430         case Yield_kind:
431             expr_name = "yield expression";
432             break;
433         case ListComp_kind:
434             expr_name = "list comprehension";
435             break;
436         case SetComp_kind:
437             expr_name = "set comprehension";
438             break;
439         case DictComp_kind:
440             expr_name = "dict comprehension";
441             break;
442         case Dict_kind:
443         case Set_kind:
444         case Num_kind:
445         case Str_kind:
446             expr_name = "literal";
447             break;
448         case Compare_kind:
449             expr_name = "comparison";
450             break;
451         case Repr_kind:
452             expr_name = "repr";
453             break;
454         case IfExp_kind:
455             expr_name = "conditional expression";
456             break;
457         default:
458             PyErr_Format(PyExc_SystemError,
459                          "unexpected expression in assignment %d (line %d)",
460                          e->kind, e->lineno);
461             return 0;
462     }
463     /* Check for error string set by switch */
464     if (expr_name) {
465         char buf[300];
466         PyOS_snprintf(buf, sizeof(buf),
467                       "can't %s %s",
468                       ctx == Store ? "assign to" : "delete",
469                       expr_name);
470         return ast_error(n, buf);
471     }
472 
473     /* If the LHS is a list or tuple, we need to set the assignment
474        context for all the contained elements.
475     */
476     if (s) {
477         int i;
478 
479         for (i = 0; i < asdl_seq_LEN(s); i++) {
480             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
481                 return 0;
482         }
483     }
484     return 1;
485 }
486 
487 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)488 ast_for_augassign(struct compiling *c, const node *n)
489 {
490     REQ(n, augassign);
491     n = CHILD(n, 0);
492     switch (STR(n)[0]) {
493         case '+':
494             return Add;
495         case '-':
496             return Sub;
497         case '/':
498             if (STR(n)[1] == '/')
499                 return FloorDiv;
500             else
501                 return Div;
502         case '%':
503             return Mod;
504         case '<':
505             return LShift;
506         case '>':
507             return RShift;
508         case '&':
509             return BitAnd;
510         case '^':
511             return BitXor;
512         case '|':
513             return BitOr;
514         case '*':
515             if (STR(n)[1] == '*')
516                 return Pow;
517             else
518                 return Mult;
519         default:
520             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
521             return (operator_ty)0;
522     }
523 }
524 
525 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)526 ast_for_comp_op(struct compiling *c, const node *n)
527 {
528     /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
529                |'is' 'not'
530     */
531     REQ(n, comp_op);
532     if (NCH(n) == 1) {
533         n = CHILD(n, 0);
534         switch (TYPE(n)) {
535             case LESS:
536                 return Lt;
537             case GREATER:
538                 return Gt;
539             case EQEQUAL:                       /* == */
540                 return Eq;
541             case LESSEQUAL:
542                 return LtE;
543             case GREATEREQUAL:
544                 return GtE;
545             case NOTEQUAL:
546                 return NotEq;
547             case NAME:
548                 if (strcmp(STR(n), "in") == 0)
549                     return In;
550                 if (strcmp(STR(n), "is") == 0)
551                     return Is;
552             default:
553                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
554                              STR(n));
555                 return (cmpop_ty)0;
556         }
557     }
558     else if (NCH(n) == 2) {
559         /* handle "not in" and "is not" */
560         switch (TYPE(CHILD(n, 0))) {
561             case NAME:
562                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
563                     return NotIn;
564                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
565                     return IsNot;
566             default:
567                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
568                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
569                 return (cmpop_ty)0;
570         }
571     }
572     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
573                  NCH(n));
574     return (cmpop_ty)0;
575 }
576 
577 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)578 seq_for_testlist(struct compiling *c, const node *n)
579 {
580     /* testlist: test (',' test)* [','] */
581     asdl_seq *seq;
582     expr_ty expression;
583     int i;
584     assert(TYPE(n) == testlist ||
585            TYPE(n) == listmaker ||
586            TYPE(n) == testlist_comp ||
587            TYPE(n) == testlist_safe ||
588            TYPE(n) == testlist1);
589 
590     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
591     if (!seq)
592         return NULL;
593 
594     for (i = 0; i < NCH(n); i += 2) {
595         assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
596 
597         expression = ast_for_expr(c, CHILD(n, i));
598         if (!expression)
599             return NULL;
600 
601         assert(i / 2 < seq->size);
602         asdl_seq_SET(seq, i / 2, expression);
603     }
604     return seq;
605 }
606 
607 static expr_ty
compiler_complex_args(struct compiling * c,const node * n)608 compiler_complex_args(struct compiling *c, const node *n)
609 {
610     int i, len = (NCH(n) + 1) / 2;
611     expr_ty result;
612     asdl_seq *args = asdl_seq_new(len, c->c_arena);
613     if (!args)
614         return NULL;
615 
616     /* fpdef: NAME | '(' fplist ')'
617        fplist: fpdef (',' fpdef)* [',']
618     */
619     REQ(n, fplist);
620     for (i = 0; i < len; i++) {
621         PyObject *arg_id;
622         const node *fpdef_node = CHILD(n, 2*i);
623         const node *child;
624         expr_ty arg;
625 set_name:
626         /* fpdef_node is either a NAME or an fplist */
627         child = CHILD(fpdef_node, 0);
628         if (TYPE(child) == NAME) {
629             if (!forbidden_check(c, n, STR(child)))
630                 return NULL;
631             arg_id = NEW_IDENTIFIER(child);
632             if (!arg_id)
633                 return NULL;
634             arg = Name(arg_id, Store, LINENO(child), child->n_col_offset,
635                        c->c_arena);
636         }
637         else {
638             assert(TYPE(fpdef_node) == fpdef);
639             /* fpdef_node[0] is not a name, so it must be '(', get CHILD[1] */
640             child = CHILD(fpdef_node, 1);
641             assert(TYPE(child) == fplist);
642             /* NCH == 1 means we have (x), we need to elide the extra parens */
643             if (NCH(child) == 1) {
644                 fpdef_node = CHILD(child, 0);
645                 assert(TYPE(fpdef_node) == fpdef);
646                 goto set_name;
647             }
648             arg = compiler_complex_args(c, child);
649         }
650         asdl_seq_SET(args, i, arg);
651     }
652 
653     result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
654     if (!set_context(c, result, Store, n))
655         return NULL;
656     return result;
657 }
658 
659 
660 /* Create AST for argument list. */
661 
662 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)663 ast_for_arguments(struct compiling *c, const node *n)
664 {
665     /* parameters: '(' [varargslist] ')'
666        varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
667             | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
668     */
669     int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
670     asdl_seq *args, *defaults;
671     identifier vararg = NULL, kwarg = NULL;
672     node *ch;
673 
674     if (TYPE(n) == parameters) {
675         if (NCH(n) == 2) /* () as argument list */
676             return arguments(NULL, NULL, NULL, NULL, c->c_arena);
677         n = CHILD(n, 1);
678     }
679     REQ(n, varargslist);
680 
681     /* first count the number of normal args & defaults */
682     for (i = 0; i < NCH(n); i++) {
683         ch = CHILD(n, i);
684         if (TYPE(ch) == fpdef)
685             n_args++;
686         if (TYPE(ch) == EQUAL)
687             n_defaults++;
688     }
689     args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
690     if (!args && n_args)
691         return NULL;
692     defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
693     if (!defaults && n_defaults)
694         return NULL;
695 
696     /* fpdef: NAME | '(' fplist ')'
697        fplist: fpdef (',' fpdef)* [',']
698     */
699     i = 0;
700     j = 0;  /* index for defaults */
701     k = 0;  /* index for args */
702     while (i < NCH(n)) {
703         ch = CHILD(n, i);
704         switch (TYPE(ch)) {
705             case fpdef: {
706                 int complex_args = 0, parenthesized = 0;
707             handle_fpdef:
708                 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
709                    anything other than EQUAL or a comma? */
710                 /* XXX Should NCH(n) check be made a separate check? */
711                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
712                     expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
713                     if (!expression)
714                         return NULL;
715                     assert(defaults != NULL);
716                     asdl_seq_SET(defaults, j++, expression);
717                     i += 2;
718                     found_default = 1;
719                 }
720                 else if (found_default) {
721                     /* def f((x)=4): pass should raise an error.
722                        def f((x, (y))): pass will just incur the tuple unpacking warning. */
723                     if (parenthesized && !complex_args) {
724                         ast_error(n, "parenthesized arg with default");
725                         return NULL;
726                     }
727                     ast_error(n,
728                              "non-default argument follows default argument");
729                     return NULL;
730                 }
731                 if (NCH(ch) == 3) {
732                     ch = CHILD(ch, 1);
733                     /* def foo((x)): is not complex, special case. */
734                     if (NCH(ch) != 1) {
735                         /* We have complex arguments, setup for unpacking. */
736                         if (Py_Py3kWarningFlag && !ast_warn(c, ch,
737                             "tuple parameter unpacking has been removed in 3.x"))
738                             return NULL;
739                         complex_args = 1;
740                         asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
741                         if (!asdl_seq_GET(args, k-1))
742                                 return NULL;
743                     } else {
744                         /* def foo((x)): setup for checking NAME below. */
745                         /* Loop because there can be many parens and tuple
746                            unpacking mixed in. */
747                         parenthesized = 1;
748                         ch = CHILD(ch, 0);
749                         assert(TYPE(ch) == fpdef);
750                         goto handle_fpdef;
751                     }
752                 }
753                 if (TYPE(CHILD(ch, 0)) == NAME) {
754                     PyObject *id;
755                     expr_ty name;
756                     if (!forbidden_check(c, n, STR(CHILD(ch, 0))))
757                         return NULL;
758                     id = NEW_IDENTIFIER(CHILD(ch, 0));
759                     if (!id)
760                         return NULL;
761                     name = Name(id, Param, LINENO(ch), ch->n_col_offset,
762                                 c->c_arena);
763                     if (!name)
764                         return NULL;
765                     asdl_seq_SET(args, k++, name);
766 
767                 }
768                 i += 2; /* the name and the comma */
769                 if (parenthesized && Py_Py3kWarningFlag &&
770                     !ast_warn(c, ch, "parenthesized argument names "
771                               "are invalid in 3.x"))
772                     return NULL;
773 
774                 break;
775             }
776             case STAR:
777                 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
778                     return NULL;
779                 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
780                 if (!vararg)
781                     return NULL;
782                 i += 3;
783                 break;
784             case DOUBLESTAR:
785                 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
786                     return NULL;
787                 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
788                 if (!kwarg)
789                     return NULL;
790                 i += 3;
791                 break;
792             default:
793                 PyErr_Format(PyExc_SystemError,
794                              "unexpected node in varargslist: %d @ %d",
795                              TYPE(ch), i);
796                 return NULL;
797         }
798     }
799 
800     return arguments(args, vararg, kwarg, defaults, c->c_arena);
801 }
802 
803 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)804 ast_for_dotted_name(struct compiling *c, const node *n)
805 {
806     expr_ty e;
807     identifier id;
808     int lineno, col_offset;
809     int i;
810 
811     REQ(n, dotted_name);
812 
813     lineno = LINENO(n);
814     col_offset = n->n_col_offset;
815 
816     id = NEW_IDENTIFIER(CHILD(n, 0));
817     if (!id)
818         return NULL;
819     e = Name(id, Load, lineno, col_offset, c->c_arena);
820     if (!e)
821         return NULL;
822 
823     for (i = 2; i < NCH(n); i+=2) {
824         id = NEW_IDENTIFIER(CHILD(n, i));
825         if (!id)
826             return NULL;
827         e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
828         if (!e)
829             return NULL;
830     }
831 
832     return e;
833 }
834 
835 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)836 ast_for_decorator(struct compiling *c, const node *n)
837 {
838     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
839     expr_ty d = NULL;
840     expr_ty name_expr;
841 
842     REQ(n, decorator);
843     REQ(CHILD(n, 0), AT);
844     REQ(RCHILD(n, -1), NEWLINE);
845 
846     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
847     if (!name_expr)
848         return NULL;
849 
850     if (NCH(n) == 3) { /* No arguments */
851         d = name_expr;
852         name_expr = NULL;
853     }
854     else if (NCH(n) == 5) { /* Call with no arguments */
855         d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
856                  n->n_col_offset, c->c_arena);
857         if (!d)
858             return NULL;
859         name_expr = NULL;
860     }
861     else {
862         d = ast_for_call(c, CHILD(n, 3), name_expr);
863         if (!d)
864             return NULL;
865         name_expr = NULL;
866     }
867 
868     return d;
869 }
870 
871 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)872 ast_for_decorators(struct compiling *c, const node *n)
873 {
874     asdl_seq* decorator_seq;
875     expr_ty d;
876     int i;
877 
878     REQ(n, decorators);
879     decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
880     if (!decorator_seq)
881         return NULL;
882 
883     for (i = 0; i < NCH(n); i++) {
884         d = ast_for_decorator(c, CHILD(n, i));
885         if (!d)
886             return NULL;
887         asdl_seq_SET(decorator_seq, i, d);
888     }
889     return decorator_seq;
890 }
891 
892 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)893 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
894 {
895     /* funcdef: 'def' NAME parameters ':' suite */
896     identifier name;
897     arguments_ty args;
898     asdl_seq *body;
899     int name_i = 1;
900 
901     REQ(n, funcdef);
902 
903     name = NEW_IDENTIFIER(CHILD(n, name_i));
904     if (!name)
905         return NULL;
906     else if (!forbidden_check(c, CHILD(n, name_i), STR(CHILD(n, name_i))))
907         return NULL;
908     args = ast_for_arguments(c, CHILD(n, name_i + 1));
909     if (!args)
910         return NULL;
911     body = ast_for_suite(c, CHILD(n, name_i + 3));
912     if (!body)
913         return NULL;
914 
915     return FunctionDef(name, args, body, decorator_seq, LINENO(n),
916                        n->n_col_offset, c->c_arena);
917 }
918 
919 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)920 ast_for_decorated(struct compiling *c, const node *n)
921 {
922     /* decorated: decorators (classdef | funcdef) */
923     stmt_ty thing = NULL;
924     asdl_seq *decorator_seq = NULL;
925 
926     REQ(n, decorated);
927 
928     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
929     if (!decorator_seq)
930       return NULL;
931 
932     assert(TYPE(CHILD(n, 1)) == funcdef ||
933            TYPE(CHILD(n, 1)) == classdef);
934 
935     if (TYPE(CHILD(n, 1)) == funcdef) {
936       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
937     } else if (TYPE(CHILD(n, 1)) == classdef) {
938       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
939     }
940     /* we count the decorators in when talking about the class' or
941        function's line number */
942     if (thing) {
943         thing->lineno = LINENO(n);
944         thing->col_offset = n->n_col_offset;
945     }
946     return thing;
947 }
948 
949 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)950 ast_for_lambdef(struct compiling *c, const node *n)
951 {
952     /* lambdef: 'lambda' [varargslist] ':' test */
953     arguments_ty args;
954     expr_ty expression;
955 
956     if (NCH(n) == 3) {
957         args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
958         if (!args)
959             return NULL;
960         expression = ast_for_expr(c, CHILD(n, 2));
961         if (!expression)
962             return NULL;
963     }
964     else {
965         args = ast_for_arguments(c, CHILD(n, 1));
966         if (!args)
967             return NULL;
968         expression = ast_for_expr(c, CHILD(n, 3));
969         if (!expression)
970             return NULL;
971     }
972 
973     return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
974 }
975 
976 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)977 ast_for_ifexpr(struct compiling *c, const node *n)
978 {
979     /* test: or_test 'if' or_test 'else' test */
980     expr_ty expression, body, orelse;
981 
982     assert(NCH(n) == 5);
983     body = ast_for_expr(c, CHILD(n, 0));
984     if (!body)
985         return NULL;
986     expression = ast_for_expr(c, CHILD(n, 2));
987     if (!expression)
988         return NULL;
989     orelse = ast_for_expr(c, CHILD(n, 4));
990     if (!orelse)
991         return NULL;
992     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
993                  c->c_arena);
994 }
995 
996 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
997    so there is only a single version.  Possibly for loops can also re-use
998    the code.
999 */
1000 
1001 /* Count the number of 'for' loop in a list comprehension.
1002 
1003    Helper for ast_for_listcomp().
1004 */
1005 
1006 static int
count_list_fors(struct compiling * c,const node * n)1007 count_list_fors(struct compiling *c, const node *n)
1008 {
1009     int n_fors = 0;
1010     node *ch = CHILD(n, 1);
1011 
1012  count_list_for:
1013     n_fors++;
1014     REQ(ch, list_for);
1015     if (NCH(ch) == 5)
1016         ch = CHILD(ch, 4);
1017     else
1018         return n_fors;
1019  count_list_iter:
1020     REQ(ch, list_iter);
1021     ch = CHILD(ch, 0);
1022     if (TYPE(ch) == list_for)
1023         goto count_list_for;
1024     else if (TYPE(ch) == list_if) {
1025         if (NCH(ch) == 3) {
1026             ch = CHILD(ch, 2);
1027             goto count_list_iter;
1028         }
1029         else
1030             return n_fors;
1031     }
1032 
1033     /* Should never be reached */
1034     PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
1035     return -1;
1036 }
1037 
1038 /* Count the number of 'if' statements in a list comprehension.
1039 
1040    Helper for ast_for_listcomp().
1041 */
1042 
1043 static int
count_list_ifs(struct compiling * c,const node * n)1044 count_list_ifs(struct compiling *c, const node *n)
1045 {
1046     int n_ifs = 0;
1047 
1048  count_list_iter:
1049     REQ(n, list_iter);
1050     if (TYPE(CHILD(n, 0)) == list_for)
1051         return n_ifs;
1052     n = CHILD(n, 0);
1053     REQ(n, list_if);
1054     n_ifs++;
1055     if (NCH(n) == 2)
1056         return n_ifs;
1057     n = CHILD(n, 2);
1058     goto count_list_iter;
1059 }
1060 
1061 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)1062 ast_for_listcomp(struct compiling *c, const node *n)
1063 {
1064     /* listmaker: test ( list_for | (',' test)* [','] )
1065        list_for: 'for' exprlist 'in' testlist_safe [list_iter]
1066        list_iter: list_for | list_if
1067        list_if: 'if' test [list_iter]
1068        testlist_safe: test [(',' test)+ [',']]
1069     */
1070     expr_ty elt, first;
1071     asdl_seq *listcomps;
1072     int i, n_fors;
1073     node *ch;
1074 
1075     REQ(n, listmaker);
1076     assert(NCH(n) > 1);
1077 
1078     elt = ast_for_expr(c, CHILD(n, 0));
1079     if (!elt)
1080         return NULL;
1081 
1082     n_fors = count_list_fors(c, n);
1083     if (n_fors == -1)
1084         return NULL;
1085 
1086     listcomps = asdl_seq_new(n_fors, c->c_arena);
1087     if (!listcomps)
1088         return NULL;
1089 
1090     ch = CHILD(n, 1);
1091     for (i = 0; i < n_fors; i++) {
1092         comprehension_ty lc;
1093         asdl_seq *t;
1094         expr_ty expression;
1095         node *for_ch;
1096 
1097         REQ(ch, list_for);
1098 
1099         for_ch = CHILD(ch, 1);
1100         t = ast_for_exprlist(c, for_ch, Store);
1101         if (!t)
1102             return NULL;
1103         expression = ast_for_testlist(c, CHILD(ch, 3));
1104         if (!expression)
1105             return NULL;
1106 
1107         /* Check the # of children rather than the length of t, since
1108            [x for x, in ... ] has 1 element in t, but still requires a Tuple.
1109         */
1110         first = (expr_ty)asdl_seq_GET(t, 0);
1111         if (NCH(for_ch) == 1)
1112             lc = comprehension(first, expression, NULL, c->c_arena);
1113         else
1114             lc = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1115                                      c->c_arena),
1116                                expression, NULL, c->c_arena);
1117         if (!lc)
1118             return NULL;
1119 
1120         if (NCH(ch) == 5) {
1121             int j, n_ifs;
1122             asdl_seq *ifs;
1123             expr_ty list_for_expr;
1124 
1125             ch = CHILD(ch, 4);
1126             n_ifs = count_list_ifs(c, ch);
1127             if (n_ifs == -1)
1128                 return NULL;
1129 
1130             ifs = asdl_seq_new(n_ifs, c->c_arena);
1131             if (!ifs)
1132                 return NULL;
1133 
1134             for (j = 0; j < n_ifs; j++) {
1135                 REQ(ch, list_iter);
1136                 ch = CHILD(ch, 0);
1137                 REQ(ch, list_if);
1138 
1139                 list_for_expr = ast_for_expr(c, CHILD(ch, 1));
1140                 if (!list_for_expr)
1141                     return NULL;
1142 
1143                 asdl_seq_SET(ifs, j, list_for_expr);
1144                 if (NCH(ch) == 3)
1145                     ch = CHILD(ch, 2);
1146             }
1147             /* on exit, must guarantee that ch is a list_for */
1148             if (TYPE(ch) == list_iter)
1149                 ch = CHILD(ch, 0);
1150             lc->ifs = ifs;
1151         }
1152         asdl_seq_SET(listcomps, i, lc);
1153     }
1154 
1155     return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1156 }
1157 
1158 /*
1159    Count the number of 'for' loops in a comprehension.
1160 
1161    Helper for ast_for_comprehension().
1162 */
1163 
1164 static int
count_comp_fors(struct compiling * c,const node * n)1165 count_comp_fors(struct compiling *c, const node *n)
1166 {
1167     int n_fors = 0;
1168 
1169   count_comp_for:
1170     n_fors++;
1171     REQ(n, comp_for);
1172     if (NCH(n) == 5)
1173         n = CHILD(n, 4);
1174     else
1175         return n_fors;
1176   count_comp_iter:
1177     REQ(n, comp_iter);
1178     n = CHILD(n, 0);
1179     if (TYPE(n) == comp_for)
1180         goto count_comp_for;
1181     else if (TYPE(n) == comp_if) {
1182         if (NCH(n) == 3) {
1183             n = CHILD(n, 2);
1184             goto count_comp_iter;
1185         }
1186         else
1187             return n_fors;
1188     }
1189 
1190     /* Should never be reached */
1191     PyErr_SetString(PyExc_SystemError,
1192                     "logic error in count_comp_fors");
1193     return -1;
1194 }
1195 
1196 /* Count the number of 'if' statements in a comprehension.
1197 
1198    Helper for ast_for_comprehension().
1199 */
1200 
1201 static int
count_comp_ifs(struct compiling * c,const node * n)1202 count_comp_ifs(struct compiling *c, const node *n)
1203 {
1204     int n_ifs = 0;
1205 
1206     while (1) {
1207         REQ(n, comp_iter);
1208         if (TYPE(CHILD(n, 0)) == comp_for)
1209             return n_ifs;
1210         n = CHILD(n, 0);
1211         REQ(n, comp_if);
1212         n_ifs++;
1213         if (NCH(n) == 2)
1214             return n_ifs;
1215         n = CHILD(n, 2);
1216     }
1217 }
1218 
1219 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1220 ast_for_comprehension(struct compiling *c, const node *n)
1221 {
1222     int i, n_fors;
1223     asdl_seq *comps;
1224 
1225     n_fors = count_comp_fors(c, n);
1226     if (n_fors == -1)
1227         return NULL;
1228 
1229     comps = asdl_seq_new(n_fors, c->c_arena);
1230     if (!comps)
1231         return NULL;
1232 
1233     for (i = 0; i < n_fors; i++) {
1234         comprehension_ty comp;
1235         asdl_seq *t;
1236         expr_ty expression, first;
1237         node *for_ch;
1238 
1239         REQ(n, comp_for);
1240 
1241         for_ch = CHILD(n, 1);
1242         t = ast_for_exprlist(c, for_ch, Store);
1243         if (!t)
1244             return NULL;
1245         expression = ast_for_expr(c, CHILD(n, 3));
1246         if (!expression)
1247             return NULL;
1248 
1249         /* Check the # of children rather than the length of t, since
1250            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1251         first = (expr_ty)asdl_seq_GET(t, 0);
1252         if (NCH(for_ch) == 1)
1253             comp = comprehension(first, expression, NULL, c->c_arena);
1254         else
1255             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1256                                      c->c_arena),
1257                                expression, NULL, c->c_arena);
1258         if (!comp)
1259             return NULL;
1260 
1261         if (NCH(n) == 5) {
1262             int j, n_ifs;
1263             asdl_seq *ifs;
1264 
1265             n = CHILD(n, 4);
1266             n_ifs = count_comp_ifs(c, n);
1267             if (n_ifs == -1)
1268                 return NULL;
1269 
1270             ifs = asdl_seq_new(n_ifs, c->c_arena);
1271             if (!ifs)
1272                 return NULL;
1273 
1274             for (j = 0; j < n_ifs; j++) {
1275                 REQ(n, comp_iter);
1276                 n = CHILD(n, 0);
1277                 REQ(n, comp_if);
1278 
1279                 expression = ast_for_expr(c, CHILD(n, 1));
1280                 if (!expression)
1281                     return NULL;
1282                 asdl_seq_SET(ifs, j, expression);
1283                 if (NCH(n) == 3)
1284                     n = CHILD(n, 2);
1285             }
1286             /* on exit, must guarantee that n is a comp_for */
1287             if (TYPE(n) == comp_iter)
1288                 n = CHILD(n, 0);
1289             comp->ifs = ifs;
1290         }
1291         asdl_seq_SET(comps, i, comp);
1292     }
1293     return comps;
1294 }
1295 
1296 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1297 ast_for_itercomp(struct compiling *c, const node *n, int type)
1298 {
1299     expr_ty elt;
1300     asdl_seq *comps;
1301 
1302     assert(NCH(n) > 1);
1303 
1304     elt = ast_for_expr(c, CHILD(n, 0));
1305     if (!elt)
1306         return NULL;
1307 
1308     comps = ast_for_comprehension(c, CHILD(n, 1));
1309     if (!comps)
1310         return NULL;
1311 
1312     if (type == COMP_GENEXP)
1313         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1314     else if (type == COMP_SETCOMP)
1315         return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1316     else
1317         /* Should never happen */
1318         return NULL;
1319 }
1320 
1321 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1322 ast_for_dictcomp(struct compiling *c, const node *n)
1323 {
1324     expr_ty key, value;
1325     asdl_seq *comps;
1326 
1327     assert(NCH(n) > 3);
1328     REQ(CHILD(n, 1), COLON);
1329 
1330     key = ast_for_expr(c, CHILD(n, 0));
1331     if (!key)
1332         return NULL;
1333 
1334     value = ast_for_expr(c, CHILD(n, 2));
1335     if (!value)
1336         return NULL;
1337 
1338     comps = ast_for_comprehension(c, CHILD(n, 3));
1339     if (!comps)
1340         return NULL;
1341 
1342     return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1343 }
1344 
1345 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)1346 ast_for_genexp(struct compiling *c, const node *n)
1347 {
1348     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1349     return ast_for_itercomp(c, n, COMP_GENEXP);
1350 }
1351 
1352 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)1353 ast_for_setcomp(struct compiling *c, const node *n)
1354 {
1355     assert(TYPE(n) == (dictorsetmaker));
1356     return ast_for_itercomp(c, n, COMP_SETCOMP);
1357 }
1358 
1359 static expr_ty
ast_for_atom(struct compiling * c,const node * n)1360 ast_for_atom(struct compiling *c, const node *n)
1361 {
1362     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']'
1363        | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1364     */
1365     node *ch = CHILD(n, 0);
1366 
1367     switch (TYPE(ch)) {
1368     case NAME: {
1369         /* All names start in Load context, but may later be
1370            changed. */
1371         PyObject *name = NEW_IDENTIFIER(ch);
1372         if (!name)
1373             return NULL;
1374         return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1375     }
1376     case STRING: {
1377         PyObject *str = parsestrplus(c, n);
1378         if (!str) {
1379 #ifdef Py_USING_UNICODE
1380             if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
1381                 PyObject *type, *value, *tback, *errstr;
1382                 PyErr_Fetch(&type, &value, &tback);
1383                 errstr = PyObject_Str(value);
1384                 if (errstr) {
1385                     char *s = "";
1386                     char buf[128];
1387                     s = PyString_AsString(errstr);
1388                     PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1389                     ast_error(n, buf);
1390                     Py_DECREF(errstr);
1391                 } else {
1392                     ast_error(n, "(unicode error) unknown error");
1393                 }
1394                 Py_DECREF(type);
1395                 Py_DECREF(value);
1396                 Py_XDECREF(tback);
1397             }
1398 #endif
1399             return NULL;
1400         }
1401         PyArena_AddPyObject(c->c_arena, str);
1402         return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1403     }
1404     case NUMBER: {
1405         PyObject *pynum = parsenumber(c, STR(ch));
1406         if (!pynum)
1407             return NULL;
1408 
1409         PyArena_AddPyObject(c->c_arena, pynum);
1410         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1411     }
1412     case LPAR: /* some parenthesized expressions */
1413         ch = CHILD(n, 1);
1414 
1415         if (TYPE(ch) == RPAR)
1416             return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1417 
1418         if (TYPE(ch) == yield_expr)
1419             return ast_for_expr(c, ch);
1420 
1421         return ast_for_testlist_comp(c, ch);
1422     case LSQB: /* list (or list comprehension) */
1423         ch = CHILD(n, 1);
1424 
1425         if (TYPE(ch) == RSQB)
1426             return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1427 
1428         REQ(ch, listmaker);
1429         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1430             asdl_seq *elts = seq_for_testlist(c, ch);
1431             if (!elts)
1432                 return NULL;
1433 
1434             return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1435         }
1436         else
1437             return ast_for_listcomp(c, ch);
1438     case LBRACE: {
1439         /* dictorsetmaker:
1440          *    (test ':' test (comp_for | (',' test ':' test)* [','])) |
1441          *    (test (comp_for | (',' test)* [',']))
1442          */
1443         int i, size;
1444         asdl_seq *keys, *values;
1445 
1446         ch = CHILD(n, 1);
1447         if (TYPE(ch) == RBRACE) {
1448             /* it's an empty dict */
1449             return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1450         } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1451             /* it's a simple set */
1452             asdl_seq *elts;
1453             size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1454             elts = asdl_seq_new(size, c->c_arena);
1455             if (!elts)
1456                 return NULL;
1457             for (i = 0; i < NCH(ch); i += 2) {
1458                 expr_ty expression;
1459                 expression = ast_for_expr(c, CHILD(ch, i));
1460                 if (!expression)
1461                     return NULL;
1462                 asdl_seq_SET(elts, i / 2, expression);
1463             }
1464             return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1465         } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1466             /* it's a set comprehension */
1467             return ast_for_setcomp(c, ch);
1468         } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1469             return ast_for_dictcomp(c, ch);
1470         } else {
1471             /* it's a dict */
1472             size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1473             keys = asdl_seq_new(size, c->c_arena);
1474             if (!keys)
1475                 return NULL;
1476 
1477             values = asdl_seq_new(size, c->c_arena);
1478             if (!values)
1479                 return NULL;
1480 
1481             for (i = 0; i < NCH(ch); i += 4) {
1482                 expr_ty expression;
1483 
1484                 expression = ast_for_expr(c, CHILD(ch, i));
1485                 if (!expression)
1486                     return NULL;
1487 
1488                 asdl_seq_SET(keys, i / 4, expression);
1489 
1490                 expression = ast_for_expr(c, CHILD(ch, i + 2));
1491                 if (!expression)
1492                     return NULL;
1493 
1494                 asdl_seq_SET(values, i / 4, expression);
1495             }
1496             return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1497         }
1498     }
1499     case BACKQUOTE: { /* repr */
1500         expr_ty expression;
1501         if (Py_Py3kWarningFlag &&
1502             !ast_warn(c, n, "backquote not supported in 3.x; use repr()"))
1503             return NULL;
1504         expression = ast_for_testlist(c, CHILD(n, 1));
1505         if (!expression)
1506             return NULL;
1507 
1508         return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1509     }
1510     default:
1511         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1512         return NULL;
1513     }
1514 }
1515 
1516 static slice_ty
ast_for_slice(struct compiling * c,const node * n)1517 ast_for_slice(struct compiling *c, const node *n)
1518 {
1519     node *ch;
1520     expr_ty lower = NULL, upper = NULL, step = NULL;
1521 
1522     REQ(n, subscript);
1523 
1524     /*
1525        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1526        sliceop: ':' [test]
1527     */
1528     ch = CHILD(n, 0);
1529     if (TYPE(ch) == DOT)
1530         return Ellipsis(c->c_arena);
1531 
1532     if (NCH(n) == 1 && TYPE(ch) == test) {
1533         /* 'step' variable hold no significance in terms of being used over
1534            other vars */
1535         step = ast_for_expr(c, ch);
1536         if (!step)
1537             return NULL;
1538 
1539         return Index(step, c->c_arena);
1540     }
1541 
1542     if (TYPE(ch) == test) {
1543         lower = ast_for_expr(c, ch);
1544         if (!lower)
1545             return NULL;
1546     }
1547 
1548     /* If there's an upper bound it's in the second or third position. */
1549     if (TYPE(ch) == COLON) {
1550         if (NCH(n) > 1) {
1551             node *n2 = CHILD(n, 1);
1552 
1553             if (TYPE(n2) == test) {
1554                 upper = ast_for_expr(c, n2);
1555                 if (!upper)
1556                     return NULL;
1557             }
1558         }
1559     } else if (NCH(n) > 2) {
1560         node *n2 = CHILD(n, 2);
1561 
1562         if (TYPE(n2) == test) {
1563             upper = ast_for_expr(c, n2);
1564             if (!upper)
1565                 return NULL;
1566         }
1567     }
1568 
1569     ch = CHILD(n, NCH(n) - 1);
1570     if (TYPE(ch) == sliceop) {
1571         if (NCH(ch) == 1) {
1572             /*
1573               This is an extended slice (ie "x[::]") with no expression in the
1574               step field. We set this literally to "None" in order to
1575               disambiguate it from x[:]. (The interpreter might have to call
1576               __getslice__ for x[:], but it must call __getitem__ for x[::].)
1577             */
1578             identifier none = new_identifier("None", c->c_arena);
1579             if (!none)
1580                 return NULL;
1581             ch = CHILD(ch, 0);
1582             step = Name(none, Load, LINENO(ch), ch->n_col_offset, c->c_arena);
1583             if (!step)
1584                 return NULL;
1585         } else {
1586             ch = CHILD(ch, 1);
1587             if (TYPE(ch) == test) {
1588                 step = ast_for_expr(c, ch);
1589                 if (!step)
1590                     return NULL;
1591             }
1592         }
1593     }
1594 
1595     return Slice(lower, upper, step, c->c_arena);
1596 }
1597 
1598 static expr_ty
ast_for_binop(struct compiling * c,const node * n)1599 ast_for_binop(struct compiling *c, const node *n)
1600 {
1601         /* Must account for a sequence of expressions.
1602            How should A op B op C by represented?
1603            BinOp(BinOp(A, op, B), op, C).
1604         */
1605 
1606         int i, nops;
1607         expr_ty expr1, expr2, result;
1608         operator_ty newoperator;
1609 
1610         expr1 = ast_for_expr(c, CHILD(n, 0));
1611         if (!expr1)
1612             return NULL;
1613 
1614         expr2 = ast_for_expr(c, CHILD(n, 2));
1615         if (!expr2)
1616             return NULL;
1617 
1618         newoperator = get_operator(CHILD(n, 1));
1619         if (!newoperator)
1620             return NULL;
1621 
1622         result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1623                        c->c_arena);
1624         if (!result)
1625             return NULL;
1626 
1627         nops = (NCH(n) - 1) / 2;
1628         for (i = 1; i < nops; i++) {
1629                 expr_ty tmp_result, tmp;
1630                 const node* next_oper = CHILD(n, i * 2 + 1);
1631 
1632                 newoperator = get_operator(next_oper);
1633                 if (!newoperator)
1634                     return NULL;
1635 
1636                 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1637                 if (!tmp)
1638                     return NULL;
1639 
1640                 tmp_result = BinOp(result, newoperator, tmp,
1641                                    LINENO(next_oper), next_oper->n_col_offset,
1642                                    c->c_arena);
1643                 if (!tmp_result)
1644                         return NULL;
1645                 result = tmp_result;
1646         }
1647         return result;
1648 }
1649 
1650 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)1651 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1652 {
1653     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1654        subscriptlist: subscript (',' subscript)* [',']
1655        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1656      */
1657     REQ(n, trailer);
1658     if (TYPE(CHILD(n, 0)) == LPAR) {
1659         if (NCH(n) == 2)
1660             return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1661                         n->n_col_offset, c->c_arena);
1662         else
1663             return ast_for_call(c, CHILD(n, 1), left_expr);
1664     }
1665     else if (TYPE(CHILD(n, 0)) == DOT ) {
1666         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1667         if (!attr_id)
1668             return NULL;
1669         return Attribute(left_expr, attr_id, Load,
1670                          LINENO(n), n->n_col_offset, c->c_arena);
1671     }
1672     else {
1673         REQ(CHILD(n, 0), LSQB);
1674         REQ(CHILD(n, 2), RSQB);
1675         n = CHILD(n, 1);
1676         if (NCH(n) == 1) {
1677             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1678             if (!slc)
1679                 return NULL;
1680             return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1681                              c->c_arena);
1682         }
1683         else {
1684             /* The grammar is ambiguous here. The ambiguity is resolved
1685                by treating the sequence as a tuple literal if there are
1686                no slice features.
1687             */
1688             int j;
1689             slice_ty slc;
1690             expr_ty e;
1691             bool simple = true;
1692             asdl_seq *slices, *elts;
1693             slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1694             if (!slices)
1695                 return NULL;
1696             for (j = 0; j < NCH(n); j += 2) {
1697                 slc = ast_for_slice(c, CHILD(n, j));
1698                 if (!slc)
1699                     return NULL;
1700                 if (slc->kind != Index_kind)
1701                     simple = false;
1702                 asdl_seq_SET(slices, j / 2, slc);
1703             }
1704             if (!simple) {
1705                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1706                                  Load, LINENO(n), n->n_col_offset, c->c_arena);
1707             }
1708             /* extract Index values and put them in a Tuple */
1709             elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1710             if (!elts)
1711                 return NULL;
1712             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1713                 slc = (slice_ty)asdl_seq_GET(slices, j);
1714                 assert(slc->kind == Index_kind  && slc->v.Index.value);
1715                 asdl_seq_SET(elts, j, slc->v.Index.value);
1716             }
1717             e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1718             if (!e)
1719                 return NULL;
1720             return Subscript(left_expr, Index(e, c->c_arena),
1721                              Load, LINENO(n), n->n_col_offset, c->c_arena);
1722         }
1723     }
1724 }
1725 
1726 static expr_ty
ast_for_factor(struct compiling * c,const node * n)1727 ast_for_factor(struct compiling *c, const node *n)
1728 {
1729     node *pfactor, *ppower, *patom, *pnum;
1730     expr_ty expression;
1731 
1732     /* If the unary - operator is applied to a constant, don't generate
1733        a UNARY_NEGATIVE opcode.  Just store the approriate value as a
1734        constant.  The peephole optimizer already does something like
1735        this but it doesn't handle the case where the constant is
1736        (sys.maxint - 1).  In that case, we want a PyIntObject, not a
1737        PyLongObject.
1738     */
1739     if (TYPE(CHILD(n, 0)) == MINUS &&
1740         NCH(n) == 2 &&
1741         TYPE((pfactor = CHILD(n, 1))) == factor &&
1742         NCH(pfactor) == 1 &&
1743         TYPE((ppower = CHILD(pfactor, 0))) == power &&
1744         NCH(ppower) == 1 &&
1745         TYPE((patom = CHILD(ppower, 0))) == atom &&
1746         TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1747         PyObject *pynum;
1748         char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1749         if (s == NULL)
1750             return NULL;
1751         s[0] = '-';
1752         strcpy(s + 1, STR(pnum));
1753         pynum = parsenumber(c, s);
1754         PyObject_FREE(s);
1755         if (!pynum)
1756             return NULL;
1757 
1758         PyArena_AddPyObject(c->c_arena, pynum);
1759         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1760     }
1761 
1762     expression = ast_for_expr(c, CHILD(n, 1));
1763     if (!expression)
1764         return NULL;
1765 
1766     switch (TYPE(CHILD(n, 0))) {
1767         case PLUS:
1768             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1769                            c->c_arena);
1770         case MINUS:
1771             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1772                            c->c_arena);
1773         case TILDE:
1774             return UnaryOp(Invert, expression, LINENO(n),
1775                            n->n_col_offset, c->c_arena);
1776     }
1777     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1778                  TYPE(CHILD(n, 0)));
1779     return NULL;
1780 }
1781 
1782 static expr_ty
ast_for_power(struct compiling * c,const node * n)1783 ast_for_power(struct compiling *c, const node *n)
1784 {
1785     /* power: atom trailer* ('**' factor)*
1786      */
1787     int i;
1788     expr_ty e, tmp;
1789     REQ(n, power);
1790     e = ast_for_atom(c, CHILD(n, 0));
1791     if (!e)
1792         return NULL;
1793     if (NCH(n) == 1)
1794         return e;
1795     for (i = 1; i < NCH(n); i++) {
1796         node *ch = CHILD(n, i);
1797         if (TYPE(ch) != trailer)
1798             break;
1799         tmp = ast_for_trailer(c, ch, e);
1800         if (!tmp)
1801             return NULL;
1802         tmp->lineno = e->lineno;
1803         tmp->col_offset = e->col_offset;
1804         e = tmp;
1805     }
1806     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1807         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1808         if (!f)
1809             return NULL;
1810         tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1811         if (!tmp)
1812             return NULL;
1813         e = tmp;
1814     }
1815     return e;
1816 }
1817 
1818 /* Do not name a variable 'expr'!  Will cause a compile error.
1819 */
1820 
1821 static expr_ty
ast_for_expr(struct compiling * c,const node * n)1822 ast_for_expr(struct compiling *c, const node *n)
1823 {
1824     /* handle the full range of simple expressions
1825        test: or_test ['if' or_test 'else' test] | lambdef
1826        or_test: and_test ('or' and_test)*
1827        and_test: not_test ('and' not_test)*
1828        not_test: 'not' not_test | comparison
1829        comparison: expr (comp_op expr)*
1830        expr: xor_expr ('|' xor_expr)*
1831        xor_expr: and_expr ('^' and_expr)*
1832        and_expr: shift_expr ('&' shift_expr)*
1833        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1834        arith_expr: term (('+'|'-') term)*
1835        term: factor (('*'|'/'|'%'|'//') factor)*
1836        factor: ('+'|'-'|'~') factor | power
1837        power: atom trailer* ('**' factor)*
1838 
1839        As well as modified versions that exist for backward compatibility,
1840        to explicitly allow:
1841        [ x for x in lambda: 0, lambda: 1 ]
1842        (which would be ambiguous without these extra rules)
1843 
1844        old_test: or_test | old_lambdef
1845        old_lambdef: 'lambda' [vararglist] ':' old_test
1846 
1847     */
1848 
1849     asdl_seq *seq;
1850     int i;
1851 
1852  loop:
1853     switch (TYPE(n)) {
1854         case test:
1855         case old_test:
1856             if (TYPE(CHILD(n, 0)) == lambdef ||
1857                 TYPE(CHILD(n, 0)) == old_lambdef)
1858                 return ast_for_lambdef(c, CHILD(n, 0));
1859             else if (NCH(n) > 1)
1860                 return ast_for_ifexpr(c, n);
1861             /* Fallthrough */
1862         case or_test:
1863         case and_test:
1864             if (NCH(n) == 1) {
1865                 n = CHILD(n, 0);
1866                 goto loop;
1867             }
1868             seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1869             if (!seq)
1870                 return NULL;
1871             for (i = 0; i < NCH(n); i += 2) {
1872                 expr_ty e = ast_for_expr(c, CHILD(n, i));
1873                 if (!e)
1874                     return NULL;
1875                 asdl_seq_SET(seq, i / 2, e);
1876             }
1877             if (!strcmp(STR(CHILD(n, 1)), "and"))
1878                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1879                               c->c_arena);
1880             assert(!strcmp(STR(CHILD(n, 1)), "or"));
1881             return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1882         case not_test:
1883             if (NCH(n) == 1) {
1884                 n = CHILD(n, 0);
1885                 goto loop;
1886             }
1887             else {
1888                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1889                 if (!expression)
1890                     return NULL;
1891 
1892                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1893                                c->c_arena);
1894             }
1895         case comparison:
1896             if (NCH(n) == 1) {
1897                 n = CHILD(n, 0);
1898                 goto loop;
1899             }
1900             else {
1901                 expr_ty expression;
1902                 asdl_int_seq *ops;
1903                 asdl_seq *cmps;
1904                 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1905                 if (!ops)
1906                     return NULL;
1907                 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1908                 if (!cmps) {
1909                     return NULL;
1910                 }
1911                 for (i = 1; i < NCH(n); i += 2) {
1912                     cmpop_ty newoperator;
1913 
1914                     newoperator = ast_for_comp_op(c, CHILD(n, i));
1915                     if (!newoperator) {
1916                         return NULL;
1917                     }
1918 
1919                     expression = ast_for_expr(c, CHILD(n, i + 1));
1920                     if (!expression) {
1921                         return NULL;
1922                     }
1923 
1924                     asdl_seq_SET(ops, i / 2, newoperator);
1925                     asdl_seq_SET(cmps, i / 2, expression);
1926                 }
1927                 expression = ast_for_expr(c, CHILD(n, 0));
1928                 if (!expression) {
1929                     return NULL;
1930                 }
1931 
1932                 return Compare(expression, ops, cmps, LINENO(n),
1933                                n->n_col_offset, c->c_arena);
1934             }
1935             break;
1936 
1937         /* The next five cases all handle BinOps.  The main body of code
1938            is the same in each case, but the switch turned inside out to
1939            reuse the code for each type of operator.
1940          */
1941         case expr:
1942         case xor_expr:
1943         case and_expr:
1944         case shift_expr:
1945         case arith_expr:
1946         case term:
1947             if (NCH(n) == 1) {
1948                 n = CHILD(n, 0);
1949                 goto loop;
1950             }
1951             return ast_for_binop(c, n);
1952         case yield_expr: {
1953             expr_ty exp = NULL;
1954             if (NCH(n) == 2) {
1955                 exp = ast_for_testlist(c, CHILD(n, 1));
1956                 if (!exp)
1957                     return NULL;
1958             }
1959             return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1960         }
1961         case factor:
1962             if (NCH(n) == 1) {
1963                 n = CHILD(n, 0);
1964                 goto loop;
1965             }
1966             return ast_for_factor(c, n);
1967         case power:
1968             return ast_for_power(c, n);
1969         default:
1970             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1971             return NULL;
1972     }
1973     /* should never get here unless if error is set */
1974     return NULL;
1975 }
1976 
1977 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func)1978 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1979 {
1980     /*
1981       arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1982                | '**' test)
1983       argument: [test '='] test [comp_for]        # Really [keyword '='] test
1984     */
1985 
1986     int i, nargs, nkeywords, ngens;
1987     asdl_seq *args;
1988     asdl_seq *keywords;
1989     expr_ty vararg = NULL, kwarg = NULL;
1990 
1991     REQ(n, arglist);
1992 
1993     nargs = 0;
1994     nkeywords = 0;
1995     ngens = 0;
1996     for (i = 0; i < NCH(n); i++) {
1997         node *ch = CHILD(n, i);
1998         if (TYPE(ch) == argument) {
1999             if (NCH(ch) == 1)
2000                 nargs++;
2001             else if (TYPE(CHILD(ch, 1)) == comp_for)
2002                 ngens++;
2003             else
2004                 nkeywords++;
2005         }
2006     }
2007     if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2008         ast_error(n, "Generator expression must be parenthesized "
2009                   "if not sole argument");
2010         return NULL;
2011     }
2012 
2013     if (nargs + nkeywords + ngens > 255) {
2014       ast_error(n, "more than 255 arguments");
2015       return NULL;
2016     }
2017 
2018     args = asdl_seq_new(nargs + ngens, c->c_arena);
2019     if (!args)
2020         return NULL;
2021     keywords = asdl_seq_new(nkeywords, c->c_arena);
2022     if (!keywords)
2023         return NULL;
2024     nargs = 0;
2025     nkeywords = 0;
2026     for (i = 0; i < NCH(n); i++) {
2027         node *ch = CHILD(n, i);
2028         if (TYPE(ch) == argument) {
2029             expr_ty e;
2030             if (NCH(ch) == 1) {
2031                 if (nkeywords) {
2032                     ast_error(CHILD(ch, 0),
2033                               "non-keyword arg after keyword arg");
2034                     return NULL;
2035                 }
2036                 if (vararg) {
2037                     ast_error(CHILD(ch, 0),
2038                               "only named arguments may follow *expression");
2039                     return NULL;
2040                 }
2041                 e = ast_for_expr(c, CHILD(ch, 0));
2042                 if (!e)
2043                     return NULL;
2044                 asdl_seq_SET(args, nargs++, e);
2045             }
2046             else if (TYPE(CHILD(ch, 1)) == comp_for) {
2047                 e = ast_for_genexp(c, ch);
2048                 if (!e)
2049                     return NULL;
2050                 asdl_seq_SET(args, nargs++, e);
2051             }
2052             else {
2053                 keyword_ty kw;
2054                 identifier key;
2055                 int k;
2056                 char *tmp;
2057 
2058                 /* CHILD(ch, 0) is test, but must be an identifier? */
2059                 e = ast_for_expr(c, CHILD(ch, 0));
2060                 if (!e)
2061                     return NULL;
2062                 /* f(lambda x: x[0] = 3) ends up getting parsed with
2063                  * LHS test = lambda x: x[0], and RHS test = 3.
2064                  * SF bug 132313 points out that complaining about a keyword
2065                  * then is very confusing.
2066                  */
2067                 if (e->kind == Lambda_kind) {
2068                     ast_error(CHILD(ch, 0),
2069                               "lambda cannot contain assignment");
2070                     return NULL;
2071                 } else if (e->kind != Name_kind) {
2072                     ast_error(CHILD(ch, 0), "keyword can't be an expression");
2073                     return NULL;
2074                 }
2075                 key = e->v.Name.id;
2076                 if (!forbidden_check(c, CHILD(ch, 0), PyBytes_AS_STRING(key)))
2077                     return NULL;
2078                 for (k = 0; k < nkeywords; k++) {
2079                     tmp = PyString_AS_STRING(
2080                         ((keyword_ty)asdl_seq_GET(keywords, k))->arg);
2081                     if (!strcmp(tmp, PyString_AS_STRING(key))) {
2082                         ast_error(CHILD(ch, 0), "keyword argument repeated");
2083                         return NULL;
2084                     }
2085                 }
2086                 e = ast_for_expr(c, CHILD(ch, 2));
2087                 if (!e)
2088                     return NULL;
2089                 kw = keyword(key, e, c->c_arena);
2090                 if (!kw)
2091                     return NULL;
2092                 asdl_seq_SET(keywords, nkeywords++, kw);
2093             }
2094         }
2095         else if (TYPE(ch) == STAR) {
2096             vararg = ast_for_expr(c, CHILD(n, i+1));
2097             if (!vararg)
2098                 return NULL;
2099             i++;
2100         }
2101         else if (TYPE(ch) == DOUBLESTAR) {
2102             kwarg = ast_for_expr(c, CHILD(n, i+1));
2103             if (!kwarg)
2104                 return NULL;
2105             i++;
2106         }
2107     }
2108 
2109     return Call(func, args, keywords, vararg, kwarg, func->lineno,
2110                 func->col_offset, c->c_arena);
2111 }
2112 
2113 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2114 ast_for_testlist(struct compiling *c, const node* n)
2115 {
2116     /* testlist_comp: test (',' test)* [','] */
2117     /* testlist: test (',' test)* [','] */
2118     /* testlist_safe: test (',' test)+ [','] */
2119     /* testlist1: test (',' test)* */
2120     assert(NCH(n) > 0);
2121     if (TYPE(n) == testlist_comp) {
2122         if (NCH(n) > 1)
2123             assert(TYPE(CHILD(n, 1)) != comp_for);
2124     }
2125     else {
2126         assert(TYPE(n) == testlist ||
2127                TYPE(n) == testlist_safe ||
2128                TYPE(n) == testlist1);
2129     }
2130     if (NCH(n) == 1)
2131         return ast_for_expr(c, CHILD(n, 0));
2132     else {
2133         asdl_seq *tmp = seq_for_testlist(c, n);
2134         if (!tmp)
2135             return NULL;
2136         return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2137     }
2138 }
2139 
2140 static expr_ty
ast_for_testlist_comp(struct compiling * c,const node * n)2141 ast_for_testlist_comp(struct compiling *c, const node* n)
2142 {
2143     /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2144     /* argument: test [ comp_for ] */
2145     assert(TYPE(n) == testlist_comp || TYPE(n) == argument);
2146     if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == comp_for)
2147         return ast_for_genexp(c, n);
2148     return ast_for_testlist(c, n);
2149 }
2150 
2151 /* like ast_for_testlist() but returns a sequence */
2152 static asdl_seq*
ast_for_class_bases(struct compiling * c,const node * n)2153 ast_for_class_bases(struct compiling *c, const node* n)
2154 {
2155     /* testlist: test (',' test)* [','] */
2156     assert(NCH(n) > 0);
2157     REQ(n, testlist);
2158     if (NCH(n) == 1) {
2159         expr_ty base;
2160         asdl_seq *bases = asdl_seq_new(1, c->c_arena);
2161         if (!bases)
2162             return NULL;
2163         base = ast_for_expr(c, CHILD(n, 0));
2164         if (!base)
2165             return NULL;
2166         asdl_seq_SET(bases, 0, base);
2167         return bases;
2168     }
2169 
2170     return seq_for_testlist(c, n);
2171 }
2172 
2173 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2174 ast_for_expr_stmt(struct compiling *c, const node *n)
2175 {
2176     REQ(n, expr_stmt);
2177     /* expr_stmt: testlist (augassign (yield_expr|testlist)
2178                 | ('=' (yield_expr|testlist))*)
2179        testlist: test (',' test)* [',']
2180        augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2181                 | '<<=' | '>>=' | '**=' | '//='
2182        test: ... here starts the operator precedence dance
2183      */
2184 
2185     if (NCH(n) == 1) {
2186         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2187         if (!e)
2188             return NULL;
2189 
2190         return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2191     }
2192     else if (TYPE(CHILD(n, 1)) == augassign) {
2193         expr_ty expr1, expr2;
2194         operator_ty newoperator;
2195         node *ch = CHILD(n, 0);
2196 
2197         expr1 = ast_for_testlist(c, ch);
2198         if (!expr1)
2199             return NULL;
2200         if(!set_context(c, expr1, Store, ch))
2201             return NULL;
2202         /* set_context checks that most expressions are not the left side.
2203           Augmented assignments can only have a name, a subscript, or an
2204           attribute on the left, though, so we have to explicitly check for
2205           those. */
2206         switch (expr1->kind) {
2207             case Name_kind:
2208             case Attribute_kind:
2209             case Subscript_kind:
2210                 break;
2211             default:
2212                 ast_error(ch, "illegal expression for augmented assignment");
2213                 return NULL;
2214         }
2215 
2216         ch = CHILD(n, 2);
2217         if (TYPE(ch) == testlist)
2218             expr2 = ast_for_testlist(c, ch);
2219         else
2220             expr2 = ast_for_expr(c, ch);
2221         if (!expr2)
2222             return NULL;
2223 
2224         newoperator = ast_for_augassign(c, CHILD(n, 1));
2225         if (!newoperator)
2226             return NULL;
2227 
2228         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2229                          c->c_arena);
2230     }
2231     else {
2232         int i;
2233         asdl_seq *targets;
2234         node *value;
2235         expr_ty expression;
2236 
2237         /* a normal assignment */
2238         REQ(CHILD(n, 1), EQUAL);
2239         targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2240         if (!targets)
2241             return NULL;
2242         for (i = 0; i < NCH(n) - 2; i += 2) {
2243             expr_ty e;
2244             node *ch = CHILD(n, i);
2245             if (TYPE(ch) == yield_expr) {
2246                 ast_error(ch, "assignment to yield expression not possible");
2247                 return NULL;
2248             }
2249             e = ast_for_testlist(c, ch);
2250             if (!e)
2251                 return NULL;
2252 
2253             /* set context to assign */
2254             if (!set_context(c, e, Store, CHILD(n, i)))
2255                 return NULL;
2256 
2257             asdl_seq_SET(targets, i / 2, e);
2258         }
2259         value = CHILD(n, NCH(n) - 1);
2260         if (TYPE(value) == testlist)
2261             expression = ast_for_testlist(c, value);
2262         else
2263             expression = ast_for_expr(c, value);
2264         if (!expression)
2265             return NULL;
2266         return Assign(targets, expression, LINENO(n), n->n_col_offset,
2267                       c->c_arena);
2268     }
2269 }
2270 
2271 static stmt_ty
ast_for_print_stmt(struct compiling * c,const node * n)2272 ast_for_print_stmt(struct compiling *c, const node *n)
2273 {
2274     /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2275                              | '>>' test [ (',' test)+ [','] ] )
2276      */
2277     expr_ty dest = NULL, expression;
2278     asdl_seq *seq = NULL;
2279     bool nl;
2280     int i, j, values_count, start = 1;
2281 
2282     REQ(n, print_stmt);
2283     if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2284         dest = ast_for_expr(c, CHILD(n, 2));
2285         if (!dest)
2286             return NULL;
2287         start = 4;
2288     }
2289     values_count = (NCH(n) + 1 - start) / 2;
2290     if (values_count) {
2291         seq = asdl_seq_new(values_count, c->c_arena);
2292         if (!seq)
2293             return NULL;
2294         for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2295             expression = ast_for_expr(c, CHILD(n, i));
2296             if (!expression)
2297                 return NULL;
2298             asdl_seq_SET(seq, j, expression);
2299         }
2300     }
2301     nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2302     return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2303 }
2304 
2305 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)2306 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2307 {
2308     asdl_seq *seq;
2309     int i;
2310     expr_ty e;
2311 
2312     REQ(n, exprlist);
2313 
2314     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2315     if (!seq)
2316         return NULL;
2317     for (i = 0; i < NCH(n); i += 2) {
2318         e = ast_for_expr(c, CHILD(n, i));
2319         if (!e)
2320             return NULL;
2321         asdl_seq_SET(seq, i / 2, e);
2322         if (context && !set_context(c, e, context, CHILD(n, i)))
2323             return NULL;
2324     }
2325     return seq;
2326 }
2327 
2328 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)2329 ast_for_del_stmt(struct compiling *c, const node *n)
2330 {
2331     asdl_seq *expr_list;
2332 
2333     /* del_stmt: 'del' exprlist */
2334     REQ(n, del_stmt);
2335 
2336     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2337     if (!expr_list)
2338         return NULL;
2339     return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2340 }
2341 
2342 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)2343 ast_for_flow_stmt(struct compiling *c, const node *n)
2344 {
2345     /*
2346       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2347                  | yield_stmt
2348       break_stmt: 'break'
2349       continue_stmt: 'continue'
2350       return_stmt: 'return' [testlist]
2351       yield_stmt: yield_expr
2352       yield_expr: 'yield' testlist
2353       raise_stmt: 'raise' [test [',' test [',' test]]]
2354     */
2355     node *ch;
2356 
2357     REQ(n, flow_stmt);
2358     ch = CHILD(n, 0);
2359     switch (TYPE(ch)) {
2360         case break_stmt:
2361             return Break(LINENO(n), n->n_col_offset, c->c_arena);
2362         case continue_stmt:
2363             return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2364         case yield_stmt: { /* will reduce to yield_expr */
2365             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2366             if (!exp)
2367                 return NULL;
2368             return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2369         }
2370         case return_stmt:
2371             if (NCH(ch) == 1)
2372                 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2373             else {
2374                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2375                 if (!expression)
2376                     return NULL;
2377                 return Return(expression, LINENO(n), n->n_col_offset,
2378                               c->c_arena);
2379             }
2380         case raise_stmt:
2381             if (NCH(ch) == 1)
2382                 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset,
2383                              c->c_arena);
2384             else if (NCH(ch) == 2) {
2385                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2386                 if (!expression)
2387                     return NULL;
2388                 return Raise(expression, NULL, NULL, LINENO(n),
2389                              n->n_col_offset, c->c_arena);
2390             }
2391             else if (NCH(ch) == 4) {
2392                 expr_ty expr1, expr2;
2393 
2394                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2395                 if (!expr1)
2396                     return NULL;
2397                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2398                 if (!expr2)
2399                     return NULL;
2400 
2401                 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset,
2402                              c->c_arena);
2403             }
2404             else if (NCH(ch) == 6) {
2405                 expr_ty expr1, expr2, expr3;
2406 
2407                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2408                 if (!expr1)
2409                     return NULL;
2410                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2411                 if (!expr2)
2412                     return NULL;
2413                 expr3 = ast_for_expr(c, CHILD(ch, 5));
2414                 if (!expr3)
2415                     return NULL;
2416 
2417                 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset,
2418                              c->c_arena);
2419             }
2420         default:
2421             PyErr_Format(PyExc_SystemError,
2422                          "unexpected flow_stmt: %d", TYPE(ch));
2423             return NULL;
2424     }
2425 
2426     PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2427     return NULL;
2428 }
2429 
2430 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)2431 alias_for_import_name(struct compiling *c, const node *n, int store)
2432 {
2433     /*
2434       import_as_name: NAME ['as' NAME]
2435       dotted_as_name: dotted_name ['as' NAME]
2436       dotted_name: NAME ('.' NAME)*
2437     */
2438     PyObject *str, *name;
2439 
2440  loop:
2441     switch (TYPE(n)) {
2442          case import_as_name: {
2443             node *name_node = CHILD(n, 0);
2444             str = NULL;
2445             if (NCH(n) == 3) {
2446                 node *str_node = CHILD(n, 2);
2447                 if (store && !forbidden_check(c, str_node, STR(str_node)))
2448                     return NULL;
2449                 str = NEW_IDENTIFIER(str_node);
2450                 if (!str)
2451                     return NULL;
2452             }
2453             else {
2454                 if (!forbidden_check(c, name_node, STR(name_node)))
2455                     return NULL;
2456             }
2457             name = NEW_IDENTIFIER(name_node);
2458             if (!name)
2459                 return NULL;
2460             return alias(name, str, c->c_arena);
2461         }
2462         case dotted_as_name:
2463             if (NCH(n) == 1) {
2464                 n = CHILD(n, 0);
2465                 goto loop;
2466             }
2467             else {
2468                 node *asname_node = CHILD(n, 2);
2469                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
2470                 if (!a)
2471                     return NULL;
2472                 assert(!a->asname);
2473                 if (!forbidden_check(c, asname_node, STR(asname_node)))
2474                     return NULL;
2475                 a->asname = NEW_IDENTIFIER(asname_node);
2476                 if (!a->asname)
2477                     return NULL;
2478                 return a;
2479             }
2480             break;
2481         case dotted_name:
2482             if (NCH(n) == 1) {
2483                 node *name_node = CHILD(n, 0);
2484                 if (store && !forbidden_check(c, name_node, STR(name_node)))
2485                     return NULL;
2486                 name = NEW_IDENTIFIER(name_node);
2487                 if (!name)
2488                     return NULL;
2489                 return alias(name, NULL, c->c_arena);
2490             }
2491             else {
2492                 /* Create a string of the form "a.b.c" */
2493                 int i;
2494                 size_t len;
2495                 char *s;
2496 
2497                 len = 0;
2498                 for (i = 0; i < NCH(n); i += 2)
2499                     /* length of string plus one for the dot */
2500                     len += strlen(STR(CHILD(n, i))) + 1;
2501                 len--; /* the last name doesn't have a dot */
2502                 str = PyString_FromStringAndSize(NULL, len);
2503                 if (!str)
2504                     return NULL;
2505                 s = PyString_AS_STRING(str);
2506                 if (!s)
2507                     return NULL;
2508                 for (i = 0; i < NCH(n); i += 2) {
2509                     char *sch = STR(CHILD(n, i));
2510                     strcpy(s, STR(CHILD(n, i)));
2511                     s += strlen(sch);
2512                     *s++ = '.';
2513                 }
2514                 --s;
2515                 *s = '\0';
2516                 PyString_InternInPlace(&str);
2517                 PyArena_AddPyObject(c->c_arena, str);
2518                 return alias(str, NULL, c->c_arena);
2519             }
2520             break;
2521         case STAR:
2522             str = PyString_InternFromString("*");
2523             PyArena_AddPyObject(c->c_arena, str);
2524             return alias(str, NULL, c->c_arena);
2525         default:
2526             PyErr_Format(PyExc_SystemError,
2527                          "unexpected import name: %d", TYPE(n));
2528             return NULL;
2529     }
2530 
2531     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2532     return NULL;
2533 }
2534 
2535 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)2536 ast_for_import_stmt(struct compiling *c, const node *n)
2537 {
2538     /*
2539       import_stmt: import_name | import_from
2540       import_name: 'import' dotted_as_names
2541       import_from: 'from' ('.'* dotted_name | '.') 'import'
2542                           ('*' | '(' import_as_names ')' | import_as_names)
2543     */
2544     int lineno;
2545     int col_offset;
2546     int i;
2547     asdl_seq *aliases;
2548 
2549     REQ(n, import_stmt);
2550     lineno = LINENO(n);
2551     col_offset = n->n_col_offset;
2552     n = CHILD(n, 0);
2553     if (TYPE(n) == import_name) {
2554         n = CHILD(n, 1);
2555         REQ(n, dotted_as_names);
2556         aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2557         if (!aliases)
2558             return NULL;
2559         for (i = 0; i < NCH(n); i += 2) {
2560             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2561             if (!import_alias)
2562                 return NULL;
2563             asdl_seq_SET(aliases, i / 2, import_alias);
2564         }
2565         return Import(aliases, lineno, col_offset, c->c_arena);
2566     }
2567     else if (TYPE(n) == import_from) {
2568         int n_children;
2569         int idx, ndots = 0;
2570         alias_ty mod = NULL;
2571         identifier modname = NULL;
2572 
2573        /* Count the number of dots (for relative imports) and check for the
2574           optional module name */
2575         for (idx = 1; idx < NCH(n); idx++) {
2576             if (TYPE(CHILD(n, idx)) == dotted_name) {
2577                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
2578                 if (!mod)
2579                     return NULL;
2580                 idx++;
2581                 break;
2582             } else if (TYPE(CHILD(n, idx)) != DOT) {
2583                 break;
2584             }
2585             ndots++;
2586         }
2587         idx++; /* skip over the 'import' keyword */
2588         switch (TYPE(CHILD(n, idx))) {
2589         case STAR:
2590             /* from ... import * */
2591             n = CHILD(n, idx);
2592             n_children = 1;
2593             break;
2594         case LPAR:
2595             /* from ... import (x, y, z) */
2596             n = CHILD(n, idx + 1);
2597             n_children = NCH(n);
2598             break;
2599         case import_as_names:
2600             /* from ... import x, y, z */
2601             n = CHILD(n, idx);
2602             n_children = NCH(n);
2603             if (n_children % 2 == 0) {
2604                 ast_error(n, "trailing comma not allowed without"
2605                              " surrounding parentheses");
2606                 return NULL;
2607             }
2608             break;
2609         default:
2610             ast_error(n, "Unexpected node-type in from-import");
2611             return NULL;
2612         }
2613 
2614         aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2615         if (!aliases)
2616             return NULL;
2617 
2618         /* handle "from ... import *" special b/c there's no children */
2619         if (TYPE(n) == STAR) {
2620             alias_ty import_alias = alias_for_import_name(c, n, 1);
2621             if (!import_alias)
2622                 return NULL;
2623                 asdl_seq_SET(aliases, 0, import_alias);
2624         }
2625         else {
2626             for (i = 0; i < NCH(n); i += 2) {
2627                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2628                 if (!import_alias)
2629                     return NULL;
2630                     asdl_seq_SET(aliases, i / 2, import_alias);
2631             }
2632         }
2633         if (mod != NULL)
2634             modname = mod->name;
2635         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2636                           c->c_arena);
2637     }
2638     PyErr_Format(PyExc_SystemError,
2639                  "unknown import statement: starts with command '%s'",
2640                  STR(CHILD(n, 0)));
2641     return NULL;
2642 }
2643 
2644 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)2645 ast_for_global_stmt(struct compiling *c, const node *n)
2646 {
2647     /* global_stmt: 'global' NAME (',' NAME)* */
2648     identifier name;
2649     asdl_seq *s;
2650     int i;
2651 
2652     REQ(n, global_stmt);
2653     s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2654     if (!s)
2655         return NULL;
2656     for (i = 1; i < NCH(n); i += 2) {
2657         name = NEW_IDENTIFIER(CHILD(n, i));
2658         if (!name)
2659             return NULL;
2660         asdl_seq_SET(s, i / 2, name);
2661     }
2662     return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2663 }
2664 
2665 static stmt_ty
ast_for_exec_stmt(struct compiling * c,const node * n)2666 ast_for_exec_stmt(struct compiling *c, const node *n)
2667 {
2668     expr_ty expr1, globals = NULL, locals = NULL;
2669     int n_children = NCH(n);
2670     if (n_children != 2 && n_children != 4 && n_children != 6) {
2671         PyErr_Format(PyExc_SystemError,
2672                      "poorly formed 'exec' statement: %d parts to statement",
2673                      n_children);
2674         return NULL;
2675     }
2676 
2677     /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2678     REQ(n, exec_stmt);
2679     expr1 = ast_for_expr(c, CHILD(n, 1));
2680     if (!expr1)
2681         return NULL;
2682 
2683     if (expr1->kind == Tuple_kind && n_children < 4 &&
2684         (asdl_seq_LEN(expr1->v.Tuple.elts) == 2 ||
2685          asdl_seq_LEN(expr1->v.Tuple.elts) == 3)) {
2686         /* Backwards compatibility: passing exec args as a tuple */
2687         globals = asdl_seq_GET(expr1->v.Tuple.elts, 1);
2688         if (asdl_seq_LEN(expr1->v.Tuple.elts) == 3) {
2689             locals = asdl_seq_GET(expr1->v.Tuple.elts, 2);
2690         }
2691         expr1 = asdl_seq_GET(expr1->v.Tuple.elts, 0);
2692     }
2693 
2694     if (n_children >= 4) {
2695         globals = ast_for_expr(c, CHILD(n, 3));
2696         if (!globals)
2697             return NULL;
2698     }
2699     if (n_children == 6) {
2700         locals = ast_for_expr(c, CHILD(n, 5));
2701         if (!locals)
2702             return NULL;
2703     }
2704 
2705     return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset,
2706                 c->c_arena);
2707 }
2708 
2709 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)2710 ast_for_assert_stmt(struct compiling *c, const node *n)
2711 {
2712     /* assert_stmt: 'assert' test [',' test] */
2713     REQ(n, assert_stmt);
2714     if (NCH(n) == 2) {
2715         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2716         if (!expression)
2717             return NULL;
2718         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
2719                       c->c_arena);
2720     }
2721     else if (NCH(n) == 4) {
2722         expr_ty expr1, expr2;
2723 
2724         expr1 = ast_for_expr(c, CHILD(n, 1));
2725         if (!expr1)
2726             return NULL;
2727         expr2 = ast_for_expr(c, CHILD(n, 3));
2728         if (!expr2)
2729             return NULL;
2730 
2731         return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2732     }
2733     PyErr_Format(PyExc_SystemError,
2734                  "improper number of parts to 'assert' statement: %d",
2735                  NCH(n));
2736     return NULL;
2737 }
2738 
2739 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)2740 ast_for_suite(struct compiling *c, const node *n)
2741 {
2742     /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2743     asdl_seq *seq;
2744     stmt_ty s;
2745     int i, total, num, end, pos = 0;
2746     node *ch;
2747 
2748     REQ(n, suite);
2749 
2750     total = num_stmts(n);
2751     seq = asdl_seq_new(total, c->c_arena);
2752     if (!seq)
2753         return NULL;
2754     if (TYPE(CHILD(n, 0)) == simple_stmt) {
2755         n = CHILD(n, 0);
2756         /* simple_stmt always ends with a NEWLINE,
2757            and may have a trailing SEMI
2758         */
2759         end = NCH(n) - 1;
2760         if (TYPE(CHILD(n, end - 1)) == SEMI)
2761             end--;
2762         /* loop by 2 to skip semi-colons */
2763         for (i = 0; i < end; i += 2) {
2764             ch = CHILD(n, i);
2765             s = ast_for_stmt(c, ch);
2766             if (!s)
2767                 return NULL;
2768             asdl_seq_SET(seq, pos++, s);
2769         }
2770     }
2771     else {
2772         for (i = 2; i < (NCH(n) - 1); i++) {
2773             ch = CHILD(n, i);
2774             REQ(ch, stmt);
2775             num = num_stmts(ch);
2776             if (num == 1) {
2777                 /* small_stmt or compound_stmt with only one child */
2778                 s = ast_for_stmt(c, ch);
2779                 if (!s)
2780                     return NULL;
2781                 asdl_seq_SET(seq, pos++, s);
2782             }
2783             else {
2784                 int j;
2785                 ch = CHILD(ch, 0);
2786                 REQ(ch, simple_stmt);
2787                 for (j = 0; j < NCH(ch); j += 2) {
2788                     /* statement terminates with a semi-colon ';' */
2789                     if (NCH(CHILD(ch, j)) == 0) {
2790                         assert((j + 1) == NCH(ch));
2791                         break;
2792                     }
2793                     s = ast_for_stmt(c, CHILD(ch, j));
2794                     if (!s)
2795                         return NULL;
2796                     asdl_seq_SET(seq, pos++, s);
2797                 }
2798             }
2799         }
2800     }
2801     assert(pos == seq->size);
2802     return seq;
2803 }
2804 
2805 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)2806 ast_for_if_stmt(struct compiling *c, const node *n)
2807 {
2808     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2809        ['else' ':' suite]
2810     */
2811     char *s;
2812 
2813     REQ(n, if_stmt);
2814 
2815     if (NCH(n) == 4) {
2816         expr_ty expression;
2817         asdl_seq *suite_seq;
2818 
2819         expression = ast_for_expr(c, CHILD(n, 1));
2820         if (!expression)
2821             return NULL;
2822         suite_seq = ast_for_suite(c, CHILD(n, 3));
2823         if (!suite_seq)
2824             return NULL;
2825 
2826         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2827                   c->c_arena);
2828     }
2829 
2830     s = STR(CHILD(n, 4));
2831     /* s[2], the third character in the string, will be
2832        's' for el_s_e, or
2833        'i' for el_i_f
2834     */
2835     if (s[2] == 's') {
2836         expr_ty expression;
2837         asdl_seq *seq1, *seq2;
2838 
2839         expression = ast_for_expr(c, CHILD(n, 1));
2840         if (!expression)
2841             return NULL;
2842         seq1 = ast_for_suite(c, CHILD(n, 3));
2843         if (!seq1)
2844             return NULL;
2845         seq2 = ast_for_suite(c, CHILD(n, 6));
2846         if (!seq2)
2847             return NULL;
2848 
2849         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2850                   c->c_arena);
2851     }
2852     else if (s[2] == 'i') {
2853         int i, n_elif, has_else = 0;
2854         expr_ty expression;
2855         asdl_seq *suite_seq;
2856         asdl_seq *orelse = NULL;
2857         n_elif = NCH(n) - 4;
2858         /* must reference the child n_elif+1 since 'else' token is third,
2859            not fourth, child from the end. */
2860         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2861             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2862             has_else = 1;
2863             n_elif -= 3;
2864         }
2865         n_elif /= 4;
2866 
2867         if (has_else) {
2868             asdl_seq *suite_seq2;
2869 
2870             orelse = asdl_seq_new(1, c->c_arena);
2871             if (!orelse)
2872                 return NULL;
2873             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2874             if (!expression)
2875                 return NULL;
2876             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2877             if (!suite_seq)
2878                 return NULL;
2879             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2880             if (!suite_seq2)
2881                 return NULL;
2882 
2883             asdl_seq_SET(orelse, 0,
2884                          If(expression, suite_seq, suite_seq2,
2885                             LINENO(CHILD(n, NCH(n) - 6)),
2886                             CHILD(n, NCH(n) - 6)->n_col_offset,
2887                             c->c_arena));
2888             /* the just-created orelse handled the last elif */
2889             n_elif--;
2890         }
2891 
2892         for (i = 0; i < n_elif; i++) {
2893             int off = 5 + (n_elif - i - 1) * 4;
2894             asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2895             if (!newobj)
2896                 return NULL;
2897             expression = ast_for_expr(c, CHILD(n, off));
2898             if (!expression)
2899                 return NULL;
2900             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2901             if (!suite_seq)
2902                 return NULL;
2903 
2904             asdl_seq_SET(newobj, 0,
2905                          If(expression, suite_seq, orelse,
2906                             LINENO(CHILD(n, off)),
2907                             CHILD(n, off)->n_col_offset, c->c_arena));
2908             orelse = newobj;
2909         }
2910         expression = ast_for_expr(c, CHILD(n, 1));
2911         if (!expression)
2912             return NULL;
2913         suite_seq = ast_for_suite(c, CHILD(n, 3));
2914         if (!suite_seq)
2915             return NULL;
2916         return If(expression, suite_seq, orelse,
2917                   LINENO(n), n->n_col_offset, c->c_arena);
2918     }
2919 
2920     PyErr_Format(PyExc_SystemError,
2921                  "unexpected token in 'if' statement: %s", s);
2922     return NULL;
2923 }
2924 
2925 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)2926 ast_for_while_stmt(struct compiling *c, const node *n)
2927 {
2928     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2929     REQ(n, while_stmt);
2930 
2931     if (NCH(n) == 4) {
2932         expr_ty expression;
2933         asdl_seq *suite_seq;
2934 
2935         expression = ast_for_expr(c, CHILD(n, 1));
2936         if (!expression)
2937             return NULL;
2938         suite_seq = ast_for_suite(c, CHILD(n, 3));
2939         if (!suite_seq)
2940             return NULL;
2941         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2942                      c->c_arena);
2943     }
2944     else if (NCH(n) == 7) {
2945         expr_ty expression;
2946         asdl_seq *seq1, *seq2;
2947 
2948         expression = ast_for_expr(c, CHILD(n, 1));
2949         if (!expression)
2950             return NULL;
2951         seq1 = ast_for_suite(c, CHILD(n, 3));
2952         if (!seq1)
2953             return NULL;
2954         seq2 = ast_for_suite(c, CHILD(n, 6));
2955         if (!seq2)
2956             return NULL;
2957 
2958         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2959                      c->c_arena);
2960     }
2961 
2962     PyErr_Format(PyExc_SystemError,
2963                  "wrong number of tokens for 'while' statement: %d",
2964                  NCH(n));
2965     return NULL;
2966 }
2967 
2968 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n)2969 ast_for_for_stmt(struct compiling *c, const node *n)
2970 {
2971     asdl_seq *_target, *seq = NULL, *suite_seq;
2972     expr_ty expression;
2973     expr_ty target, first;
2974     const node *node_target;
2975     /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2976     REQ(n, for_stmt);
2977 
2978     if (NCH(n) == 9) {
2979         seq = ast_for_suite(c, CHILD(n, 8));
2980         if (!seq)
2981             return NULL;
2982     }
2983 
2984     node_target = CHILD(n, 1);
2985     _target = ast_for_exprlist(c, node_target, Store);
2986     if (!_target)
2987         return NULL;
2988     /* Check the # of children rather than the length of _target, since
2989        for x, in ... has 1 element in _target, but still requires a Tuple. */
2990     first = (expr_ty)asdl_seq_GET(_target, 0);
2991     if (NCH(node_target) == 1)
2992         target = first;
2993     else
2994         target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
2995 
2996     expression = ast_for_testlist(c, CHILD(n, 3));
2997     if (!expression)
2998         return NULL;
2999     suite_seq = ast_for_suite(c, CHILD(n, 5));
3000     if (!suite_seq)
3001         return NULL;
3002 
3003     return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
3004                c->c_arena);
3005 }
3006 
3007 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3008 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3009 {
3010     /* except_clause: 'except' [test [(',' | 'as') test]] */
3011     REQ(exc, except_clause);
3012     REQ(body, suite);
3013 
3014     if (NCH(exc) == 1) {
3015         asdl_seq *suite_seq = ast_for_suite(c, body);
3016         if (!suite_seq)
3017             return NULL;
3018 
3019         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3020                              exc->n_col_offset, c->c_arena);
3021     }
3022     else if (NCH(exc) == 2) {
3023         expr_ty expression;
3024         asdl_seq *suite_seq;
3025 
3026         expression = ast_for_expr(c, CHILD(exc, 1));
3027         if (!expression)
3028             return NULL;
3029         suite_seq = ast_for_suite(c, body);
3030         if (!suite_seq)
3031             return NULL;
3032 
3033         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3034                              exc->n_col_offset, c->c_arena);
3035     }
3036     else if (NCH(exc) == 4) {
3037         asdl_seq *suite_seq;
3038         expr_ty expression;
3039         expr_ty e = ast_for_expr(c, CHILD(exc, 3));
3040         if (!e)
3041             return NULL;
3042         if (!set_context(c, e, Store, CHILD(exc, 3)))
3043             return NULL;
3044         expression = ast_for_expr(c, CHILD(exc, 1));
3045         if (!expression)
3046             return NULL;
3047         suite_seq = ast_for_suite(c, body);
3048         if (!suite_seq)
3049             return NULL;
3050 
3051         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3052                              exc->n_col_offset, c->c_arena);
3053     }
3054 
3055     PyErr_Format(PyExc_SystemError,
3056                  "wrong number of children for 'except' clause: %d",
3057                  NCH(exc));
3058     return NULL;
3059 }
3060 
3061 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3062 ast_for_try_stmt(struct compiling *c, const node *n)
3063 {
3064     const int nch = NCH(n);
3065     int n_except = (nch - 3)/3;
3066     asdl_seq *body, *orelse = NULL, *finally = NULL;
3067 
3068     REQ(n, try_stmt);
3069 
3070     body = ast_for_suite(c, CHILD(n, 2));
3071     if (body == NULL)
3072         return NULL;
3073 
3074     if (TYPE(CHILD(n, nch - 3)) == NAME) {
3075         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3076             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3077                 /* we can assume it's an "else",
3078                    because nch >= 9 for try-else-finally and
3079                    it would otherwise have a type of except_clause */
3080                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3081                 if (orelse == NULL)
3082                     return NULL;
3083                 n_except--;
3084             }
3085 
3086             finally = ast_for_suite(c, CHILD(n, nch - 1));
3087             if (finally == NULL)
3088                 return NULL;
3089             n_except--;
3090         }
3091         else {
3092             /* we can assume it's an "else",
3093                otherwise it would have a type of except_clause */
3094             orelse = ast_for_suite(c, CHILD(n, nch - 1));
3095             if (orelse == NULL)
3096                 return NULL;
3097             n_except--;
3098         }
3099     }
3100     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3101         ast_error(n, "malformed 'try' statement");
3102         return NULL;
3103     }
3104 
3105     if (n_except > 0) {
3106         int i;
3107         stmt_ty except_st;
3108         /* process except statements to create a try ... except */
3109         asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
3110         if (handlers == NULL)
3111             return NULL;
3112 
3113         for (i = 0; i < n_except; i++) {
3114             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3115                                                        CHILD(n, 5 + i * 3));
3116             if (!e)
3117                 return NULL;
3118             asdl_seq_SET(handlers, i, e);
3119         }
3120 
3121         except_st = TryExcept(body, handlers, orelse, LINENO(n),
3122                               n->n_col_offset, c->c_arena);
3123         if (!finally)
3124             return except_st;
3125 
3126         /* if a 'finally' is present too, we nest the TryExcept within a
3127            TryFinally to emulate try ... except ... finally */
3128         body = asdl_seq_new(1, c->c_arena);
3129         if (body == NULL)
3130             return NULL;
3131         asdl_seq_SET(body, 0, except_st);
3132     }
3133 
3134     /* must be a try ... finally (except clauses are in body, if any exist) */
3135     assert(finally != NULL);
3136     return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
3137 }
3138 
3139 /* with_item: test ['as' expr] */
3140 static stmt_ty
ast_for_with_item(struct compiling * c,const node * n,asdl_seq * content)3141 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
3142 {
3143     expr_ty context_expr, optional_vars = NULL;
3144 
3145     REQ(n, with_item);
3146     context_expr = ast_for_expr(c, CHILD(n, 0));
3147     if (!context_expr)
3148         return NULL;
3149     if (NCH(n) == 3) {
3150         optional_vars = ast_for_expr(c, CHILD(n, 2));
3151 
3152         if (!optional_vars) {
3153             return NULL;
3154         }
3155         if (!set_context(c, optional_vars, Store, n)) {
3156             return NULL;
3157         }
3158     }
3159 
3160     return With(context_expr, optional_vars, content, LINENO(n),
3161                 n->n_col_offset, c->c_arena);
3162 }
3163 
3164 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3165 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n)3166 ast_for_with_stmt(struct compiling *c, const node *n)
3167 {
3168     int i;
3169     stmt_ty ret;
3170     asdl_seq *inner;
3171 
3172     REQ(n, with_stmt);
3173 
3174     /* process the with items inside-out */
3175     i = NCH(n) - 1;
3176     /* the suite of the innermost with item is the suite of the with stmt */
3177     inner = ast_for_suite(c, CHILD(n, i));
3178     if (!inner)
3179         return NULL;
3180 
3181     for (;;) {
3182         i -= 2;
3183         ret = ast_for_with_item(c, CHILD(n, i), inner);
3184         if (!ret)
3185             return NULL;
3186         /* was this the last item? */
3187         if (i == 1)
3188             break;
3189         /* if not, wrap the result so far in a new sequence */
3190         inner = asdl_seq_new(1, c->c_arena);
3191         if (!inner)
3192             return NULL;
3193         asdl_seq_SET(inner, 0, ret);
3194     }
3195 
3196     return ret;
3197 }
3198 
3199 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3200 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3201 {
3202     /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
3203     PyObject *classname;
3204     asdl_seq *bases, *s;
3205 
3206     REQ(n, classdef);
3207 
3208     if (!forbidden_check(c, n, STR(CHILD(n, 1))))
3209             return NULL;
3210 
3211     if (NCH(n) == 4) {
3212         s = ast_for_suite(c, CHILD(n, 3));
3213         if (!s)
3214             return NULL;
3215         classname = NEW_IDENTIFIER(CHILD(n, 1));
3216         if (!classname)
3217             return NULL;
3218         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3219                         n->n_col_offset, c->c_arena);
3220     }
3221     /* check for empty base list */
3222     if (TYPE(CHILD(n,3)) == RPAR) {
3223         s = ast_for_suite(c, CHILD(n,5));
3224         if (!s)
3225             return NULL;
3226         classname = NEW_IDENTIFIER(CHILD(n, 1));
3227         if (!classname)
3228             return NULL;
3229         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3230                         n->n_col_offset, c->c_arena);
3231     }
3232 
3233     /* else handle the base class list */
3234     bases = ast_for_class_bases(c, CHILD(n, 3));
3235     if (!bases)
3236         return NULL;
3237 
3238     s = ast_for_suite(c, CHILD(n, 6));
3239     if (!s)
3240         return NULL;
3241     classname = NEW_IDENTIFIER(CHILD(n, 1));
3242     if (!classname)
3243         return NULL;
3244     return ClassDef(classname, bases, s, decorator_seq,
3245                     LINENO(n), n->n_col_offset, c->c_arena);
3246 }
3247 
3248 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3249 ast_for_stmt(struct compiling *c, const node *n)
3250 {
3251     if (TYPE(n) == stmt) {
3252         assert(NCH(n) == 1);
3253         n = CHILD(n, 0);
3254     }
3255     if (TYPE(n) == simple_stmt) {
3256         assert(num_stmts(n) == 1);
3257         n = CHILD(n, 0);
3258     }
3259     if (TYPE(n) == small_stmt) {
3260         n = CHILD(n, 0);
3261         /* small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt
3262                      | flow_stmt | import_stmt | global_stmt | exec_stmt
3263                      | assert_stmt
3264         */
3265         switch (TYPE(n)) {
3266             case expr_stmt:
3267                 return ast_for_expr_stmt(c, n);
3268             case print_stmt:
3269                 return ast_for_print_stmt(c, n);
3270             case del_stmt:
3271                 return ast_for_del_stmt(c, n);
3272             case pass_stmt:
3273                 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3274             case flow_stmt:
3275                 return ast_for_flow_stmt(c, n);
3276             case import_stmt:
3277                 return ast_for_import_stmt(c, n);
3278             case global_stmt:
3279                 return ast_for_global_stmt(c, n);
3280             case exec_stmt:
3281                 return ast_for_exec_stmt(c, n);
3282             case assert_stmt:
3283                 return ast_for_assert_stmt(c, n);
3284             default:
3285                 PyErr_Format(PyExc_SystemError,
3286                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3287                              TYPE(n), NCH(n));
3288                 return NULL;
3289         }
3290     }
3291     else {
3292         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3293                         | funcdef | classdef | decorated
3294         */
3295         node *ch = CHILD(n, 0);
3296         REQ(n, compound_stmt);
3297         switch (TYPE(ch)) {
3298             case if_stmt:
3299                 return ast_for_if_stmt(c, ch);
3300             case while_stmt:
3301                 return ast_for_while_stmt(c, ch);
3302             case for_stmt:
3303                 return ast_for_for_stmt(c, ch);
3304             case try_stmt:
3305                 return ast_for_try_stmt(c, ch);
3306             case with_stmt:
3307                 return ast_for_with_stmt(c, ch);
3308             case funcdef:
3309                 return ast_for_funcdef(c, ch, NULL);
3310             case classdef:
3311                 return ast_for_classdef(c, ch, NULL);
3312             case decorated:
3313                 return ast_for_decorated(c, ch);
3314             default:
3315                 PyErr_Format(PyExc_SystemError,
3316                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3317                              TYPE(n), NCH(n));
3318                 return NULL;
3319         }
3320     }
3321 }
3322 
3323 static PyObject *
parsenumber(struct compiling * c,const char * s)3324 parsenumber(struct compiling *c, const char *s)
3325 {
3326         const char *end;
3327         long x;
3328         double dx;
3329 #ifndef WITHOUT_COMPLEX
3330         Py_complex complex;
3331         int imflag;
3332 #endif
3333 
3334         assert(s != NULL);
3335         errno = 0;
3336         end = s + strlen(s) - 1;
3337 #ifndef WITHOUT_COMPLEX
3338         imflag = *end == 'j' || *end == 'J';
3339 #endif
3340         if (*end == 'l' || *end == 'L')
3341                 return PyLong_FromString((char *)s, (char **)0, 0);
3342         x = PyOS_strtol((char *)s, (char **)&end, 0);
3343         if (*end == '\0') {
3344                 if (errno != 0)
3345                         return PyLong_FromString((char *)s, (char **)0, 0);
3346                 return PyInt_FromLong(x);
3347         }
3348         /* XXX Huge floats may silently fail */
3349 #ifndef WITHOUT_COMPLEX
3350         if (imflag) {
3351                 complex.real = 0.;
3352                 complex.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3353                 if (complex.imag == -1.0 && PyErr_Occurred())
3354                         return NULL;
3355                 return PyComplex_FromCComplex(complex);
3356         }
3357         else
3358 #endif
3359         {
3360                 dx = PyOS_string_to_double(s, NULL, NULL);
3361                 if (dx == -1.0 && PyErr_Occurred())
3362                         return NULL;
3363                 return PyFloat_FromDouble(dx);
3364         }
3365 }
3366 
3367 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end,char * encoding)3368 decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
3369 {
3370 #ifndef Py_USING_UNICODE
3371         Py_FatalError("decode_utf8 should not be called in this build.");
3372         return NULL;
3373 #else
3374         PyObject *u, *v;
3375         char *s, *t;
3376         t = s = (char *)*sPtr;
3377         /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3378         while (s < end && (*s & 0x80)) s++;
3379         *sPtr = s;
3380         u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3381         if (u == NULL)
3382                 return NULL;
3383         v = PyUnicode_AsEncodedString(u, encoding, NULL);
3384         Py_DECREF(u);
3385         return v;
3386 #endif
3387 }
3388 
#ifdef Py_USING_UNICODE
/* Decode the body of a unicode literal into a unicode object.
 *
 * s points at len bytes of literal text, without quotes or prefix.
 * rawmode selects raw-unicode-escape decoding over unicode-escape
 * decoding.  When encoding is non-NULL and is not "iso-8859-1", the text
 * is first rewritten into a temporary buffer in which every non-ASCII
 * character is spelled as a \UXXXXXXXX escape.  Returns NULL with an
 * exception set on failure.
 */
static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
{
    PyObject *v;
    PyObject *u = NULL;
    char *buf;
    char *p;
    const char *end;

    if (encoding != NULL && strcmp(encoding, "iso-8859-1")) {
        /* check for integer overflow; the buffer below is len * 6 bytes.
           Set an exception so the caller never sees a bare NULL. */
        if (len > PY_SIZE_MAX / 6)
            return PyErr_NoMemory();
        /* A 2-byte UTF-8 character may become "\U000000E4" (10 bytes),
           or 1:5; a backslash followed by such a character (3 bytes) may
           become "\u005c\U000000E4" (16 bytes), or ~1:6 */
        u = PyString_FromStringAndSize((char *)NULL, len * 6);
        if (u == NULL)
            return NULL;
        p = buf = PyString_AsString(u);
        end = s + len;
        while (s < end) {
            if (*s == '\\') {
                *p++ = *s++;
                /* Spell the backslash as \u005c when a non-ASCII byte
                   follows, or when it is the very last byte, so the
                   loop never reads or copies past end. */
                if (s >= end || *s & 0x80) {
                    strcpy(p, "u005c");
                    p += 5;
                    if (s >= end)
                        break;
                }
            }
            if (*s & 0x80) { /* XXX inefficient */
                PyObject *w;
                char *r;
                Py_ssize_t rn, i;
                w = decode_utf8(c, &s, end, "utf-32-be");
                if (w == NULL) {
                    Py_DECREF(u);
                    return NULL;
                }
                r = PyString_AsString(w);
                rn = PyString_Size(w);
                assert(rn % 4 == 0);
                /* Each 4-byte big-endian code point becomes one
                   10-character \UXXXXXXXX escape. */
                for (i = 0; i < rn; i += 4) {
                    sprintf(p, "\\U%02x%02x%02x%02x",
                            r[i + 0] & 0xFF,
                            r[i + 1] & 0xFF,
                            r[i + 2] & 0xFF,
                            r[i + 3] & 0xFF);
                    p += 10;
                }
                Py_DECREF(w);
            } else {
                *p++ = *s++;
            }
        }
        /* Decode from the rewritten buffer from here on. */
        len = p - buf;
        s = buf;
    }
    if (rawmode)
        v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
    else
        v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
    /* Release the temporary buffer, if one was allocated. */
    Py_XDECREF(u);
    return v;
}
#endif
3453 
3454 /* s is a Python string literal, including the bracketing quote characters,
3455  * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3456  * parsestr parses it, and returns the decoded Python string object.
3457  */
3458 static PyObject *
parsestr(struct compiling * c,const node * n,const char * s)3459 parsestr(struct compiling *c, const node *n, const char *s)
3460 {
3461         size_t len, i;
3462         int quote = Py_CHARMASK(*s);
3463         int rawmode = 0;
3464         int need_encoding;
3465         int unicode = c->c_future_unicode;
3466         int bytes = 0;
3467 
3468         if (isalpha(quote) || quote == '_') {
3469                 if (quote == 'u' || quote == 'U') {
3470                         quote = *++s;
3471                         unicode = 1;
3472                 }
3473                 if (quote == 'b' || quote == 'B') {
3474                         quote = *++s;
3475                         unicode = 0;
3476                         bytes = 1;
3477                 }
3478                 if (quote == 'r' || quote == 'R') {
3479                         quote = *++s;
3480                         rawmode = 1;
3481                 }
3482         }
3483         if (quote != '\'' && quote != '\"') {
3484                 PyErr_BadInternalCall();
3485                 return NULL;
3486         }
3487         s++;
3488         len = strlen(s);
3489         if (len > INT_MAX) {
3490                 PyErr_SetString(PyExc_OverflowError,
3491                                 "string to parse is too long");
3492                 return NULL;
3493         }
3494         if (s[--len] != quote) {
3495                 PyErr_BadInternalCall();
3496                 return NULL;
3497         }
3498         if (len >= 4 && s[0] == quote && s[1] == quote) {
3499                 s += 2;
3500                 len -= 2;
3501                 if (s[--len] != quote || s[--len] != quote) {
3502                         PyErr_BadInternalCall();
3503                         return NULL;
3504                 }
3505         }
3506         if (Py_Py3kWarningFlag && bytes) {
3507             for (i = 0; i < len; i++) {
3508                 if ((unsigned char)s[i] > 127) {
3509                     if (!ast_warn(c, n,
3510                         "non-ascii bytes literals not supported in 3.x"))
3511                         return NULL;
3512                     break;
3513                 }
3514             }
3515         }
3516 #ifdef Py_USING_UNICODE
3517         if (unicode || Py_UnicodeFlag) {
3518                 return decode_unicode(c, s, len, rawmode, c->c_encoding);
3519         }
3520 #endif
3521         need_encoding = (c->c_encoding != NULL &&
3522                          strcmp(c->c_encoding, "utf-8") != 0 &&
3523                          strcmp(c->c_encoding, "iso-8859-1") != 0);
3524         if (rawmode || strchr(s, '\\') == NULL) {
3525                 if (need_encoding) {
3526 #ifndef Py_USING_UNICODE
3527                         /* This should not happen - we never see any other
3528                            encoding. */
3529                         Py_FatalError(
3530                             "cannot deal with encodings in this build.");
3531 #else
3532                         PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3533                         if (u == NULL)
3534                                 return NULL;
3535                         v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
3536                         Py_DECREF(u);
3537                         return v;
3538 #endif
3539                 } else {
3540                         return PyString_FromStringAndSize(s, len);
3541                 }
3542         }
3543 
3544         return PyString_DecodeEscape(s, len, NULL, unicode,
3545                                      need_encoding ? c->c_encoding : NULL);
3546 }
3547 
3548 /* Build a Python string object out of a STRING atom.  This takes care of
3549  * compile-time literal catenation, calling parsestr() on each piece, and
3550  * pasting the intermediate results together.
3551  */
3552 static PyObject *
parsestrplus(struct compiling * c,const node * n)3553 parsestrplus(struct compiling *c, const node *n)
3554 {
3555         PyObject *v;
3556         int i;
3557         REQ(CHILD(n, 0), STRING);
3558         if ((v = parsestr(c, n, STR(CHILD(n, 0)))) != NULL) {
3559                 /* String literal concatenation */
3560                 for (i = 1; i < NCH(n); i++) {
3561                         PyObject *s;
3562                         s = parsestr(c, n, STR(CHILD(n, i)));
3563                         if (s == NULL)
3564                                 goto onError;
3565                         if (PyString_Check(v) && PyString_Check(s)) {
3566                                 PyString_ConcatAndDel(&v, s);
3567                                 if (v == NULL)
3568                                     goto onError;
3569                         }
3570 #ifdef Py_USING_UNICODE
3571                         else {
3572                                 PyObject *temp = PyUnicode_Concat(v, s);
3573                                 Py_DECREF(s);
3574                                 Py_DECREF(v);
3575                                 v = temp;
3576                                 if (v == NULL)
3577                                     goto onError;
3578                         }
3579 #endif
3580                 }
3581         }
3582         return v;
3583 
3584  onError:
3585         Py_XDECREF(v);
3586         return NULL;
3587 }
3588