• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * This file includes functions to transform a concrete syntax tree (CST) to
 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
 *
 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12 
13 #include <assert.h>
14 #include <stdbool.h>
15 
16 #define MAXLEVEL 200    /* Max parentheses level */
17 
18 static int validate_stmts(asdl_seq *);
19 static int validate_exprs(asdl_seq *, expr_context_ty, int);
20 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
21 static int validate_stmt(stmt_ty);
22 static int validate_expr(expr_ty, expr_context_ty);
23 
24 static int
validate_comprehension(asdl_seq * gens)25 validate_comprehension(asdl_seq *gens)
26 {
27     Py_ssize_t i;
28     if (!asdl_seq_LEN(gens)) {
29         PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
30         return 0;
31     }
32     for (i = 0; i < asdl_seq_LEN(gens); i++) {
33         comprehension_ty comp = asdl_seq_GET(gens, i);
34         if (!validate_expr(comp->target, Store) ||
35             !validate_expr(comp->iter, Load) ||
36             !validate_exprs(comp->ifs, Load, 0))
37             return 0;
38     }
39     return 1;
40 }
41 
42 static int
validate_slice(slice_ty slice)43 validate_slice(slice_ty slice)
44 {
45     switch (slice->kind) {
46     case Slice_kind:
47         return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
48             (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
49             (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
50     case ExtSlice_kind: {
51         Py_ssize_t i;
52         if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
53             return 0;
54         for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
55             if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
56                 return 0;
57         return 1;
58     }
59     case Index_kind:
60         return validate_expr(slice->v.Index.value, Load);
61     default:
62         PyErr_SetString(PyExc_SystemError, "unknown slice node");
63         return 0;
64     }
65 }
66 
67 static int
validate_keywords(asdl_seq * keywords)68 validate_keywords(asdl_seq *keywords)
69 {
70     Py_ssize_t i;
71     for (i = 0; i < asdl_seq_LEN(keywords); i++)
72         if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
73             return 0;
74     return 1;
75 }
76 
77 static int
validate_args(asdl_seq * args)78 validate_args(asdl_seq *args)
79 {
80     Py_ssize_t i;
81     for (i = 0; i < asdl_seq_LEN(args); i++) {
82         arg_ty arg = asdl_seq_GET(args, i);
83         if (arg->annotation && !validate_expr(arg->annotation, Load))
84             return 0;
85     }
86     return 1;
87 }
88 
89 static const char *
expr_context_name(expr_context_ty ctx)90 expr_context_name(expr_context_ty ctx)
91 {
92     switch (ctx) {
93     case Load:
94         return "Load";
95     case Store:
96         return "Store";
97     case Del:
98         return "Del";
99     case AugLoad:
100         return "AugLoad";
101     case AugStore:
102         return "AugStore";
103     case Param:
104         return "Param";
105     default:
106         Py_UNREACHABLE();
107     }
108 }
109 
110 static int
validate_arguments(arguments_ty args)111 validate_arguments(arguments_ty args)
112 {
113     if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
114         return 0;
115     }
116     if (args->vararg && args->vararg->annotation
117         && !validate_expr(args->vararg->annotation, Load)) {
118             return 0;
119     }
120     if (!validate_args(args->kwonlyargs))
121         return 0;
122     if (args->kwarg && args->kwarg->annotation
123         && !validate_expr(args->kwarg->annotation, Load)) {
124             return 0;
125     }
126     if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
127         PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
128         return 0;
129     }
130     if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
131         PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
132                         "kw_defaults on arguments");
133         return 0;
134     }
135     return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
136 }
137 
138 static int
validate_constant(PyObject * value)139 validate_constant(PyObject *value)
140 {
141     if (value == Py_None || value == Py_Ellipsis)
142         return 1;
143 
144     if (PyLong_CheckExact(value)
145             || PyFloat_CheckExact(value)
146             || PyComplex_CheckExact(value)
147             || PyBool_Check(value)
148             || PyUnicode_CheckExact(value)
149             || PyBytes_CheckExact(value))
150         return 1;
151 
152     if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
153         PyObject *it;
154 
155         it = PyObject_GetIter(value);
156         if (it == NULL)
157             return 0;
158 
159         while (1) {
160             PyObject *item = PyIter_Next(it);
161             if (item == NULL) {
162                 if (PyErr_Occurred()) {
163                     Py_DECREF(it);
164                     return 0;
165                 }
166                 break;
167             }
168 
169             if (!validate_constant(item)) {
170                 Py_DECREF(it);
171                 Py_DECREF(item);
172                 return 0;
173             }
174             Py_DECREF(item);
175         }
176 
177         Py_DECREF(it);
178         return 1;
179     }
180 
181     return 0;
182 }
183 
184 static int
validate_expr(expr_ty exp,expr_context_ty ctx)185 validate_expr(expr_ty exp, expr_context_ty ctx)
186 {
187     int check_ctx = 1;
188     expr_context_ty actual_ctx;
189 
190     /* First check expression context. */
191     switch (exp->kind) {
192     case Attribute_kind:
193         actual_ctx = exp->v.Attribute.ctx;
194         break;
195     case Subscript_kind:
196         actual_ctx = exp->v.Subscript.ctx;
197         break;
198     case Starred_kind:
199         actual_ctx = exp->v.Starred.ctx;
200         break;
201     case Name_kind:
202         actual_ctx = exp->v.Name.ctx;
203         break;
204     case List_kind:
205         actual_ctx = exp->v.List.ctx;
206         break;
207     case Tuple_kind:
208         actual_ctx = exp->v.Tuple.ctx;
209         break;
210     default:
211         if (ctx != Load) {
212             PyErr_Format(PyExc_ValueError, "expression which can't be "
213                          "assigned to in %s context", expr_context_name(ctx));
214             return 0;
215         }
216         check_ctx = 0;
217         /* set actual_ctx to prevent gcc warning */
218         actual_ctx = 0;
219     }
220     if (check_ctx && actual_ctx != ctx) {
221         PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
222                      expr_context_name(ctx), expr_context_name(actual_ctx));
223         return 0;
224     }
225 
226     /* Now validate expression. */
227     switch (exp->kind) {
228     case BoolOp_kind:
229         if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
230             PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
231             return 0;
232         }
233         return validate_exprs(exp->v.BoolOp.values, Load, 0);
234     case BinOp_kind:
235         return validate_expr(exp->v.BinOp.left, Load) &&
236             validate_expr(exp->v.BinOp.right, Load);
237     case UnaryOp_kind:
238         return validate_expr(exp->v.UnaryOp.operand, Load);
239     case Lambda_kind:
240         return validate_arguments(exp->v.Lambda.args) &&
241             validate_expr(exp->v.Lambda.body, Load);
242     case IfExp_kind:
243         return validate_expr(exp->v.IfExp.test, Load) &&
244             validate_expr(exp->v.IfExp.body, Load) &&
245             validate_expr(exp->v.IfExp.orelse, Load);
246     case Dict_kind:
247         if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
248             PyErr_SetString(PyExc_ValueError,
249                             "Dict doesn't have the same number of keys as values");
250             return 0;
251         }
252         /* null_ok=1 for keys expressions to allow dict unpacking to work in
253            dict literals, i.e. ``{**{a:b}}`` */
254         return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
255             validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
256     case Set_kind:
257         return validate_exprs(exp->v.Set.elts, Load, 0);
258 #define COMP(NAME) \
259         case NAME ## _kind: \
260             return validate_comprehension(exp->v.NAME.generators) && \
261                 validate_expr(exp->v.NAME.elt, Load);
262     COMP(ListComp)
263     COMP(SetComp)
264     COMP(GeneratorExp)
265 #undef COMP
266     case DictComp_kind:
267         return validate_comprehension(exp->v.DictComp.generators) &&
268             validate_expr(exp->v.DictComp.key, Load) &&
269             validate_expr(exp->v.DictComp.value, Load);
270     case Yield_kind:
271         return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
272     case YieldFrom_kind:
273         return validate_expr(exp->v.YieldFrom.value, Load);
274     case Await_kind:
275         return validate_expr(exp->v.Await.value, Load);
276     case Compare_kind:
277         if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
278             PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
279             return 0;
280         }
281         if (asdl_seq_LEN(exp->v.Compare.comparators) !=
282             asdl_seq_LEN(exp->v.Compare.ops)) {
283             PyErr_SetString(PyExc_ValueError, "Compare has a different number "
284                             "of comparators and operands");
285             return 0;
286         }
287         return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
288             validate_expr(exp->v.Compare.left, Load);
289     case Call_kind:
290         return validate_expr(exp->v.Call.func, Load) &&
291             validate_exprs(exp->v.Call.args, Load, 0) &&
292             validate_keywords(exp->v.Call.keywords);
293     case Constant_kind:
294         if (!validate_constant(exp->v.Constant.value)) {
295             PyErr_Format(PyExc_TypeError,
296                          "got an invalid type in Constant: %s",
297                          Py_TYPE(exp->v.Constant.value)->tp_name);
298             return 0;
299         }
300         return 1;
301     case JoinedStr_kind:
302         return validate_exprs(exp->v.JoinedStr.values, Load, 0);
303     case FormattedValue_kind:
304         if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
305             return 0;
306         if (exp->v.FormattedValue.format_spec)
307             return validate_expr(exp->v.FormattedValue.format_spec, Load);
308         return 1;
309     case Attribute_kind:
310         return validate_expr(exp->v.Attribute.value, Load);
311     case Subscript_kind:
312         return validate_slice(exp->v.Subscript.slice) &&
313             validate_expr(exp->v.Subscript.value, Load);
314     case Starred_kind:
315         return validate_expr(exp->v.Starred.value, ctx);
316     case List_kind:
317         return validate_exprs(exp->v.List.elts, ctx, 0);
318     case Tuple_kind:
319         return validate_exprs(exp->v.Tuple.elts, ctx, 0);
320     case NamedExpr_kind:
321         return validate_expr(exp->v.NamedExpr.value, Load);
322     /* This last case doesn't have any checking. */
323     case Name_kind:
324         return 1;
325     }
326     PyErr_SetString(PyExc_SystemError, "unexpected expression");
327     return 0;
328 }
329 
330 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)331 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
332 {
333     if (asdl_seq_LEN(seq))
334         return 1;
335     PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
336     return 0;
337 }
338 
339 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)340 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
341 {
342     return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
343         validate_exprs(targets, ctx, 0);
344 }
345 
346 static int
validate_body(asdl_seq * body,const char * owner)347 validate_body(asdl_seq *body, const char *owner)
348 {
349     return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
350 }
351 
352 static int
validate_stmt(stmt_ty stmt)353 validate_stmt(stmt_ty stmt)
354 {
355     Py_ssize_t i;
356     switch (stmt->kind) {
357     case FunctionDef_kind:
358         return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
359             validate_arguments(stmt->v.FunctionDef.args) &&
360             validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
361             (!stmt->v.FunctionDef.returns ||
362              validate_expr(stmt->v.FunctionDef.returns, Load));
363     case ClassDef_kind:
364         return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
365             validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
366             validate_keywords(stmt->v.ClassDef.keywords) &&
367             validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
368     case Return_kind:
369         return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
370     case Delete_kind:
371         return validate_assignlist(stmt->v.Delete.targets, Del);
372     case Assign_kind:
373         return validate_assignlist(stmt->v.Assign.targets, Store) &&
374             validate_expr(stmt->v.Assign.value, Load);
375     case AugAssign_kind:
376         return validate_expr(stmt->v.AugAssign.target, Store) &&
377             validate_expr(stmt->v.AugAssign.value, Load);
378     case AnnAssign_kind:
379         if (stmt->v.AnnAssign.target->kind != Name_kind &&
380             stmt->v.AnnAssign.simple) {
381             PyErr_SetString(PyExc_TypeError,
382                             "AnnAssign with simple non-Name target");
383             return 0;
384         }
385         return validate_expr(stmt->v.AnnAssign.target, Store) &&
386                (!stmt->v.AnnAssign.value ||
387                 validate_expr(stmt->v.AnnAssign.value, Load)) &&
388                validate_expr(stmt->v.AnnAssign.annotation, Load);
389     case For_kind:
390         return validate_expr(stmt->v.For.target, Store) &&
391             validate_expr(stmt->v.For.iter, Load) &&
392             validate_body(stmt->v.For.body, "For") &&
393             validate_stmts(stmt->v.For.orelse);
394     case AsyncFor_kind:
395         return validate_expr(stmt->v.AsyncFor.target, Store) &&
396             validate_expr(stmt->v.AsyncFor.iter, Load) &&
397             validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
398             validate_stmts(stmt->v.AsyncFor.orelse);
399     case While_kind:
400         return validate_expr(stmt->v.While.test, Load) &&
401             validate_body(stmt->v.While.body, "While") &&
402             validate_stmts(stmt->v.While.orelse);
403     case If_kind:
404         return validate_expr(stmt->v.If.test, Load) &&
405             validate_body(stmt->v.If.body, "If") &&
406             validate_stmts(stmt->v.If.orelse);
407     case With_kind:
408         if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
409             return 0;
410         for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
411             withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
412             if (!validate_expr(item->context_expr, Load) ||
413                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
414                 return 0;
415         }
416         return validate_body(stmt->v.With.body, "With");
417     case AsyncWith_kind:
418         if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
419             return 0;
420         for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
421             withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
422             if (!validate_expr(item->context_expr, Load) ||
423                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
424                 return 0;
425         }
426         return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
427     case Raise_kind:
428         if (stmt->v.Raise.exc) {
429             return validate_expr(stmt->v.Raise.exc, Load) &&
430                 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
431         }
432         if (stmt->v.Raise.cause) {
433             PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
434             return 0;
435         }
436         return 1;
437     case Try_kind:
438         if (!validate_body(stmt->v.Try.body, "Try"))
439             return 0;
440         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
441             !asdl_seq_LEN(stmt->v.Try.finalbody)) {
442             PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
443             return 0;
444         }
445         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
446             asdl_seq_LEN(stmt->v.Try.orelse)) {
447             PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
448             return 0;
449         }
450         for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
451             excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
452             if ((handler->v.ExceptHandler.type &&
453                  !validate_expr(handler->v.ExceptHandler.type, Load)) ||
454                 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
455                 return 0;
456         }
457         return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
458                 validate_stmts(stmt->v.Try.finalbody)) &&
459             (!asdl_seq_LEN(stmt->v.Try.orelse) ||
460              validate_stmts(stmt->v.Try.orelse));
461     case Assert_kind:
462         return validate_expr(stmt->v.Assert.test, Load) &&
463             (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
464     case Import_kind:
465         return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
466     case ImportFrom_kind:
467         if (stmt->v.ImportFrom.level < 0) {
468             PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
469             return 0;
470         }
471         return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
472     case Global_kind:
473         return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
474     case Nonlocal_kind:
475         return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
476     case Expr_kind:
477         return validate_expr(stmt->v.Expr.value, Load);
478     case AsyncFunctionDef_kind:
479         return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
480             validate_arguments(stmt->v.AsyncFunctionDef.args) &&
481             validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
482             (!stmt->v.AsyncFunctionDef.returns ||
483              validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
484     case Pass_kind:
485     case Break_kind:
486     case Continue_kind:
487         return 1;
488     default:
489         PyErr_SetString(PyExc_SystemError, "unexpected statement");
490         return 0;
491     }
492 }
493 
494 static int
validate_stmts(asdl_seq * seq)495 validate_stmts(asdl_seq *seq)
496 {
497     Py_ssize_t i;
498     for (i = 0; i < asdl_seq_LEN(seq); i++) {
499         stmt_ty stmt = asdl_seq_GET(seq, i);
500         if (stmt) {
501             if (!validate_stmt(stmt))
502                 return 0;
503         }
504         else {
505             PyErr_SetString(PyExc_ValueError,
506                             "None disallowed in statement list");
507             return 0;
508         }
509     }
510     return 1;
511 }
512 
513 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)514 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
515 {
516     Py_ssize_t i;
517     for (i = 0; i < asdl_seq_LEN(exprs); i++) {
518         expr_ty expr = asdl_seq_GET(exprs, i);
519         if (expr) {
520             if (!validate_expr(expr, ctx))
521                 return 0;
522         }
523         else if (!null_ok) {
524             PyErr_SetString(PyExc_ValueError,
525                             "None disallowed in expression list");
526             return 0;
527         }
528 
529     }
530     return 1;
531 }
532 
533 int
PyAST_Validate(mod_ty mod)534 PyAST_Validate(mod_ty mod)
535 {
536     int res = 0;
537 
538     switch (mod->kind) {
539     case Module_kind:
540         res = validate_stmts(mod->v.Module.body);
541         break;
542     case Interactive_kind:
543         res = validate_stmts(mod->v.Interactive.body);
544         break;
545     case Expression_kind:
546         res = validate_expr(mod->v.Expression.body, Load);
547         break;
548     case Suite_kind:
549         PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
550         break;
551     default:
552         PyErr_SetString(PyExc_SystemError, "impossible module node");
553         res = 0;
554         break;
555     }
556     return res;
557 }
558 
559 /* This is done here, so defines like "test" don't interfere with AST use above. */
560 #include "grammar.h"
561 #include "parsetok.h"
562 #include "graminit.h"
563 
564 /* Data structure used internally */
565 struct compiling {
566     PyArena *c_arena; /* Arena for allocating memory. */
567     PyObject *c_filename; /* filename */
568     PyObject *c_normalize; /* Normalization function from unicodedata. */
569     int c_feature_version; /* Latest minor version of Python for allowed features */
570 };
571 
572 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
573 static expr_ty ast_for_expr(struct compiling *, const node *);
574 static stmt_ty ast_for_stmt(struct compiling *, const node *);
575 static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
576 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
577                                   expr_context_ty);
578 static expr_ty ast_for_testlist(struct compiling *, const node *);
579 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
580 
581 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
582 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
583 
584 /* Note different signature for ast_for_call */
585 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
586                             const node *, const node *);
587 
588 static PyObject *parsenumber(struct compiling *, const char *);
589 static expr_ty parsestrplus(struct compiling *, const node *n);
590 static void get_last_end_pos(asdl_seq *, int *, int *);
591 
592 #define COMP_GENEXP   0
593 #define COMP_LISTCOMP 1
594 #define COMP_SETCOMP  2
595 
596 static int
init_normalization(struct compiling * c)597 init_normalization(struct compiling *c)
598 {
599     PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
600     if (!m)
601         return 0;
602     c->c_normalize = PyObject_GetAttrString(m, "normalize");
603     Py_DECREF(m);
604     if (!c->c_normalize)
605         return 0;
606     return 1;
607 }
608 
609 static identifier
new_identifier(const char * n,struct compiling * c)610 new_identifier(const char *n, struct compiling *c)
611 {
612     PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
613     if (!id)
614         return NULL;
615     /* PyUnicode_DecodeUTF8 should always return a ready string. */
616     assert(PyUnicode_IS_READY(id));
617     /* Check whether there are non-ASCII characters in the
618        identifier; if so, normalize to NFKC. */
619     if (!PyUnicode_IS_ASCII(id)) {
620         PyObject *id2;
621         _Py_IDENTIFIER(NFKC);
622         if (!c->c_normalize && !init_normalization(c)) {
623             Py_DECREF(id);
624             return NULL;
625         }
626         PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
627         if (form == NULL) {
628             Py_DECREF(id);
629             return NULL;
630         }
631         PyObject *args[2] = {form, id};
632         id2 = _PyObject_FastCall(c->c_normalize, args, 2);
633         Py_DECREF(id);
634         if (!id2)
635             return NULL;
636         if (!PyUnicode_Check(id2)) {
637             PyErr_Format(PyExc_TypeError,
638                          "unicodedata.normalize() must return a string, not "
639                          "%.200s",
640                          Py_TYPE(id2)->tp_name);
641             Py_DECREF(id2);
642             return NULL;
643         }
644         id = id2;
645     }
646     PyUnicode_InternInPlace(&id);
647     if (PyArena_AddPyObject(c->c_arena, id) < 0) {
648         Py_DECREF(id);
649         return NULL;
650     }
651     return id;
652 }
653 
654 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
655 
656 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)657 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
658 {
659     PyObject *value, *errstr, *loc, *tmp;
660     va_list va;
661 
662     va_start(va, errmsg);
663     errstr = PyUnicode_FromFormatV(errmsg, va);
664     va_end(va);
665     if (!errstr) {
666         return 0;
667     }
668     loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
669     if (!loc) {
670         Py_INCREF(Py_None);
671         loc = Py_None;
672     }
673     tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
674     if (!tmp) {
675         Py_DECREF(errstr);
676         return 0;
677     }
678     value = PyTuple_Pack(2, errstr, tmp);
679     Py_DECREF(errstr);
680     Py_DECREF(tmp);
681     if (value) {
682         PyErr_SetObject(PyExc_SyntaxError, value);
683         Py_DECREF(value);
684     }
685     return 0;
686 }
687 
/* num_stmts() returns the number of contained statements.

   Use this routine to determine how big a sequence is needed for
   the statements in a parse tree.  Its raison d'etre is this bit of
   grammar:

   stmt: simple_stmt | compound_stmt
   simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE

   A simple_stmt can contain multiple small_stmt elements joined
   by semicolons.  If the arg is a simple_stmt, the number of
   small_stmt elements is returned.
*/
701 
702 static string
new_type_comment(const char * s,struct compiling * c)703 new_type_comment(const char *s, struct compiling *c)
704 {
705     PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
706     if (res == NULL)
707         return NULL;
708     if (PyArena_AddPyObject(c->c_arena, res) < 0) {
709         Py_DECREF(res);
710         return NULL;
711     }
712     return res;
713 }
714 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
715 
716 static int
num_stmts(const node * n)717 num_stmts(const node *n)
718 {
719     int i, l;
720     node *ch;
721 
722     switch (TYPE(n)) {
723         case single_input:
724             if (TYPE(CHILD(n, 0)) == NEWLINE)
725                 return 0;
726             else
727                 return num_stmts(CHILD(n, 0));
728         case file_input:
729             l = 0;
730             for (i = 0; i < NCH(n); i++) {
731                 ch = CHILD(n, i);
732                 if (TYPE(ch) == stmt)
733                     l += num_stmts(ch);
734             }
735             return l;
736         case stmt:
737             return num_stmts(CHILD(n, 0));
738         case compound_stmt:
739             return 1;
740         case simple_stmt:
741             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
742         case suite:
743         case func_body_suite:
744             /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
745             /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
746             if (NCH(n) == 1)
747                 return num_stmts(CHILD(n, 0));
748             else {
749                 i = 2;
750                 l = 0;
751                 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
752                     i += 2;
753                 for (; i < (NCH(n) - 1); i++)
754                     l += num_stmts(CHILD(n, i));
755                 return l;
756             }
757         default: {
758             char buf[128];
759 
760             sprintf(buf, "Non-statement found: %d %d",
761                     TYPE(n), NCH(n));
762             Py_FatalError(buf);
763         }
764     }
765     Py_UNREACHABLE();
766 }
767 
768 /* Transform the CST rooted at node * to the appropriate AST
769 */
770 
771 mod_ty
PyAST_FromNodeObject(const node * n,PyCompilerFlags * flags,PyObject * filename,PyArena * arena)772 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
773                      PyObject *filename, PyArena *arena)
774 {
775     int i, j, k, num;
776     asdl_seq *stmts = NULL;
777     asdl_seq *type_ignores = NULL;
778     stmt_ty s;
779     node *ch;
780     struct compiling c;
781     mod_ty res = NULL;
782     asdl_seq *argtypes = NULL;
783     expr_ty ret, arg;
784 
785     c.c_arena = arena;
786     /* borrowed reference */
787     c.c_filename = filename;
788     c.c_normalize = NULL;
789     c.c_feature_version = flags ? flags->cf_feature_version : PY_MINOR_VERSION;
790 
791     if (TYPE(n) == encoding_decl)
792         n = CHILD(n, 0);
793 
794     k = 0;
795     switch (TYPE(n)) {
796         case file_input:
797             stmts = _Py_asdl_seq_new(num_stmts(n), arena);
798             if (!stmts)
799                 goto out;
800             for (i = 0; i < NCH(n) - 1; i++) {
801                 ch = CHILD(n, i);
802                 if (TYPE(ch) == NEWLINE)
803                     continue;
804                 REQ(ch, stmt);
805                 num = num_stmts(ch);
806                 if (num == 1) {
807                     s = ast_for_stmt(&c, ch);
808                     if (!s)
809                         goto out;
810                     asdl_seq_SET(stmts, k++, s);
811                 }
812                 else {
813                     ch = CHILD(ch, 0);
814                     REQ(ch, simple_stmt);
815                     for (j = 0; j < num; j++) {
816                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
817                         if (!s)
818                             goto out;
819                         asdl_seq_SET(stmts, k++, s);
820                     }
821                 }
822             }
823 
824             /* Type ignores are stored under the ENDMARKER in file_input. */
825             ch = CHILD(n, NCH(n) - 1);
826             REQ(ch, ENDMARKER);
827             num = NCH(ch);
828             type_ignores = _Py_asdl_seq_new(num, arena);
829             if (!type_ignores)
830                 goto out;
831 
832             for (i = 0; i < num; i++) {
833                 string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
834                 if (!type_comment)
835                     goto out;
836                 type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
837                 if (!ti)
838                    goto out;
839                asdl_seq_SET(type_ignores, i, ti);
840             }
841 
842             res = Module(stmts, type_ignores, arena);
843             break;
844         case eval_input: {
845             expr_ty testlist_ast;
846 
847             /* XXX Why not comp_for here? */
848             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
849             if (!testlist_ast)
850                 goto out;
851             res = Expression(testlist_ast, arena);
852             break;
853         }
854         case single_input:
855             if (TYPE(CHILD(n, 0)) == NEWLINE) {
856                 stmts = _Py_asdl_seq_new(1, arena);
857                 if (!stmts)
858                     goto out;
859                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
860                                             n->n_end_lineno, n->n_end_col_offset,
861                                             arena));
862                 if (!asdl_seq_GET(stmts, 0))
863                     goto out;
864                 res = Interactive(stmts, arena);
865             }
866             else {
867                 n = CHILD(n, 0);
868                 num = num_stmts(n);
869                 stmts = _Py_asdl_seq_new(num, arena);
870                 if (!stmts)
871                     goto out;
872                 if (num == 1) {
873                     s = ast_for_stmt(&c, n);
874                     if (!s)
875                         goto out;
876                     asdl_seq_SET(stmts, 0, s);
877                 }
878                 else {
879                     /* Only a simple_stmt can contain multiple statements. */
880                     REQ(n, simple_stmt);
881                     for (i = 0; i < NCH(n); i += 2) {
882                         if (TYPE(CHILD(n, i)) == NEWLINE)
883                             break;
884                         s = ast_for_stmt(&c, CHILD(n, i));
885                         if (!s)
886                             goto out;
887                         asdl_seq_SET(stmts, i / 2, s);
888                     }
889                 }
890 
891                 res = Interactive(stmts, arena);
892             }
893             break;
894         case func_type_input:
895             n = CHILD(n, 0);
896             REQ(n, func_type);
897 
898             if (TYPE(CHILD(n, 1)) == typelist) {
899                 ch = CHILD(n, 1);
900                 /* this is overly permissive -- we don't pay any attention to
901                  * stars on the args -- just parse them into an ordered list */
902                 num = 0;
903                 for (i = 0; i < NCH(ch); i++) {
904                     if (TYPE(CHILD(ch, i)) == test) {
905                         num++;
906                     }
907                 }
908 
909                 argtypes = _Py_asdl_seq_new(num, arena);
910                 if (!argtypes)
911                     goto out;
912 
913                 j = 0;
914                 for (i = 0; i < NCH(ch); i++) {
915                     if (TYPE(CHILD(ch, i)) == test) {
916                         arg = ast_for_expr(&c, CHILD(ch, i));
917                         if (!arg)
918                             goto out;
919                         asdl_seq_SET(argtypes, j++, arg);
920                     }
921                 }
922             }
923             else {
924                 argtypes = _Py_asdl_seq_new(0, arena);
925                 if (!argtypes)
926                     goto out;
927             }
928 
929             ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
930             if (!ret)
931                 goto out;
932             res = FunctionType(argtypes, ret, arena);
933             break;
934         default:
935             PyErr_Format(PyExc_SystemError,
936                          "invalid node %d for PyAST_FromNode", TYPE(n));
937             goto out;
938     }
939  out:
940     if (c.c_normalize) {
941         Py_DECREF(c.c_normalize);
942     }
943     return res;
944 }
945 
946 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)947 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
948                PyArena *arena)
949 {
950     mod_ty mod;
951     PyObject *filename;
952     filename = PyUnicode_DecodeFSDefault(filename_str);
953     if (filename == NULL)
954         return NULL;
955     mod = PyAST_FromNodeObject(n, flags, filename, arena);
956     Py_DECREF(filename);
957     return mod;
958 
959 }
960 
961 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
962 */
963 
964 static operator_ty
get_operator(struct compiling * c,const node * n)965 get_operator(struct compiling *c, const node *n)
966 {
967     switch (TYPE(n)) {
968         case VBAR:
969             return BitOr;
970         case CIRCUMFLEX:
971             return BitXor;
972         case AMPER:
973             return BitAnd;
974         case LEFTSHIFT:
975             return LShift;
976         case RIGHTSHIFT:
977             return RShift;
978         case PLUS:
979             return Add;
980         case MINUS:
981             return Sub;
982         case STAR:
983             return Mult;
984         case AT:
985             if (c->c_feature_version < 5) {
986                 ast_error(c, n,
987                           "The '@' operator is only supported in Python 3.5 and greater");
988                 return (operator_ty)0;
989             }
990             return MatMult;
991         case SLASH:
992             return Div;
993         case DOUBLESLASH:
994             return FloorDiv;
995         case PERCENT:
996             return Mod;
997         default:
998             return (operator_ty)0;
999     }
1000 }
1001 
1002 static const char * const FORBIDDEN[] = {
1003     "None",
1004     "True",
1005     "False",
1006     "__debug__",
1007     NULL,
1008 };
1009 
1010 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1011 forbidden_name(struct compiling *c, identifier name, const node *n,
1012                int full_checks)
1013 {
1014     assert(PyUnicode_Check(name));
1015     const char * const *p = FORBIDDEN;
1016     if (!full_checks) {
1017         /* In most cases, the parser will protect True, False, and None
1018            from being assign to. */
1019         p += 3;
1020     }
1021     for (; *p; p++) {
1022         if (_PyUnicode_EqualToASCIIString(name, *p)) {
1023             ast_error(c, n, "cannot assign to %U", name);
1024             return 1;
1025         }
1026     }
1027     return 0;
1028 }
1029 
1030 static expr_ty
copy_location(expr_ty e,const node * n)1031 copy_location(expr_ty e, const node *n)
1032 {
1033     if (e) {
1034         e->lineno = LINENO(n);
1035         e->col_offset = n->n_col_offset;
1036         e->end_lineno = n->n_end_lineno;
1037         e->end_col_offset = n->n_end_col_offset;
1038     }
1039     return e;
1040 }
1041 
1042 static const char *
get_expr_name(expr_ty e)1043 get_expr_name(expr_ty e)
1044 {
1045     switch (e->kind) {
1046         case Attribute_kind:
1047             return "attribute";
1048         case Subscript_kind:
1049             return "subscript";
1050         case Starred_kind:
1051             return "starred";
1052         case Name_kind:
1053             return "name";
1054         case List_kind:
1055             return "list";
1056         case Tuple_kind:
1057             return "tuple";
1058         case Lambda_kind:
1059             return "lambda";
1060         case Call_kind:
1061             return "function call";
1062         case BoolOp_kind:
1063         case BinOp_kind:
1064         case UnaryOp_kind:
1065             return "operator";
1066         case GeneratorExp_kind:
1067             return "generator expression";
1068         case Yield_kind:
1069         case YieldFrom_kind:
1070             return "yield expression";
1071         case Await_kind:
1072             return "await expression";
1073         case ListComp_kind:
1074             return "list comprehension";
1075         case SetComp_kind:
1076             return "set comprehension";
1077         case DictComp_kind:
1078             return "dict comprehension";
1079         case Dict_kind:
1080             return "dict display";
1081         case Set_kind:
1082             return "set display";
1083         case JoinedStr_kind:
1084         case FormattedValue_kind:
1085             return "f-string expression";
1086         case Constant_kind: {
1087             PyObject *value = e->v.Constant.value;
1088             if (value == Py_None) {
1089                 return "None";
1090             }
1091             if (value == Py_False) {
1092                 return "False";
1093             }
1094             if (value == Py_True) {
1095                 return "True";
1096             }
1097             if (value == Py_Ellipsis) {
1098                 return "Ellipsis";
1099             }
1100             return "literal";
1101         }
1102         case Compare_kind:
1103             return "comparison";
1104         case IfExp_kind:
1105             return "conditional expression";
1106         case NamedExpr_kind:
1107             return "named expression";
1108         default:
1109             PyErr_Format(PyExc_SystemError,
1110                          "unexpected expression in assignment %d (line %d)",
1111                          e->kind, e->lineno);
1112             return NULL;
1113     }
1114 }
1115 
1116 /* Set the context ctx for expr_ty e, recursively traversing e.
1117 
1118    Only sets context for expr kinds that "can appear in assignment context"
1119    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
1120    an appropriate syntax error and returns false.
1121 */
1122 
1123 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1124 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1125 {
1126     asdl_seq *s = NULL;
1127 
1128     /* The ast defines augmented store and load contexts, but the
1129        implementation here doesn't actually use them.  The code may be
1130        a little more complex than necessary as a result.  It also means
1131        that expressions in an augmented assignment have a Store context.
1132        Consider restructuring so that augmented assignment uses
1133        set_context(), too.
1134     */
1135     assert(ctx != AugStore && ctx != AugLoad);
1136 
1137     switch (e->kind) {
1138         case Attribute_kind:
1139             e->v.Attribute.ctx = ctx;
1140             if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1141                 return 0;
1142             break;
1143         case Subscript_kind:
1144             e->v.Subscript.ctx = ctx;
1145             break;
1146         case Starred_kind:
1147             e->v.Starred.ctx = ctx;
1148             if (!set_context(c, e->v.Starred.value, ctx, n))
1149                 return 0;
1150             break;
1151         case Name_kind:
1152             if (ctx == Store) {
1153                 if (forbidden_name(c, e->v.Name.id, n, 0))
1154                     return 0; /* forbidden_name() calls ast_error() */
1155             }
1156             e->v.Name.ctx = ctx;
1157             break;
1158         case List_kind:
1159             e->v.List.ctx = ctx;
1160             s = e->v.List.elts;
1161             break;
1162         case Tuple_kind:
1163             e->v.Tuple.ctx = ctx;
1164             s = e->v.Tuple.elts;
1165             break;
1166         default: {
1167             const char *expr_name = get_expr_name(e);
1168             if (expr_name != NULL) {
1169                 ast_error(c, n, "cannot %s %s",
1170                           ctx == Store ? "assign to" : "delete",
1171                           expr_name);
1172             }
1173             return 0;
1174         }
1175     }
1176 
1177     /* If the LHS is a list or tuple, we need to set the assignment
1178        context for all the contained elements.
1179     */
1180     if (s) {
1181         Py_ssize_t i;
1182 
1183         for (i = 0; i < asdl_seq_LEN(s); i++) {
1184             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1185                 return 0;
1186         }
1187     }
1188     return 1;
1189 }
1190 
1191 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1192 ast_for_augassign(struct compiling *c, const node *n)
1193 {
1194     REQ(n, augassign);
1195     n = CHILD(n, 0);
1196     switch (STR(n)[0]) {
1197         case '+':
1198             return Add;
1199         case '-':
1200             return Sub;
1201         case '/':
1202             if (STR(n)[1] == '/')
1203                 return FloorDiv;
1204             else
1205                 return Div;
1206         case '%':
1207             return Mod;
1208         case '<':
1209             return LShift;
1210         case '>':
1211             return RShift;
1212         case '&':
1213             return BitAnd;
1214         case '^':
1215             return BitXor;
1216         case '|':
1217             return BitOr;
1218         case '*':
1219             if (STR(n)[1] == '*')
1220                 return Pow;
1221             else
1222                 return Mult;
1223         case '@':
1224             if (c->c_feature_version < 5) {
1225                 ast_error(c, n,
1226                           "The '@' operator is only supported in Python 3.5 and greater");
1227                 return (operator_ty)0;
1228             }
1229             return MatMult;
1230         default:
1231             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1232             return (operator_ty)0;
1233     }
1234 }
1235 
1236 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1237 ast_for_comp_op(struct compiling *c, const node *n)
1238 {
1239     /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1240                |'is' 'not'
1241     */
1242     REQ(n, comp_op);
1243     if (NCH(n) == 1) {
1244         n = CHILD(n, 0);
1245         switch (TYPE(n)) {
1246             case LESS:
1247                 return Lt;
1248             case GREATER:
1249                 return Gt;
1250             case EQEQUAL:                       /* == */
1251                 return Eq;
1252             case LESSEQUAL:
1253                 return LtE;
1254             case GREATEREQUAL:
1255                 return GtE;
1256             case NOTEQUAL:
1257                 return NotEq;
1258             case NAME:
1259                 if (strcmp(STR(n), "in") == 0)
1260                     return In;
1261                 if (strcmp(STR(n), "is") == 0)
1262                     return Is;
1263                 /* fall through */
1264             default:
1265                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1266                              STR(n));
1267                 return (cmpop_ty)0;
1268         }
1269     }
1270     else if (NCH(n) == 2) {
1271         /* handle "not in" and "is not" */
1272         switch (TYPE(CHILD(n, 0))) {
1273             case NAME:
1274                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1275                     return NotIn;
1276                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1277                     return IsNot;
1278                 /* fall through */
1279             default:
1280                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1281                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1282                 return (cmpop_ty)0;
1283         }
1284     }
1285     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1286                  NCH(n));
1287     return (cmpop_ty)0;
1288 }
1289 
1290 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1291 seq_for_testlist(struct compiling *c, const node *n)
1292 {
1293     /* testlist: test (',' test)* [',']
1294        testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1295     */
1296     asdl_seq *seq;
1297     expr_ty expression;
1298     int i;
1299     assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1300 
1301     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1302     if (!seq)
1303         return NULL;
1304 
1305     for (i = 0; i < NCH(n); i += 2) {
1306         const node *ch = CHILD(n, i);
1307         assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1308 
1309         expression = ast_for_expr(c, ch);
1310         if (!expression)
1311             return NULL;
1312 
1313         assert(i / 2 < seq->size);
1314         asdl_seq_SET(seq, i / 2, expression);
1315     }
1316     return seq;
1317 }
1318 
1319 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1320 ast_for_arg(struct compiling *c, const node *n)
1321 {
1322     identifier name;
1323     expr_ty annotation = NULL;
1324     node *ch;
1325     arg_ty ret;
1326 
1327     assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1328     ch = CHILD(n, 0);
1329     name = NEW_IDENTIFIER(ch);
1330     if (!name)
1331         return NULL;
1332     if (forbidden_name(c, name, ch, 0))
1333         return NULL;
1334 
1335     if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1336         annotation = ast_for_expr(c, CHILD(n, 2));
1337         if (!annotation)
1338             return NULL;
1339     }
1340 
1341     ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1342               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1343     if (!ret)
1344         return NULL;
1345     return ret;
1346 }
1347 
1348 /* returns -1 if failed to handle keyword only arguments
1349    returns new position to keep processing if successful
1350                (',' tfpdef ['=' test])*
1351                      ^^^
1352    start pointing here
1353  */
1354 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1355 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1356                         asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1357 {
1358     PyObject *argname;
1359     node *ch;
1360     expr_ty expression, annotation;
1361     arg_ty arg = NULL;
1362     int i = start;
1363     int j = 0; /* index for kwdefaults and kwonlyargs */
1364 
1365     if (kwonlyargs == NULL) {
1366         ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1367         return -1;
1368     }
1369     assert(kwdefaults != NULL);
1370     while (i < NCH(n)) {
1371         ch = CHILD(n, i);
1372         switch (TYPE(ch)) {
1373             case vfpdef:
1374             case tfpdef:
1375                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1376                     expression = ast_for_expr(c, CHILD(n, i + 2));
1377                     if (!expression)
1378                         goto error;
1379                     asdl_seq_SET(kwdefaults, j, expression);
1380                     i += 2; /* '=' and test */
1381                 }
1382                 else { /* setting NULL if no default value exists */
1383                     asdl_seq_SET(kwdefaults, j, NULL);
1384                 }
1385                 if (NCH(ch) == 3) {
1386                     /* ch is NAME ':' test */
1387                     annotation = ast_for_expr(c, CHILD(ch, 2));
1388                     if (!annotation)
1389                         goto error;
1390                 }
1391                 else {
1392                     annotation = NULL;
1393                 }
1394                 ch = CHILD(ch, 0);
1395                 argname = NEW_IDENTIFIER(ch);
1396                 if (!argname)
1397                     goto error;
1398                 if (forbidden_name(c, argname, ch, 0))
1399                     goto error;
1400                 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1401                           ch->n_end_lineno, ch->n_end_col_offset,
1402                           c->c_arena);
1403                 if (!arg)
1404                     goto error;
1405                 asdl_seq_SET(kwonlyargs, j++, arg);
1406                 i += 1; /* the name */
1407                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1408                     i += 1; /* the comma, if present */
1409                 break;
1410             case TYPE_COMMENT:
1411                 /* arg will be equal to the last argument processed */
1412                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1413                 if (!arg->type_comment)
1414                     goto error;
1415                 i += 1;
1416                 break;
1417             case DOUBLESTAR:
1418                 return i;
1419             default:
1420                 ast_error(c, ch, "unexpected node");
1421                 goto error;
1422         }
1423     }
1424     return i;
1425  error:
1426     return -1;
1427 }
1428 
1429 /* Create AST for argument list. */
1430 
1431 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)1432 ast_for_arguments(struct compiling *c, const node *n)
1433 {
1434     /* This function handles both typedargslist (function definition)
1435        and varargslist (lambda definition).
1436 
1437        parameters: '(' [typedargslist] ')'
1438 
1439        The following definition for typedarglist is equivalent to this set of rules:
1440 
1441          arguments = argument (',' [TYPE_COMMENT] argument)*
1442          argument = tfpdef ['=' test]
1443          kwargs = '**' tfpdef [','] [TYPE_COMMENT]
1444          args = '*' [tfpdef]
1445          kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
1446                          [TYPE_COMMENT] [kwargs]])
1447          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1448          poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
1449                                          [TYPE_COMMENT] [args_kwonly_kwargs]])
1450          typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1451          typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
1452                         typedargslist_no_posonly]])|(typedargslist_no_posonly)"
1453 
1454        typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1455            ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
1456            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1457            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1458            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1459            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1460            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1461            '**' tfpdef [','] [TYPE_COMMENT]]] ) |  (tfpdef ['=' test] (','
1462            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1463            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1464            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1465            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1466            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1467            '**' tfpdef [','] [TYPE_COMMENT]))
1468 
1469        tfpdef: NAME [':' test]
1470 
1471        The following definition for varargslist is equivalent to this set of rules:
1472 
1473          arguments = argument (',' argument )*
1474          argument = vfpdef ['=' test]
1475          kwargs = '**' vfpdef [',']
1476          args = '*' [vfpdef]
1477          kwonly_kwargs = (',' argument )* [',' [kwargs]]
1478          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1479          poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
1480          vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1481          varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
1482                        (vararglist_no_posonly)
1483 
1484        varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
1485            test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
1486            ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
1487            [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
1488            ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1489            | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
1490            [',']]] | '**' vfpdef [','])
1491 
1492        vfpdef: NAME
1493 
1494     */
1495     int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
1496     int nposdefaults = 0, found_default = 0;
1497     asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1498     arg_ty vararg = NULL, kwarg = NULL;
1499     arg_ty arg = NULL;
1500     node *ch;
1501 
1502     if (TYPE(n) == parameters) {
1503         if (NCH(n) == 2) /* () as argument list */
1504             return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1505         n = CHILD(n, 1);
1506     }
1507     assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1508 
1509     /* First count the number of positional args & defaults.  The
1510        variable i is the loop index for this for loop and the next.
1511        The next loop picks up where the first leaves off.
1512     */
1513     for (i = 0; i < NCH(n); i++) {
1514         ch = CHILD(n, i);
1515         if (TYPE(ch) == STAR) {
1516             /* skip star */
1517             i++;
1518             if (i < NCH(n) && /* skip argument following star */
1519                 (TYPE(CHILD(n, i)) == tfpdef ||
1520                  TYPE(CHILD(n, i)) == vfpdef)) {
1521                 i++;
1522             }
1523             break;
1524         }
1525         if (TYPE(ch) == DOUBLESTAR) break;
1526         if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1527         if (TYPE(ch) == EQUAL) nposdefaults++;
1528         if (TYPE(ch) == SLASH ) {
1529             nposonlyargs = nposargs;
1530             nposargs = 0;
1531         }
1532     }
1533     /* count the number of keyword only args &
1534        defaults for keyword only args */
1535     for ( ; i < NCH(n); ++i) {
1536         ch = CHILD(n, i);
1537         if (TYPE(ch) == DOUBLESTAR) break;
1538         if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1539     }
1540     posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
1541     if (!posonlyargs && nposonlyargs) {
1542         return NULL;
1543     }
1544     posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1545     if (!posargs && nposargs)
1546         return NULL;
1547     kwonlyargs = (nkwonlyargs ?
1548                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1549     if (!kwonlyargs && nkwonlyargs)
1550         return NULL;
1551     posdefaults = (nposdefaults ?
1552                     _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1553     if (!posdefaults && nposdefaults)
1554         return NULL;
1555     /* The length of kwonlyargs and kwdefaults are same
1556        since we set NULL as default for keyword only argument w/o default
1557        - we have sequence data structure, but no dictionary */
1558     kwdefaults = (nkwonlyargs ?
1559                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1560     if (!kwdefaults && nkwonlyargs)
1561         return NULL;
1562 
1563     /* tfpdef: NAME [':' test]
1564        vfpdef: NAME
1565     */
1566     i = 0;
1567     j = 0;  /* index for defaults */
1568     k = 0;  /* index for args */
1569     l = 0;  /* index for posonlyargs */
1570     while (i < NCH(n)) {
1571         ch = CHILD(n, i);
1572         switch (TYPE(ch)) {
1573             case tfpdef:
1574             case vfpdef:
1575                 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1576                    anything other than EQUAL or a comma? */
1577                 /* XXX Should NCH(n) check be made a separate check? */
1578                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1579                     expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1580                     if (!expression)
1581                         return NULL;
1582                     assert(posdefaults != NULL);
1583                     asdl_seq_SET(posdefaults, j++, expression);
1584                     i += 2;
1585                     found_default = 1;
1586                 }
1587                 else if (found_default) {
1588                     ast_error(c, n,
1589                               "non-default argument follows default argument");
1590                     return NULL;
1591                 }
1592                 arg = ast_for_arg(c, ch);
1593                 if (!arg)
1594                     return NULL;
1595                 if (l < nposonlyargs) {
1596                     asdl_seq_SET(posonlyargs, l++, arg);
1597                 } else {
1598                     asdl_seq_SET(posargs, k++, arg);
1599                 }
1600                 i += 1; /* the name */
1601                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1602                     i += 1; /* the comma, if present */
1603                 break;
1604              case SLASH:
1605                 /* Advance the slash and the comma. If there are more names
1606                  * after the slash there will be a comma so we are advancing
1607                  * the correct number of nodes. If the slash is the last item,
1608                  * we will be advancing an extra token but then * i > NCH(n)
1609                  * and the enclosing while will finish correctly. */
1610                 i += 2;
1611                 break;
1612             case STAR:
1613                 if (i+1 >= NCH(n) ||
1614                     (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
1615                                        || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
1616                     ast_error(c, CHILD(n, i),
1617                               "named arguments must follow bare *");
1618                     return NULL;
1619                 }
1620                 ch = CHILD(n, i+1);  /* tfpdef or COMMA */
1621                 if (TYPE(ch) == COMMA) {
1622                     int res = 0;
1623                     i += 2; /* now follows keyword only arguments */
1624 
1625                     if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1626                         ast_error(c, CHILD(n, i),
1627                                   "bare * has associated type comment");
1628                         return NULL;
1629                     }
1630 
1631                     res = handle_keywordonly_args(c, n, i,
1632                                                   kwonlyargs, kwdefaults);
1633                     if (res == -1) return NULL;
1634                     i = res; /* res has new position to process */
1635                 }
1636                 else {
1637                     vararg = ast_for_arg(c, ch);
1638                     if (!vararg)
1639                         return NULL;
1640 
1641                 i += 2; /* the star and the name */
1642                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1643                     i += 1; /* the comma, if present */
1644 
1645                 if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1646                         vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
1647                         if (!vararg->type_comment)
1648                             return NULL;
1649                         i += 1;
1650                     }
1651 
1652                     if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1653                                     || TYPE(CHILD(n, i)) == vfpdef)) {
1654                         int res = 0;
1655                         res = handle_keywordonly_args(c, n, i,
1656                                                       kwonlyargs, kwdefaults);
1657                         if (res == -1) return NULL;
1658                         i = res; /* res has new position to process */
1659                     }
1660                 }
1661                 break;
1662             case DOUBLESTAR:
1663                 ch = CHILD(n, i+1);  /* tfpdef */
1664                 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1665                 kwarg = ast_for_arg(c, ch);
1666                 if (!kwarg)
1667                     return NULL;
1668                 i += 2; /* the double star and the name */
1669                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1670                     i += 1; /* the comma, if present */
1671                 break;
1672             case TYPE_COMMENT:
1673                 assert(i);
1674 
1675                 if (kwarg)
1676                     arg = kwarg;
1677 
1678                 /* arg will be equal to the last argument processed */
1679                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1680                 if (!arg->type_comment)
1681                     return NULL;
1682                 i += 1;
1683                 break;
1684             default:
1685                 PyErr_Format(PyExc_SystemError,
1686                              "unexpected node in varargslist: %d @ %d",
1687                              TYPE(ch), i);
1688                 return NULL;
1689         }
1690     }
1691     return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1692 }
1693 
1694 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1695 ast_for_dotted_name(struct compiling *c, const node *n)
1696 {
1697     expr_ty e;
1698     identifier id;
1699     int lineno, col_offset;
1700     int i;
1701     node *ch;
1702 
1703     REQ(n, dotted_name);
1704 
1705     lineno = LINENO(n);
1706     col_offset = n->n_col_offset;
1707 
1708     ch = CHILD(n, 0);
1709     id = NEW_IDENTIFIER(ch);
1710     if (!id)
1711         return NULL;
1712     e = Name(id, Load, lineno, col_offset,
1713              ch->n_end_lineno, ch->n_end_col_offset, c->c_arena);
1714     if (!e)
1715         return NULL;
1716 
1717     for (i = 2; i < NCH(n); i+=2) {
1718         id = NEW_IDENTIFIER(CHILD(n, i));
1719         if (!id)
1720             return NULL;
1721         e = Attribute(e, id, Load, lineno, col_offset,
1722                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1723         if (!e)
1724             return NULL;
1725     }
1726 
1727     return e;
1728 }
1729 
1730 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1731 ast_for_decorator(struct compiling *c, const node *n)
1732 {
1733     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1734     expr_ty d = NULL;
1735     expr_ty name_expr;
1736 
1737     REQ(n, decorator);
1738     REQ(CHILD(n, 0), AT);
1739     REQ(RCHILD(n, -1), NEWLINE);
1740 
1741     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1742     if (!name_expr)
1743         return NULL;
1744 
1745     if (NCH(n) == 3) { /* No arguments */
1746         d = name_expr;
1747         name_expr = NULL;
1748     }
1749     else if (NCH(n) == 5) { /* Call with no arguments */
1750         d = Call(name_expr, NULL, NULL,
1751                  name_expr->lineno, name_expr->col_offset,
1752                  CHILD(n, 3)->n_end_lineno, CHILD(n, 3)->n_end_col_offset,
1753                  c->c_arena);
1754         if (!d)
1755             return NULL;
1756         name_expr = NULL;
1757     }
1758     else {
1759         d = ast_for_call(c, CHILD(n, 3), name_expr, CHILD(n, 2), CHILD(n, 4));
1760         if (!d)
1761             return NULL;
1762         name_expr = NULL;
1763     }
1764 
1765     return d;
1766 }
1767 
1768 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1769 ast_for_decorators(struct compiling *c, const node *n)
1770 {
1771     asdl_seq* decorator_seq;
1772     expr_ty d;
1773     int i;
1774 
1775     REQ(n, decorators);
1776     decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1777     if (!decorator_seq)
1778         return NULL;
1779 
1780     for (i = 0; i < NCH(n); i++) {
1781         d = ast_for_decorator(c, CHILD(n, i));
1782         if (!d)
1783             return NULL;
1784         asdl_seq_SET(decorator_seq, i, d);
1785     }
1786     return decorator_seq;
1787 }
1788 
1789 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1790 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1791                      asdl_seq *decorator_seq, bool is_async)
1792 {
1793     /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1794     const node * const n = is_async ? CHILD(n0, 1) : n0;
1795     identifier name;
1796     arguments_ty args;
1797     asdl_seq *body;
1798     expr_ty returns = NULL;
1799     int name_i = 1;
1800     int end_lineno, end_col_offset;
1801     node *tc;
1802     string type_comment = NULL;
1803 
1804     if (is_async && c->c_feature_version < 5) {
1805         ast_error(c, n,
1806                   "Async functions are only supported in Python 3.5 and greater");
1807         return NULL;
1808     }
1809 
1810     REQ(n, funcdef);
1811 
1812     name = NEW_IDENTIFIER(CHILD(n, name_i));
1813     if (!name)
1814         return NULL;
1815     if (forbidden_name(c, name, CHILD(n, name_i), 0))
1816         return NULL;
1817     args = ast_for_arguments(c, CHILD(n, name_i + 1));
1818     if (!args)
1819         return NULL;
1820     if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1821         returns = ast_for_expr(c, CHILD(n, name_i + 3));
1822         if (!returns)
1823             return NULL;
1824         name_i += 2;
1825     }
1826     if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1827         type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1828         if (!type_comment)
1829             return NULL;
1830         name_i += 1;
1831     }
1832     body = ast_for_suite(c, CHILD(n, name_i + 3));
1833     if (!body)
1834         return NULL;
1835     get_last_end_pos(body, &end_lineno, &end_col_offset);
1836 
1837     if (NCH(CHILD(n, name_i + 3)) > 1) {
1838         /* Check if the suite has a type comment in it. */
1839         tc = CHILD(CHILD(n, name_i + 3), 1);
1840 
1841         if (TYPE(tc) == TYPE_COMMENT) {
1842             if (type_comment != NULL) {
1843                 ast_error(c, n, "Cannot have two type comments on def");
1844                 return NULL;
1845             }
1846             type_comment = NEW_TYPE_COMMENT(tc);
1847             if (!type_comment)
1848                 return NULL;
1849         }
1850     }
1851 
1852     if (is_async)
1853         return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1854                                 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1855     else
1856         return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1857                            LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1858 }
1859 
1860 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1861 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1862 {
1863     /* async_funcdef: ASYNC funcdef */
1864     REQ(n, async_funcdef);
1865     REQ(CHILD(n, 0), ASYNC);
1866     REQ(CHILD(n, 1), funcdef);
1867 
1868     return ast_for_funcdef_impl(c, n, decorator_seq,
1869                                 true /* is_async */);
1870 }
1871 
1872 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1873 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1874 {
1875     /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1876     return ast_for_funcdef_impl(c, n, decorator_seq,
1877                                 false /* is_async */);
1878 }
1879 
1880 
1881 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1882 ast_for_async_stmt(struct compiling *c, const node *n)
1883 {
1884     /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1885     REQ(n, async_stmt);
1886     REQ(CHILD(n, 0), ASYNC);
1887 
1888     switch (TYPE(CHILD(n, 1))) {
1889         case funcdef:
1890             return ast_for_funcdef_impl(c, n, NULL,
1891                                         true /* is_async */);
1892         case with_stmt:
1893             return ast_for_with_stmt(c, n,
1894                                      true /* is_async */);
1895 
1896         case for_stmt:
1897             return ast_for_for_stmt(c, n,
1898                                     true /* is_async */);
1899 
1900         default:
1901             PyErr_Format(PyExc_SystemError,
1902                          "invalid async stament: %s",
1903                          STR(CHILD(n, 1)));
1904             return NULL;
1905     }
1906 }
1907 
1908 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1909 ast_for_decorated(struct compiling *c, const node *n)
1910 {
1911     /* decorated: decorators (classdef | funcdef | async_funcdef) */
1912     stmt_ty thing = NULL;
1913     asdl_seq *decorator_seq = NULL;
1914 
1915     REQ(n, decorated);
1916 
1917     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1918     if (!decorator_seq)
1919       return NULL;
1920 
1921     assert(TYPE(CHILD(n, 1)) == funcdef ||
1922            TYPE(CHILD(n, 1)) == async_funcdef ||
1923            TYPE(CHILD(n, 1)) == classdef);
1924 
1925     if (TYPE(CHILD(n, 1)) == funcdef) {
1926       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1927     } else if (TYPE(CHILD(n, 1)) == classdef) {
1928       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1929     } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1930       thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1931     }
1932     return thing;
1933 }
1934 
1935 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1936 ast_for_namedexpr(struct compiling *c, const node *n)
1937 {
1938     /* namedexpr_test: test [':=' test]
1939        argument: ( test [comp_for] |
1940             test ':=' test |
1941             test '=' test |
1942             '**' test |
1943             '*' test )
1944     */
1945     expr_ty target, value;
1946 
1947     target = ast_for_expr(c, CHILD(n, 0));
1948     if (!target)
1949         return NULL;
1950 
1951     value = ast_for_expr(c, CHILD(n, 2));
1952     if (!value)
1953         return NULL;
1954 
1955     if (target->kind != Name_kind) {
1956         const char *expr_name = get_expr_name(target);
1957         if (expr_name != NULL) {
1958             ast_error(c, n, "cannot use named assignment with %s", expr_name);
1959         }
1960         return NULL;
1961     }
1962 
1963     if (!set_context(c, target, Store, n))
1964         return NULL;
1965 
1966     return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1967                      n->n_end_col_offset, c->c_arena);
1968 }
1969 
1970 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1971 ast_for_lambdef(struct compiling *c, const node *n)
1972 {
1973     /* lambdef: 'lambda' [varargslist] ':' test
1974        lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1975     arguments_ty args;
1976     expr_ty expression;
1977 
1978     if (NCH(n) == 3) {
1979         args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1980         if (!args)
1981             return NULL;
1982         expression = ast_for_expr(c, CHILD(n, 2));
1983         if (!expression)
1984             return NULL;
1985     }
1986     else {
1987         args = ast_for_arguments(c, CHILD(n, 1));
1988         if (!args)
1989             return NULL;
1990         expression = ast_for_expr(c, CHILD(n, 3));
1991         if (!expression)
1992             return NULL;
1993     }
1994 
1995     return Lambda(args, expression, LINENO(n), n->n_col_offset,
1996                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1997 }
1998 
1999 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)2000 ast_for_ifexpr(struct compiling *c, const node *n)
2001 {
2002     /* test: or_test 'if' or_test 'else' test */
2003     expr_ty expression, body, orelse;
2004 
2005     assert(NCH(n) == 5);
2006     body = ast_for_expr(c, CHILD(n, 0));
2007     if (!body)
2008         return NULL;
2009     expression = ast_for_expr(c, CHILD(n, 2));
2010     if (!expression)
2011         return NULL;
2012     orelse = ast_for_expr(c, CHILD(n, 4));
2013     if (!orelse)
2014         return NULL;
2015     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
2016                  n->n_end_lineno, n->n_end_col_offset,
2017                  c->c_arena);
2018 }
2019 
2020 /*
2021    Count the number of 'for' loops in a comprehension.
2022 
2023    Helper for ast_for_comprehension().
2024 */
2025 
2026 static int
count_comp_fors(struct compiling * c,const node * n)2027 count_comp_fors(struct compiling *c, const node *n)
2028 {
2029     int n_fors = 0;
2030 
2031   count_comp_for:
2032     n_fors++;
2033     REQ(n, comp_for);
2034     if (NCH(n) == 2) {
2035         REQ(CHILD(n, 0), ASYNC);
2036         n = CHILD(n, 1);
2037     }
2038     else if (NCH(n) == 1) {
2039         n = CHILD(n, 0);
2040     }
2041     else {
2042         goto error;
2043     }
2044     if (NCH(n) == (5)) {
2045         n = CHILD(n, 4);
2046     }
2047     else {
2048         return n_fors;
2049     }
2050   count_comp_iter:
2051     REQ(n, comp_iter);
2052     n = CHILD(n, 0);
2053     if (TYPE(n) == comp_for)
2054         goto count_comp_for;
2055     else if (TYPE(n) == comp_if) {
2056         if (NCH(n) == 3) {
2057             n = CHILD(n, 2);
2058             goto count_comp_iter;
2059         }
2060         else
2061             return n_fors;
2062     }
2063 
2064   error:
2065     /* Should never be reached */
2066     PyErr_SetString(PyExc_SystemError,
2067                     "logic error in count_comp_fors");
2068     return -1;
2069 }
2070 
2071 /* Count the number of 'if' statements in a comprehension.
2072 
2073    Helper for ast_for_comprehension().
2074 */
2075 
2076 static int
count_comp_ifs(struct compiling * c,const node * n)2077 count_comp_ifs(struct compiling *c, const node *n)
2078 {
2079     int n_ifs = 0;
2080 
2081     while (1) {
2082         REQ(n, comp_iter);
2083         if (TYPE(CHILD(n, 0)) == comp_for)
2084             return n_ifs;
2085         n = CHILD(n, 0);
2086         REQ(n, comp_if);
2087         n_ifs++;
2088         if (NCH(n) == 2)
2089             return n_ifs;
2090         n = CHILD(n, 2);
2091     }
2092 }
2093 
2094 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)2095 ast_for_comprehension(struct compiling *c, const node *n)
2096 {
2097     int i, n_fors;
2098     asdl_seq *comps;
2099 
2100     n_fors = count_comp_fors(c, n);
2101     if (n_fors == -1)
2102         return NULL;
2103 
2104     comps = _Py_asdl_seq_new(n_fors, c->c_arena);
2105     if (!comps)
2106         return NULL;
2107 
2108     for (i = 0; i < n_fors; i++) {
2109         comprehension_ty comp;
2110         asdl_seq *t;
2111         expr_ty expression, first;
2112         node *for_ch;
2113         node *sync_n;
2114         int is_async = 0;
2115 
2116         REQ(n, comp_for);
2117 
2118         if (NCH(n) == 2) {
2119             is_async = 1;
2120             REQ(CHILD(n, 0), ASYNC);
2121             sync_n = CHILD(n, 1);
2122         }
2123         else {
2124             sync_n = CHILD(n, 0);
2125         }
2126         REQ(sync_n, sync_comp_for);
2127 
2128         /* Async comprehensions only allowed in Python 3.6 and greater */
2129         if (is_async && c->c_feature_version < 6) {
2130             ast_error(c, n,
2131                       "Async comprehensions are only supported in Python 3.6 and greater");
2132             return NULL;
2133         }
2134 
2135         for_ch = CHILD(sync_n, 1);
2136         t = ast_for_exprlist(c, for_ch, Store);
2137         if (!t)
2138             return NULL;
2139         expression = ast_for_expr(c, CHILD(sync_n, 3));
2140         if (!expression)
2141             return NULL;
2142 
2143         /* Check the # of children rather than the length of t, since
2144            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
2145         first = (expr_ty)asdl_seq_GET(t, 0);
2146         if (NCH(for_ch) == 1)
2147             comp = comprehension(first, expression, NULL,
2148                                  is_async, c->c_arena);
2149         else
2150             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
2151                                        for_ch->n_end_lineno, for_ch->n_end_col_offset,
2152                                        c->c_arena),
2153                                  expression, NULL, is_async, c->c_arena);
2154         if (!comp)
2155             return NULL;
2156 
2157         if (NCH(sync_n) == 5) {
2158             int j, n_ifs;
2159             asdl_seq *ifs;
2160 
2161             n = CHILD(sync_n, 4);
2162             n_ifs = count_comp_ifs(c, n);
2163             if (n_ifs == -1)
2164                 return NULL;
2165 
2166             ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
2167             if (!ifs)
2168                 return NULL;
2169 
2170             for (j = 0; j < n_ifs; j++) {
2171                 REQ(n, comp_iter);
2172                 n = CHILD(n, 0);
2173                 REQ(n, comp_if);
2174 
2175                 expression = ast_for_expr(c, CHILD(n, 1));
2176                 if (!expression)
2177                     return NULL;
2178                 asdl_seq_SET(ifs, j, expression);
2179                 if (NCH(n) == 3)
2180                     n = CHILD(n, 2);
2181             }
2182             /* on exit, must guarantee that n is a comp_for */
2183             if (TYPE(n) == comp_iter)
2184                 n = CHILD(n, 0);
2185             comp->ifs = ifs;
2186         }
2187         asdl_seq_SET(comps, i, comp);
2188     }
2189     return comps;
2190 }
2191 
2192 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2193 ast_for_itercomp(struct compiling *c, const node *n, int type)
2194 {
2195     /* testlist_comp: (test|star_expr)
2196      *                ( comp_for | (',' (test|star_expr))* [','] ) */
2197     expr_ty elt;
2198     asdl_seq *comps;
2199     node *ch;
2200 
2201     assert(NCH(n) > 1);
2202 
2203     ch = CHILD(n, 0);
2204     elt = ast_for_expr(c, ch);
2205     if (!elt)
2206         return NULL;
2207     if (elt->kind == Starred_kind) {
2208         ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2209         return NULL;
2210     }
2211 
2212     comps = ast_for_comprehension(c, CHILD(n, 1));
2213     if (!comps)
2214         return NULL;
2215 
2216     if (type == COMP_GENEXP)
2217         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2218                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2219     else if (type == COMP_LISTCOMP)
2220         return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2221                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2222     else if (type == COMP_SETCOMP)
2223         return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2224                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2225     else
2226         /* Should never happen */
2227         return NULL;
2228 }
2229 
2230 /* Fills in the key, value pair corresponding to the dict element.  In case
2231  * of an unpacking, key is NULL.  *i is advanced by the number of ast
2232  * elements.  Iff successful, nonzero is returned.
2233  */
2234 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2235 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2236                     expr_ty *key, expr_ty *value)
2237 {
2238     expr_ty expression;
2239     if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2240         assert(NCH(n) - *i >= 2);
2241 
2242         expression = ast_for_expr(c, CHILD(n, *i + 1));
2243         if (!expression)
2244             return 0;
2245         *key = NULL;
2246         *value = expression;
2247 
2248         *i += 2;
2249     }
2250     else {
2251         assert(NCH(n) - *i >= 3);
2252 
2253         expression = ast_for_expr(c, CHILD(n, *i));
2254         if (!expression)
2255             return 0;
2256         *key = expression;
2257 
2258         REQ(CHILD(n, *i + 1), COLON);
2259 
2260         expression = ast_for_expr(c, CHILD(n, *i + 2));
2261         if (!expression)
2262             return 0;
2263         *value = expression;
2264 
2265         *i += 3;
2266     }
2267     return 1;
2268 }
2269 
2270 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2271 ast_for_dictcomp(struct compiling *c, const node *n)
2272 {
2273     expr_ty key, value;
2274     asdl_seq *comps;
2275     int i = 0;
2276 
2277     if (!ast_for_dictelement(c, n, &i, &key, &value))
2278         return NULL;
2279     assert(key);
2280     assert(NCH(n) - i >= 1);
2281 
2282     comps = ast_for_comprehension(c, CHILD(n, i));
2283     if (!comps)
2284         return NULL;
2285 
2286     return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2287                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2288 }
2289 
2290 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2291 ast_for_dictdisplay(struct compiling *c, const node *n)
2292 {
2293     int i;
2294     int j;
2295     int size;
2296     asdl_seq *keys, *values;
2297 
2298     size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2299     keys = _Py_asdl_seq_new(size, c->c_arena);
2300     if (!keys)
2301         return NULL;
2302 
2303     values = _Py_asdl_seq_new(size, c->c_arena);
2304     if (!values)
2305         return NULL;
2306 
2307     j = 0;
2308     for (i = 0; i < NCH(n); i++) {
2309         expr_ty key, value;
2310 
2311         if (!ast_for_dictelement(c, n, &i, &key, &value))
2312             return NULL;
2313         asdl_seq_SET(keys, j, key);
2314         asdl_seq_SET(values, j, value);
2315 
2316         j++;
2317     }
2318     keys->size = j;
2319     values->size = j;
2320     return Dict(keys, values, LINENO(n), n->n_col_offset,
2321                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2322 }
2323 
2324 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2325 ast_for_genexp(struct compiling *c, const node *n)
2326 {
2327     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2328     return ast_for_itercomp(c, n, COMP_GENEXP);
2329 }
2330 
2331 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2332 ast_for_listcomp(struct compiling *c, const node *n)
2333 {
2334     assert(TYPE(n) == (testlist_comp));
2335     return ast_for_itercomp(c, n, COMP_LISTCOMP);
2336 }
2337 
2338 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2339 ast_for_setcomp(struct compiling *c, const node *n)
2340 {
2341     assert(TYPE(n) == (dictorsetmaker));
2342     return ast_for_itercomp(c, n, COMP_SETCOMP);
2343 }
2344 
2345 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2346 ast_for_setdisplay(struct compiling *c, const node *n)
2347 {
2348     int i;
2349     int size;
2350     asdl_seq *elts;
2351 
2352     assert(TYPE(n) == (dictorsetmaker));
2353     size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2354     elts = _Py_asdl_seq_new(size, c->c_arena);
2355     if (!elts)
2356         return NULL;
2357     for (i = 0; i < NCH(n); i += 2) {
2358         expr_ty expression;
2359         expression = ast_for_expr(c, CHILD(n, i));
2360         if (!expression)
2361             return NULL;
2362         asdl_seq_SET(elts, i / 2, expression);
2363     }
2364     return Set(elts, LINENO(n), n->n_col_offset,
2365                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2366 }
2367 
2368 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2369 ast_for_atom(struct compiling *c, const node *n)
2370 {
2371     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2372        | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2373        | '...' | 'None' | 'True' | 'False'
2374     */
2375     node *ch = CHILD(n, 0);
2376 
2377     switch (TYPE(ch)) {
2378     case NAME: {
2379         PyObject *name;
2380         const char *s = STR(ch);
2381         size_t len = strlen(s);
2382         if (len >= 4 && len <= 5) {
2383             if (!strcmp(s, "None"))
2384                 return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
2385                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2386             if (!strcmp(s, "True"))
2387                 return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
2388                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2389             if (!strcmp(s, "False"))
2390                 return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
2391                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2392         }
2393         name = new_identifier(s, c);
2394         if (!name)
2395             return NULL;
2396         /* All names start in Load context, but may later be changed. */
2397         return Name(name, Load, LINENO(n), n->n_col_offset,
2398                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2399     }
2400     case STRING: {
2401         expr_ty str = parsestrplus(c, n);
2402         if (!str) {
2403             const char *errtype = NULL;
2404             if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2405                 errtype = "unicode error";
2406             else if (PyErr_ExceptionMatches(PyExc_ValueError))
2407                 errtype = "value error";
2408             if (errtype) {
2409                 PyObject *type, *value, *tback, *errstr;
2410                 PyErr_Fetch(&type, &value, &tback);
2411                 errstr = PyObject_Str(value);
2412                 if (errstr) {
2413                     ast_error(c, n, "(%s) %U", errtype, errstr);
2414                     Py_DECREF(errstr);
2415                 }
2416                 else {
2417                     PyErr_Clear();
2418                     ast_error(c, n, "(%s) unknown error", errtype);
2419                 }
2420                 Py_DECREF(type);
2421                 Py_XDECREF(value);
2422                 Py_XDECREF(tback);
2423             }
2424             return NULL;
2425         }
2426         return str;
2427     }
2428     case NUMBER: {
2429         PyObject *pynum;
2430         /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
2431         /* Check for underscores here rather than in parse_number so we can report a line number on error */
2432         if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
2433             ast_error(c, ch,
2434                       "Underscores in numeric literals are only supported in Python 3.6 and greater");
2435             return NULL;
2436         }
2437         pynum = parsenumber(c, STR(ch));
2438         if (!pynum)
2439             return NULL;
2440 
2441         if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2442             Py_DECREF(pynum);
2443             return NULL;
2444         }
2445         return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
2446                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2447     }
2448     case ELLIPSIS: /* Ellipsis */
2449         return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
2450                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2451     case LPAR: /* some parenthesized expressions */
2452         ch = CHILD(n, 1);
2453 
2454         if (TYPE(ch) == RPAR)
2455             return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
2456                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2457 
2458         if (TYPE(ch) == yield_expr)
2459             return ast_for_expr(c, ch);
2460 
2461         /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2462         if (NCH(ch) == 1) {
2463             return ast_for_testlist(c, ch);
2464         }
2465 
2466         if (TYPE(CHILD(ch, 1)) == comp_for) {
2467             return copy_location(ast_for_genexp(c, ch), n);
2468         }
2469         else {
2470             return copy_location(ast_for_testlist(c, ch), n);
2471         }
2472     case LSQB: /* list (or list comprehension) */
2473         ch = CHILD(n, 1);
2474 
2475         if (TYPE(ch) == RSQB)
2476             return List(NULL, Load, LINENO(n), n->n_col_offset,
2477                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2478 
2479         REQ(ch, testlist_comp);
2480         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2481             asdl_seq *elts = seq_for_testlist(c, ch);
2482             if (!elts)
2483                 return NULL;
2484 
2485             return List(elts, Load, LINENO(n), n->n_col_offset,
2486                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2487         }
2488         else {
2489             return copy_location(ast_for_listcomp(c, ch), n);
2490         }
2491     case LBRACE: {
2492         /* dictorsetmaker: ( ((test ':' test | '**' test)
2493          *                    (comp_for | (',' (test ':' test | '**' test))* [','])) |
2494          *                   ((test | '*' test)
2495          *                    (comp_for | (',' (test | '*' test))* [','])) ) */
2496         expr_ty res;
2497         ch = CHILD(n, 1);
2498         if (TYPE(ch) == RBRACE) {
2499             /* It's an empty dict. */
2500             return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
2501                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2502         }
2503         else {
2504             int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2505             if (NCH(ch) == 1 ||
2506                     (NCH(ch) > 1 &&
2507                      TYPE(CHILD(ch, 1)) == COMMA)) {
2508                 /* It's a set display. */
2509                 res = ast_for_setdisplay(c, ch);
2510             }
2511             else if (NCH(ch) > 1 &&
2512                     TYPE(CHILD(ch, 1)) == comp_for) {
2513                 /* It's a set comprehension. */
2514                 res = ast_for_setcomp(c, ch);
2515             }
2516             else if (NCH(ch) > 3 - is_dict &&
2517                     TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2518                 /* It's a dictionary comprehension. */
2519                 if (is_dict) {
2520                     ast_error(c, n,
2521                               "dict unpacking cannot be used in dict comprehension");
2522                     return NULL;
2523                 }
2524                 res = ast_for_dictcomp(c, ch);
2525             }
2526             else {
2527                 /* It's a dictionary display. */
2528                 res = ast_for_dictdisplay(c, ch);
2529             }
2530             return copy_location(res, n);
2531         }
2532     }
2533     default:
2534         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2535         return NULL;
2536     }
2537 }
2538 
2539 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2540 ast_for_slice(struct compiling *c, const node *n)
2541 {
2542     node *ch;
2543     expr_ty lower = NULL, upper = NULL, step = NULL;
2544 
2545     REQ(n, subscript);
2546 
2547     /*
2548        subscript: test | [test] ':' [test] [sliceop]
2549        sliceop: ':' [test]
2550     */
2551     ch = CHILD(n, 0);
2552     if (NCH(n) == 1 && TYPE(ch) == test) {
2553         /* 'step' variable hold no significance in terms of being used over
2554            other vars */
2555         step = ast_for_expr(c, ch);
2556         if (!step)
2557             return NULL;
2558 
2559         return Index(step, c->c_arena);
2560     }
2561 
2562     if (TYPE(ch) == test) {
2563         lower = ast_for_expr(c, ch);
2564         if (!lower)
2565             return NULL;
2566     }
2567 
2568     /* If there's an upper bound it's in the second or third position. */
2569     if (TYPE(ch) == COLON) {
2570         if (NCH(n) > 1) {
2571             node *n2 = CHILD(n, 1);
2572 
2573             if (TYPE(n2) == test) {
2574                 upper = ast_for_expr(c, n2);
2575                 if (!upper)
2576                     return NULL;
2577             }
2578         }
2579     } else if (NCH(n) > 2) {
2580         node *n2 = CHILD(n, 2);
2581 
2582         if (TYPE(n2) == test) {
2583             upper = ast_for_expr(c, n2);
2584             if (!upper)
2585                 return NULL;
2586         }
2587     }
2588 
2589     ch = CHILD(n, NCH(n) - 1);
2590     if (TYPE(ch) == sliceop) {
2591         if (NCH(ch) != 1) {
2592             ch = CHILD(ch, 1);
2593             if (TYPE(ch) == test) {
2594                 step = ast_for_expr(c, ch);
2595                 if (!step)
2596                     return NULL;
2597             }
2598         }
2599     }
2600 
2601     return Slice(lower, upper, step, c->c_arena);
2602 }
2603 
2604 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2605 ast_for_binop(struct compiling *c, const node *n)
2606 {
2607     /* Must account for a sequence of expressions.
2608        How should A op B op C by represented?
2609        BinOp(BinOp(A, op, B), op, C).
2610     */
2611 
2612     int i, nops;
2613     expr_ty expr1, expr2, result;
2614     operator_ty newoperator;
2615 
2616     expr1 = ast_for_expr(c, CHILD(n, 0));
2617     if (!expr1)
2618         return NULL;
2619 
2620     expr2 = ast_for_expr(c, CHILD(n, 2));
2621     if (!expr2)
2622         return NULL;
2623 
2624     newoperator = get_operator(c, CHILD(n, 1));
2625     if (!newoperator)
2626         return NULL;
2627 
2628     result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2629                    CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2630                    c->c_arena);
2631     if (!result)
2632         return NULL;
2633 
2634     nops = (NCH(n) - 1) / 2;
2635     for (i = 1; i < nops; i++) {
2636         expr_ty tmp_result, tmp;
2637         const node* next_oper = CHILD(n, i * 2 + 1);
2638 
2639         newoperator = get_operator(c, next_oper);
2640         if (!newoperator)
2641             return NULL;
2642 
2643         tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2644         if (!tmp)
2645             return NULL;
2646 
2647         tmp_result = BinOp(result, newoperator, tmp,
2648                            LINENO(n), n->n_col_offset,
2649                            CHILD(n, i * 2 + 2)->n_end_lineno,
2650                            CHILD(n, i * 2 + 2)->n_end_col_offset,
2651                            c->c_arena);
2652         if (!tmp_result)
2653             return NULL;
2654         result = tmp_result;
2655     }
2656     return result;
2657 }
2658 
2659 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)2660 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
2661 {
2662     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2663        subscriptlist: subscript (',' subscript)* [',']
2664        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2665      */
2666     const node *n_copy = n;
2667     REQ(n, trailer);
2668     if (TYPE(CHILD(n, 0)) == LPAR) {
2669         if (NCH(n) == 2)
2670             return Call(left_expr, NULL, NULL, LINENO(n), n->n_col_offset,
2671                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2672         else
2673             return ast_for_call(c, CHILD(n, 1), left_expr, CHILD(n, 0), CHILD(n, 2));
2674     }
2675     else if (TYPE(CHILD(n, 0)) == DOT) {
2676         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2677         if (!attr_id)
2678             return NULL;
2679         return Attribute(left_expr, attr_id, Load,
2680                          LINENO(n), n->n_col_offset,
2681                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2682     }
2683     else {
2684         REQ(CHILD(n, 0), LSQB);
2685         REQ(CHILD(n, 2), RSQB);
2686         n = CHILD(n, 1);
2687         if (NCH(n) == 1) {
2688             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2689             if (!slc)
2690                 return NULL;
2691             return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
2692                              n_copy->n_end_lineno, n_copy->n_end_col_offset,
2693                              c->c_arena);
2694         }
2695         else {
2696             /* The grammar is ambiguous here. The ambiguity is resolved
2697                by treating the sequence as a tuple literal if there are
2698                no slice features.
2699             */
2700             Py_ssize_t j;
2701             slice_ty slc;
2702             expr_ty e;
2703             int simple = 1;
2704             asdl_seq *slices, *elts;
2705             slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2706             if (!slices)
2707                 return NULL;
2708             for (j = 0; j < NCH(n); j += 2) {
2709                 slc = ast_for_slice(c, CHILD(n, j));
2710                 if (!slc)
2711                     return NULL;
2712                 if (slc->kind != Index_kind)
2713                     simple = 0;
2714                 asdl_seq_SET(slices, j / 2, slc);
2715             }
2716             if (!simple) {
2717                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2718                                  Load, LINENO(n), n->n_col_offset,
2719                                  n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2720             }
2721             /* extract Index values and put them in a Tuple */
2722             elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2723             if (!elts)
2724                 return NULL;
2725             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2726                 slc = (slice_ty)asdl_seq_GET(slices, j);
2727                 assert(slc->kind == Index_kind  && slc->v.Index.value);
2728                 asdl_seq_SET(elts, j, slc->v.Index.value);
2729             }
2730             e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2731                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2732             if (!e)
2733                 return NULL;
2734             return Subscript(left_expr, Index(e, c->c_arena),
2735                              Load, LINENO(n), n->n_col_offset,
2736                              n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2737         }
2738     }
2739 }
2740 
2741 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2742 ast_for_factor(struct compiling *c, const node *n)
2743 {
2744     expr_ty expression;
2745 
2746     expression = ast_for_expr(c, CHILD(n, 1));
2747     if (!expression)
2748         return NULL;
2749 
2750     switch (TYPE(CHILD(n, 0))) {
2751         case PLUS:
2752             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2753                            n->n_end_lineno, n->n_end_col_offset,
2754                            c->c_arena);
2755         case MINUS:
2756             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2757                            n->n_end_lineno, n->n_end_col_offset,
2758                            c->c_arena);
2759         case TILDE:
2760             return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2761                            n->n_end_lineno, n->n_end_col_offset,
2762                            c->c_arena);
2763     }
2764     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2765                  TYPE(CHILD(n, 0)));
2766     return NULL;
2767 }
2768 
2769 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2770 ast_for_atom_expr(struct compiling *c, const node *n)
2771 {
2772     int i, nch, start = 0;
2773     expr_ty e, tmp;
2774 
2775     REQ(n, atom_expr);
2776     nch = NCH(n);
2777 
2778     if (TYPE(CHILD(n, 0)) == AWAIT) {
2779         if (c->c_feature_version < 5) {
2780             ast_error(c, n,
2781                       "Await expressions are only supported in Python 3.5 and greater");
2782             return NULL;
2783         }
2784         start = 1;
2785         assert(nch > 1);
2786     }
2787 
2788     e = ast_for_atom(c, CHILD(n, start));
2789     if (!e)
2790         return NULL;
2791     if (nch == 1)
2792         return e;
2793     if (start && nch == 2) {
2794         return Await(e, LINENO(n), n->n_col_offset,
2795                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2796     }
2797 
2798     for (i = start + 1; i < nch; i++) {
2799         node *ch = CHILD(n, i);
2800         if (TYPE(ch) != trailer)
2801             break;
2802         tmp = ast_for_trailer(c, ch, e);
2803         if (!tmp)
2804             return NULL;
2805         tmp->lineno = e->lineno;
2806         tmp->col_offset = e->col_offset;
2807         e = tmp;
2808     }
2809 
2810     if (start) {
2811         /* there was an 'await' */
2812         return Await(e, LINENO(n), n->n_col_offset,
2813                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2814     }
2815     else {
2816         return e;
2817     }
2818 }
2819 
2820 static expr_ty
ast_for_power(struct compiling * c,const node * n)2821 ast_for_power(struct compiling *c, const node *n)
2822 {
2823     /* power: atom trailer* ('**' factor)*
2824      */
2825     expr_ty e;
2826     REQ(n, power);
2827     e = ast_for_atom_expr(c, CHILD(n, 0));
2828     if (!e)
2829         return NULL;
2830     if (NCH(n) == 1)
2831         return e;
2832     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2833         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2834         if (!f)
2835             return NULL;
2836         e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2837                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2838     }
2839     return e;
2840 }
2841 
2842 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2843 ast_for_starred(struct compiling *c, const node *n)
2844 {
2845     expr_ty tmp;
2846     REQ(n, star_expr);
2847 
2848     tmp = ast_for_expr(c, CHILD(n, 1));
2849     if (!tmp)
2850         return NULL;
2851 
2852     /* The Load context is changed later. */
2853     return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2854                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2855 }
2856 
2857 
2858 /* Do not name a variable 'expr'!  Will cause a compile error.
2859 */
2860 
2861 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2862 ast_for_expr(struct compiling *c, const node *n)
2863 {
2864     /* handle the full range of simple expressions
2865        namedexpr_test: test [':=' test]
2866        test: or_test ['if' or_test 'else' test] | lambdef
2867        test_nocond: or_test | lambdef_nocond
2868        or_test: and_test ('or' and_test)*
2869        and_test: not_test ('and' not_test)*
2870        not_test: 'not' not_test | comparison
2871        comparison: expr (comp_op expr)*
2872        expr: xor_expr ('|' xor_expr)*
2873        xor_expr: and_expr ('^' and_expr)*
2874        and_expr: shift_expr ('&' shift_expr)*
2875        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2876        arith_expr: term (('+'|'-') term)*
2877        term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2878        factor: ('+'|'-'|'~') factor | power
2879        power: atom_expr ['**' factor]
2880        atom_expr: [AWAIT] atom trailer*
2881        yield_expr: 'yield' [yield_arg]
2882     */
2883 
2884     asdl_seq *seq;
2885     int i;
2886 
2887  loop:
2888     switch (TYPE(n)) {
2889         case namedexpr_test:
2890             if (NCH(n) == 3)
2891                 return ast_for_namedexpr(c, n);
2892             /* Fallthrough */
2893         case test:
2894         case test_nocond:
2895             if (TYPE(CHILD(n, 0)) == lambdef ||
2896                 TYPE(CHILD(n, 0)) == lambdef_nocond)
2897                 return ast_for_lambdef(c, CHILD(n, 0));
2898             else if (NCH(n) > 1)
2899                 return ast_for_ifexpr(c, n);
2900             /* Fallthrough */
2901         case or_test:
2902         case and_test:
2903             if (NCH(n) == 1) {
2904                 n = CHILD(n, 0);
2905                 goto loop;
2906             }
2907             seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2908             if (!seq)
2909                 return NULL;
2910             for (i = 0; i < NCH(n); i += 2) {
2911                 expr_ty e = ast_for_expr(c, CHILD(n, i));
2912                 if (!e)
2913                     return NULL;
2914                 asdl_seq_SET(seq, i / 2, e);
2915             }
2916             if (!strcmp(STR(CHILD(n, 1)), "and"))
2917                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2918                               n->n_end_lineno, n->n_end_col_offset,
2919                               c->c_arena);
2920             assert(!strcmp(STR(CHILD(n, 1)), "or"));
2921             return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
2922                           n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2923         case not_test:
2924             if (NCH(n) == 1) {
2925                 n = CHILD(n, 0);
2926                 goto loop;
2927             }
2928             else {
2929                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2930                 if (!expression)
2931                     return NULL;
2932 
2933                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2934                                n->n_end_lineno, n->n_end_col_offset,
2935                                c->c_arena);
2936             }
2937         case comparison:
2938             if (NCH(n) == 1) {
2939                 n = CHILD(n, 0);
2940                 goto loop;
2941             }
2942             else {
2943                 expr_ty expression;
2944                 asdl_int_seq *ops;
2945                 asdl_seq *cmps;
2946                 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2947                 if (!ops)
2948                     return NULL;
2949                 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2950                 if (!cmps) {
2951                     return NULL;
2952                 }
2953                 for (i = 1; i < NCH(n); i += 2) {
2954                     cmpop_ty newoperator;
2955 
2956                     newoperator = ast_for_comp_op(c, CHILD(n, i));
2957                     if (!newoperator) {
2958                         return NULL;
2959                     }
2960 
2961                     expression = ast_for_expr(c, CHILD(n, i + 1));
2962                     if (!expression) {
2963                         return NULL;
2964                     }
2965 
2966                     asdl_seq_SET(ops, i / 2, newoperator);
2967                     asdl_seq_SET(cmps, i / 2, expression);
2968                 }
2969                 expression = ast_for_expr(c, CHILD(n, 0));
2970                 if (!expression) {
2971                     return NULL;
2972                 }
2973 
2974                 return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
2975                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2976             }
2977 
2978         case star_expr:
2979             return ast_for_starred(c, n);
2980         /* The next five cases all handle BinOps.  The main body of code
2981            is the same in each case, but the switch turned inside out to
2982            reuse the code for each type of operator.
2983          */
2984         case expr:
2985         case xor_expr:
2986         case and_expr:
2987         case shift_expr:
2988         case arith_expr:
2989         case term:
2990             if (NCH(n) == 1) {
2991                 n = CHILD(n, 0);
2992                 goto loop;
2993             }
2994             return ast_for_binop(c, n);
2995         case yield_expr: {
2996             node *an = NULL;
2997             node *en = NULL;
2998             int is_from = 0;
2999             expr_ty exp = NULL;
3000             if (NCH(n) > 1)
3001                 an = CHILD(n, 1); /* yield_arg */
3002             if (an) {
3003                 en = CHILD(an, NCH(an) - 1);
3004                 if (NCH(an) == 2) {
3005                     is_from = 1;
3006                     exp = ast_for_expr(c, en);
3007                 }
3008                 else
3009                     exp = ast_for_testlist(c, en);
3010                 if (!exp)
3011                     return NULL;
3012             }
3013             if (is_from)
3014                 return YieldFrom(exp, LINENO(n), n->n_col_offset,
3015                                  n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3016             return Yield(exp, LINENO(n), n->n_col_offset,
3017                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3018         }
3019         case factor:
3020             if (NCH(n) == 1) {
3021                 n = CHILD(n, 0);
3022                 goto loop;
3023             }
3024             return ast_for_factor(c, n);
3025         case power:
3026             return ast_for_power(c, n);
3027         default:
3028             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
3029             return NULL;
3030     }
3031     /* should never get here unless if error is set */
3032     return NULL;
3033 }
3034 
3035 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * maybegenbeg,const node * closepar)3036 ast_for_call(struct compiling *c, const node *n, expr_ty func,
3037              const node *maybegenbeg, const node *closepar)
3038 {
3039     /*
3040       arglist: argument (',' argument)*  [',']
3041       argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
3042     */
3043 
3044     int i, nargs, nkeywords;
3045     int ndoublestars;
3046     asdl_seq *args;
3047     asdl_seq *keywords;
3048 
3049     REQ(n, arglist);
3050 
3051     nargs = 0;
3052     nkeywords = 0;
3053     for (i = 0; i < NCH(n); i++) {
3054         node *ch = CHILD(n, i);
3055         if (TYPE(ch) == argument) {
3056             if (NCH(ch) == 1)
3057                 nargs++;
3058             else if (TYPE(CHILD(ch, 1)) == comp_for) {
3059                 nargs++;
3060                 if (!maybegenbeg) {
3061                     ast_error(c, ch, "invalid syntax");
3062                     return NULL;
3063                 }
3064                 if (NCH(n) > 1) {
3065                     ast_error(c, ch, "Generator expression must be parenthesized");
3066                     return NULL;
3067                 }
3068             }
3069             else if (TYPE(CHILD(ch, 0)) == STAR)
3070                 nargs++;
3071             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3072                 nargs++;
3073             }
3074             else
3075                 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
3076                 nkeywords++;
3077         }
3078     }
3079 
3080     args = _Py_asdl_seq_new(nargs, c->c_arena);
3081     if (!args)
3082         return NULL;
3083     keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
3084     if (!keywords)
3085         return NULL;
3086 
3087     nargs = 0;  /* positional arguments + iterable argument unpackings */
3088     nkeywords = 0;  /* keyword arguments + keyword argument unpackings */
3089     ndoublestars = 0;  /* just keyword argument unpackings */
3090     for (i = 0; i < NCH(n); i++) {
3091         node *ch = CHILD(n, i);
3092         if (TYPE(ch) == argument) {
3093             expr_ty e;
3094             node *chch = CHILD(ch, 0);
3095             if (NCH(ch) == 1) {
3096                 /* a positional argument */
3097                 if (nkeywords) {
3098                     if (ndoublestars) {
3099                         ast_error(c, chch,
3100                                   "positional argument follows "
3101                                   "keyword argument unpacking");
3102                     }
3103                     else {
3104                         ast_error(c, chch,
3105                                   "positional argument follows "
3106                                   "keyword argument");
3107                     }
3108                     return NULL;
3109                 }
3110                 e = ast_for_expr(c, chch);
3111                 if (!e)
3112                     return NULL;
3113                 asdl_seq_SET(args, nargs++, e);
3114             }
3115             else if (TYPE(chch) == STAR) {
3116                 /* an iterable argument unpacking */
3117                 expr_ty starred;
3118                 if (ndoublestars) {
3119                     ast_error(c, chch,
3120                               "iterable argument unpacking follows "
3121                               "keyword argument unpacking");
3122                     return NULL;
3123                 }
3124                 e = ast_for_expr(c, CHILD(ch, 1));
3125                 if (!e)
3126                     return NULL;
3127                 starred = Starred(e, Load, LINENO(chch),
3128                         chch->n_col_offset,
3129                         e->end_lineno, e->end_col_offset,
3130                         c->c_arena);
3131                 if (!starred)
3132                     return NULL;
3133                 asdl_seq_SET(args, nargs++, starred);
3134 
3135             }
3136             else if (TYPE(chch) == DOUBLESTAR) {
3137                 /* a keyword argument unpacking */
3138                 keyword_ty kw;
3139                 i++;
3140                 e = ast_for_expr(c, CHILD(ch, 1));
3141                 if (!e)
3142                     return NULL;
3143                 kw = keyword(NULL, e, c->c_arena);
3144                 asdl_seq_SET(keywords, nkeywords++, kw);
3145                 ndoublestars++;
3146             }
3147             else if (TYPE(CHILD(ch, 1)) == comp_for) {
3148                 /* the lone generator expression */
3149                 e = copy_location(ast_for_genexp(c, ch), maybegenbeg);
3150                 if (!e)
3151                     return NULL;
3152                 asdl_seq_SET(args, nargs++, e);
3153             }
3154             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3155                 /* treat colon equal as positional argument */
3156                 if (nkeywords) {
3157                     if (ndoublestars) {
3158                         ast_error(c, chch,
3159                                   "positional argument follows "
3160                                   "keyword argument unpacking");
3161                     }
3162                     else {
3163                         ast_error(c, chch,
3164                                   "positional argument follows "
3165                                   "keyword argument");
3166                     }
3167                     return NULL;
3168                 }
3169                 e = ast_for_namedexpr(c, ch);
3170                 if (!e)
3171                     return NULL;
3172                 asdl_seq_SET(args, nargs++, e);
3173             }
3174             else {
3175                 /* a keyword argument */
3176                 keyword_ty kw;
3177                 identifier key, tmp;
3178                 int k;
3179 
3180                 // To remain LL(1), the grammar accepts any test (basically, any
3181                 // expression) in the keyword slot of a call site.  So, we need
3182                 // to manually enforce that the keyword is a NAME here.
3183                 static const int name_tree[] = {
3184                     test,
3185                     or_test,
3186                     and_test,
3187                     not_test,
3188                     comparison,
3189                     expr,
3190                     xor_expr,
3191                     and_expr,
3192                     shift_expr,
3193                     arith_expr,
3194                     term,
3195                     factor,
3196                     power,
3197                     atom_expr,
3198                     atom,
3199                     0,
3200                 };
3201                 node *expr_node = chch;
3202                 for (int i = 0; name_tree[i]; i++) {
3203                     if (TYPE(expr_node) != name_tree[i])
3204                         break;
3205                     if (NCH(expr_node) != 1)
3206                         break;
3207                     expr_node = CHILD(expr_node, 0);
3208                 }
3209                 if (TYPE(expr_node) != NAME) {
3210                     ast_error(c, chch,
3211                               "expression cannot contain assignment, "
3212                               "perhaps you meant \"==\"?");
3213                     return NULL;
3214                 }
3215                 key = new_identifier(STR(expr_node), c);
3216                 if (key == NULL) {
3217                     return NULL;
3218                 }
3219                 if (forbidden_name(c, key, chch, 1)) {
3220                     return NULL;
3221                 }
3222                 for (k = 0; k < nkeywords; k++) {
3223                     tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
3224                     if (tmp && !PyUnicode_Compare(tmp, key)) {
3225                         ast_error(c, chch,
3226                                   "keyword argument repeated");
3227                         return NULL;
3228                     }
3229                 }
3230                 e = ast_for_expr(c, CHILD(ch, 2));
3231                 if (!e)
3232                     return NULL;
3233                 kw = keyword(key, e, c->c_arena);
3234                 if (!kw)
3235                     return NULL;
3236                 asdl_seq_SET(keywords, nkeywords++, kw);
3237             }
3238         }
3239     }
3240 
3241     return Call(func, args, keywords, func->lineno, func->col_offset,
3242                 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3243 }
3244 
3245 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3246 ast_for_testlist(struct compiling *c, const node* n)
3247 {
3248     /* testlist_comp: test (comp_for | (',' test)* [',']) */
3249     /* testlist: test (',' test)* [','] */
3250     assert(NCH(n) > 0);
3251     if (TYPE(n) == testlist_comp) {
3252         if (NCH(n) > 1)
3253             assert(TYPE(CHILD(n, 1)) != comp_for);
3254     }
3255     else {
3256         assert(TYPE(n) == testlist ||
3257                TYPE(n) == testlist_star_expr);
3258     }
3259     if (NCH(n) == 1)
3260         return ast_for_expr(c, CHILD(n, 0));
3261     else {
3262         asdl_seq *tmp = seq_for_testlist(c, n);
3263         if (!tmp)
3264             return NULL;
3265         return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3266                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3267     }
3268 }
3269 
3270 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)3271 ast_for_expr_stmt(struct compiling *c, const node *n)
3272 {
3273     REQ(n, expr_stmt);
3274     /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
3275                      [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
3276        annassign: ':' test ['=' (yield_expr|testlist)]
3277        testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
3278        augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
3279                    '<<=' | '>>=' | '**=' | '//=')
3280        test: ... here starts the operator precedence dance
3281      */
3282     int num = NCH(n);
3283 
3284     if (num == 1) {
3285         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
3286         if (!e)
3287             return NULL;
3288 
3289         return Expr(e, LINENO(n), n->n_col_offset,
3290                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3291     }
3292     else if (TYPE(CHILD(n, 1)) == augassign) {
3293         expr_ty expr1, expr2;
3294         operator_ty newoperator;
3295         node *ch = CHILD(n, 0);
3296 
3297         expr1 = ast_for_testlist(c, ch);
3298         if (!expr1)
3299             return NULL;
3300         if(!set_context(c, expr1, Store, ch))
3301             return NULL;
3302         /* set_context checks that most expressions are not the left side.
3303           Augmented assignments can only have a name, a subscript, or an
3304           attribute on the left, though, so we have to explicitly check for
3305           those. */
3306         switch (expr1->kind) {
3307             case Name_kind:
3308             case Attribute_kind:
3309             case Subscript_kind:
3310                 break;
3311             default:
3312                 ast_error(c, ch, "illegal expression for augmented assignment");
3313                 return NULL;
3314         }
3315 
3316         ch = CHILD(n, 2);
3317         if (TYPE(ch) == testlist)
3318             expr2 = ast_for_testlist(c, ch);
3319         else
3320             expr2 = ast_for_expr(c, ch);
3321         if (!expr2)
3322             return NULL;
3323 
3324         newoperator = ast_for_augassign(c, CHILD(n, 1));
3325         if (!newoperator)
3326             return NULL;
3327 
3328         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
3329                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3330     }
3331     else if (TYPE(CHILD(n, 1)) == annassign) {
3332         expr_ty expr1, expr2, expr3;
3333         node *ch = CHILD(n, 0);
3334         node *deep, *ann = CHILD(n, 1);
3335         int simple = 1;
3336 
3337         /* AnnAssigns are only allowed in Python 3.6 or greater */
3338         if (c->c_feature_version < 6) {
3339             ast_error(c, ch,
3340                       "Variable annotation syntax is only supported in Python 3.6 and greater");
3341             return NULL;
3342         }
3343 
3344         /* we keep track of parens to qualify (x) as expression not name */
3345         deep = ch;
3346         while (NCH(deep) == 1) {
3347             deep = CHILD(deep, 0);
3348         }
3349         if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
3350             simple = 0;
3351         }
3352         expr1 = ast_for_testlist(c, ch);
3353         if (!expr1) {
3354             return NULL;
3355         }
3356         switch (expr1->kind) {
3357             case Name_kind:
3358                 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
3359                     return NULL;
3360                 }
3361                 expr1->v.Name.ctx = Store;
3362                 break;
3363             case Attribute_kind:
3364                 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
3365                     return NULL;
3366                 }
3367                 expr1->v.Attribute.ctx = Store;
3368                 break;
3369             case Subscript_kind:
3370                 expr1->v.Subscript.ctx = Store;
3371                 break;
3372             case List_kind:
3373                 ast_error(c, ch,
3374                           "only single target (not list) can be annotated");
3375                 return NULL;
3376             case Tuple_kind:
3377                 ast_error(c, ch,
3378                           "only single target (not tuple) can be annotated");
3379                 return NULL;
3380             default:
3381                 ast_error(c, ch,
3382                           "illegal target for annotation");
3383                 return NULL;
3384         }
3385 
3386         if (expr1->kind != Name_kind) {
3387             simple = 0;
3388         }
3389         ch = CHILD(ann, 1);
3390         expr2 = ast_for_expr(c, ch);
3391         if (!expr2) {
3392             return NULL;
3393         }
3394         if (NCH(ann) == 2) {
3395             return AnnAssign(expr1, expr2, NULL, simple,
3396                              LINENO(n), n->n_col_offset,
3397                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3398         }
3399         else {
3400             ch = CHILD(ann, 3);
3401             if (TYPE(ch) == testlist_star_expr) {
3402                 expr3 = ast_for_testlist(c, ch);
3403             }
3404             else {
3405                 expr3 = ast_for_expr(c, ch);
3406             }
3407             if (!expr3) {
3408                 return NULL;
3409             }
3410             return AnnAssign(expr1, expr2, expr3, simple,
3411                              LINENO(n), n->n_col_offset,
3412                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3413         }
3414     }
3415     else {
3416         int i, nch_minus_type, has_type_comment;
3417         asdl_seq *targets;
3418         node *value;
3419         expr_ty expression;
3420         string type_comment;
3421 
3422         /* a normal assignment */
3423         REQ(CHILD(n, 1), EQUAL);
3424 
3425         has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
3426         nch_minus_type = num - has_type_comment;
3427 
3428         targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
3429         if (!targets)
3430             return NULL;
3431         for (i = 0; i < nch_minus_type - 2; i += 2) {
3432             expr_ty e;
3433             node *ch = CHILD(n, i);
3434             if (TYPE(ch) == yield_expr) {
3435                 ast_error(c, ch, "assignment to yield expression not possible");
3436                 return NULL;
3437             }
3438             e = ast_for_testlist(c, ch);
3439             if (!e)
3440               return NULL;
3441 
3442             /* set context to assign */
3443             if (!set_context(c, e, Store, CHILD(n, i)))
3444               return NULL;
3445 
3446             asdl_seq_SET(targets, i / 2, e);
3447         }
3448         value = CHILD(n, nch_minus_type - 1);
3449         if (TYPE(value) == testlist_star_expr)
3450             expression = ast_for_testlist(c, value);
3451         else
3452             expression = ast_for_expr(c, value);
3453         if (!expression)
3454             return NULL;
3455         if (has_type_comment) {
3456             type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
3457             if (!type_comment)
3458                 return NULL;
3459         }
3460         else
3461             type_comment = NULL;
3462         return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
3463                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3464     }
3465 }
3466 
3467 
3468 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3469 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3470 {
3471     asdl_seq *seq;
3472     int i;
3473     expr_ty e;
3474 
3475     REQ(n, exprlist);
3476 
3477     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3478     if (!seq)
3479         return NULL;
3480     for (i = 0; i < NCH(n); i += 2) {
3481         e = ast_for_expr(c, CHILD(n, i));
3482         if (!e)
3483             return NULL;
3484         asdl_seq_SET(seq, i / 2, e);
3485         if (context && !set_context(c, e, context, CHILD(n, i)))
3486             return NULL;
3487     }
3488     return seq;
3489 }
3490 
3491 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3492 ast_for_del_stmt(struct compiling *c, const node *n)
3493 {
3494     asdl_seq *expr_list;
3495 
3496     /* del_stmt: 'del' exprlist */
3497     REQ(n, del_stmt);
3498 
3499     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3500     if (!expr_list)
3501         return NULL;
3502     return Delete(expr_list, LINENO(n), n->n_col_offset,
3503                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3504 }
3505 
3506 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3507 ast_for_flow_stmt(struct compiling *c, const node *n)
3508 {
3509     /*
3510       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3511                  | yield_stmt
3512       break_stmt: 'break'
3513       continue_stmt: 'continue'
3514       return_stmt: 'return' [testlist]
3515       yield_stmt: yield_expr
3516       yield_expr: 'yield' testlist | 'yield' 'from' test
3517       raise_stmt: 'raise' [test [',' test [',' test]]]
3518     */
3519     node *ch;
3520 
3521     REQ(n, flow_stmt);
3522     ch = CHILD(n, 0);
3523     switch (TYPE(ch)) {
3524         case break_stmt:
3525             return Break(LINENO(n), n->n_col_offset,
3526                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3527         case continue_stmt:
3528             return Continue(LINENO(n), n->n_col_offset,
3529                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3530         case yield_stmt: { /* will reduce to yield_expr */
3531             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3532             if (!exp)
3533                 return NULL;
3534             return Expr(exp, LINENO(n), n->n_col_offset,
3535                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3536         }
3537         case return_stmt:
3538             if (NCH(ch) == 1)
3539                 return Return(NULL, LINENO(n), n->n_col_offset,
3540                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3541             else {
3542                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3543                 if (!expression)
3544                     return NULL;
3545                 return Return(expression, LINENO(n), n->n_col_offset,
3546                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3547             }
3548         case raise_stmt:
3549             if (NCH(ch) == 1)
3550                 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3551                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3552             else if (NCH(ch) >= 2) {
3553                 expr_ty cause = NULL;
3554                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3555                 if (!expression)
3556                     return NULL;
3557                 if (NCH(ch) == 4) {
3558                     cause = ast_for_expr(c, CHILD(ch, 3));
3559                     if (!cause)
3560                         return NULL;
3561                 }
3562                 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3563                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3564             }
3565             /* fall through */
3566         default:
3567             PyErr_Format(PyExc_SystemError,
3568                          "unexpected flow_stmt: %d", TYPE(ch));
3569             return NULL;
3570     }
3571 }
3572 
3573 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3574 alias_for_import_name(struct compiling *c, const node *n, int store)
3575 {
3576     /*
3577       import_as_name: NAME ['as' NAME]
3578       dotted_as_name: dotted_name ['as' NAME]
3579       dotted_name: NAME ('.' NAME)*
3580     */
3581     identifier str, name;
3582 
3583  loop:
3584     switch (TYPE(n)) {
3585         case import_as_name: {
3586             node *name_node = CHILD(n, 0);
3587             str = NULL;
3588             name = NEW_IDENTIFIER(name_node);
3589             if (!name)
3590                 return NULL;
3591             if (NCH(n) == 3) {
3592                 node *str_node = CHILD(n, 2);
3593                 str = NEW_IDENTIFIER(str_node);
3594                 if (!str)
3595                     return NULL;
3596                 if (store && forbidden_name(c, str, str_node, 0))
3597                     return NULL;
3598             }
3599             else {
3600                 if (forbidden_name(c, name, name_node, 0))
3601                     return NULL;
3602             }
3603             return alias(name, str, c->c_arena);
3604         }
3605         case dotted_as_name:
3606             if (NCH(n) == 1) {
3607                 n = CHILD(n, 0);
3608                 goto loop;
3609             }
3610             else {
3611                 node *asname_node = CHILD(n, 2);
3612                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3613                 if (!a)
3614                     return NULL;
3615                 assert(!a->asname);
3616                 a->asname = NEW_IDENTIFIER(asname_node);
3617                 if (!a->asname)
3618                     return NULL;
3619                 if (forbidden_name(c, a->asname, asname_node, 0))
3620                     return NULL;
3621                 return a;
3622             }
3623         case dotted_name:
3624             if (NCH(n) == 1) {
3625                 node *name_node = CHILD(n, 0);
3626                 name = NEW_IDENTIFIER(name_node);
3627                 if (!name)
3628                     return NULL;
3629                 if (store && forbidden_name(c, name, name_node, 0))
3630                     return NULL;
3631                 return alias(name, NULL, c->c_arena);
3632             }
3633             else {
3634                 /* Create a string of the form "a.b.c" */
3635                 int i;
3636                 size_t len;
3637                 char *s;
3638                 PyObject *uni;
3639 
3640                 len = 0;
3641                 for (i = 0; i < NCH(n); i += 2)
3642                     /* length of string plus one for the dot */
3643                     len += strlen(STR(CHILD(n, i))) + 1;
3644                 len--; /* the last name doesn't have a dot */
3645                 str = PyBytes_FromStringAndSize(NULL, len);
3646                 if (!str)
3647                     return NULL;
3648                 s = PyBytes_AS_STRING(str);
3649                 if (!s)
3650                     return NULL;
3651                 for (i = 0; i < NCH(n); i += 2) {
3652                     char *sch = STR(CHILD(n, i));
3653                     strcpy(s, STR(CHILD(n, i)));
3654                     s += strlen(sch);
3655                     *s++ = '.';
3656                 }
3657                 --s;
3658                 *s = '\0';
3659                 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3660                                            PyBytes_GET_SIZE(str),
3661                                            NULL);
3662                 Py_DECREF(str);
3663                 if (!uni)
3664                     return NULL;
3665                 str = uni;
3666                 PyUnicode_InternInPlace(&str);
3667                 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3668                     Py_DECREF(str);
3669                     return NULL;
3670                 }
3671                 return alias(str, NULL, c->c_arena);
3672             }
3673         case STAR:
3674             str = PyUnicode_InternFromString("*");
3675             if (!str)
3676                 return NULL;
3677             if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3678                 Py_DECREF(str);
3679                 return NULL;
3680             }
3681             return alias(str, NULL, c->c_arena);
3682         default:
3683             PyErr_Format(PyExc_SystemError,
3684                          "unexpected import name: %d", TYPE(n));
3685             return NULL;
3686     }
3687 
3688     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3689     return NULL;
3690 }
3691 
3692 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3693 ast_for_import_stmt(struct compiling *c, const node *n)
3694 {
3695     /*
3696       import_stmt: import_name | import_from
3697       import_name: 'import' dotted_as_names
3698       import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3699                    'import' ('*' | '(' import_as_names ')' | import_as_names)
3700     */
3701     int lineno;
3702     int col_offset;
3703     int i;
3704     asdl_seq *aliases;
3705 
3706     REQ(n, import_stmt);
3707     lineno = LINENO(n);
3708     col_offset = n->n_col_offset;
3709     n = CHILD(n, 0);
3710     if (TYPE(n) == import_name) {
3711         n = CHILD(n, 1);
3712         REQ(n, dotted_as_names);
3713         aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3714         if (!aliases)
3715                 return NULL;
3716         for (i = 0; i < NCH(n); i += 2) {
3717             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3718             if (!import_alias)
3719                 return NULL;
3720             asdl_seq_SET(aliases, i / 2, import_alias);
3721         }
3722         // Even though n is modified above, the end position is not changed
3723         return Import(aliases, lineno, col_offset,
3724                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3725     }
3726     else if (TYPE(n) == import_from) {
3727         int n_children;
3728         int idx, ndots = 0;
3729         const node *n_copy = n;
3730         alias_ty mod = NULL;
3731         identifier modname = NULL;
3732 
3733        /* Count the number of dots (for relative imports) and check for the
3734           optional module name */
3735         for (idx = 1; idx < NCH(n); idx++) {
3736             if (TYPE(CHILD(n, idx)) == dotted_name) {
3737                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3738                 if (!mod)
3739                     return NULL;
3740                 idx++;
3741                 break;
3742             } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3743                 /* three consecutive dots are tokenized as one ELLIPSIS */
3744                 ndots += 3;
3745                 continue;
3746             } else if (TYPE(CHILD(n, idx)) != DOT) {
3747                 break;
3748             }
3749             ndots++;
3750         }
3751         idx++; /* skip over the 'import' keyword */
3752         switch (TYPE(CHILD(n, idx))) {
3753         case STAR:
3754             /* from ... import * */
3755             n = CHILD(n, idx);
3756             n_children = 1;
3757             break;
3758         case LPAR:
3759             /* from ... import (x, y, z) */
3760             n = CHILD(n, idx + 1);
3761             n_children = NCH(n);
3762             break;
3763         case import_as_names:
3764             /* from ... import x, y, z */
3765             n = CHILD(n, idx);
3766             n_children = NCH(n);
3767             if (n_children % 2 == 0) {
3768                 ast_error(c, n,
3769                           "trailing comma not allowed without"
3770                           " surrounding parentheses");
3771                 return NULL;
3772             }
3773             break;
3774         default:
3775             ast_error(c, n, "Unexpected node-type in from-import");
3776             return NULL;
3777         }
3778 
3779         aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3780         if (!aliases)
3781             return NULL;
3782 
3783         /* handle "from ... import *" special b/c there's no children */
3784         if (TYPE(n) == STAR) {
3785             alias_ty import_alias = alias_for_import_name(c, n, 1);
3786             if (!import_alias)
3787                 return NULL;
3788             asdl_seq_SET(aliases, 0, import_alias);
3789         }
3790         else {
3791             for (i = 0; i < NCH(n); i += 2) {
3792                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3793                 if (!import_alias)
3794                     return NULL;
3795                 asdl_seq_SET(aliases, i / 2, import_alias);
3796             }
3797         }
3798         if (mod != NULL)
3799             modname = mod->name;
3800         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3801                           n_copy->n_end_lineno, n_copy->n_end_col_offset,
3802                           c->c_arena);
3803     }
3804     PyErr_Format(PyExc_SystemError,
3805                  "unknown import statement: starts with command '%s'",
3806                  STR(CHILD(n, 0)));
3807     return NULL;
3808 }
3809 
3810 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3811 ast_for_global_stmt(struct compiling *c, const node *n)
3812 {
3813     /* global_stmt: 'global' NAME (',' NAME)* */
3814     identifier name;
3815     asdl_seq *s;
3816     int i;
3817 
3818     REQ(n, global_stmt);
3819     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3820     if (!s)
3821         return NULL;
3822     for (i = 1; i < NCH(n); i += 2) {
3823         name = NEW_IDENTIFIER(CHILD(n, i));
3824         if (!name)
3825             return NULL;
3826         asdl_seq_SET(s, i / 2, name);
3827     }
3828     return Global(s, LINENO(n), n->n_col_offset,
3829                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3830 }
3831 
3832 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3833 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3834 {
3835     /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3836     identifier name;
3837     asdl_seq *s;
3838     int i;
3839 
3840     REQ(n, nonlocal_stmt);
3841     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3842     if (!s)
3843         return NULL;
3844     for (i = 1; i < NCH(n); i += 2) {
3845         name = NEW_IDENTIFIER(CHILD(n, i));
3846         if (!name)
3847             return NULL;
3848         asdl_seq_SET(s, i / 2, name);
3849     }
3850     return Nonlocal(s, LINENO(n), n->n_col_offset,
3851                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3852 }
3853 
3854 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3855 ast_for_assert_stmt(struct compiling *c, const node *n)
3856 {
3857     /* assert_stmt: 'assert' test [',' test] */
3858     REQ(n, assert_stmt);
3859     if (NCH(n) == 2) {
3860         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3861         if (!expression)
3862             return NULL;
3863         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3864                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3865     }
3866     else if (NCH(n) == 4) {
3867         expr_ty expr1, expr2;
3868 
3869         expr1 = ast_for_expr(c, CHILD(n, 1));
3870         if (!expr1)
3871             return NULL;
3872         expr2 = ast_for_expr(c, CHILD(n, 3));
3873         if (!expr2)
3874             return NULL;
3875 
3876         return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3877                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3878     }
3879     PyErr_Format(PyExc_SystemError,
3880                  "improper number of parts to 'assert' statement: %d",
3881                  NCH(n));
3882     return NULL;
3883 }
3884 
3885 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3886 ast_for_suite(struct compiling *c, const node *n)
3887 {
3888     /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
3889     asdl_seq *seq;
3890     stmt_ty s;
3891     int i, total, num, end, pos = 0;
3892     node *ch;
3893 
3894     if (TYPE(n) != func_body_suite) {
3895         REQ(n, suite);
3896     }
3897 
3898     total = num_stmts(n);
3899     seq = _Py_asdl_seq_new(total, c->c_arena);
3900     if (!seq)
3901         return NULL;
3902     if (TYPE(CHILD(n, 0)) == simple_stmt) {
3903         n = CHILD(n, 0);
3904         /* simple_stmt always ends with a NEWLINE,
3905            and may have a trailing SEMI
3906         */
3907         end = NCH(n) - 1;
3908         if (TYPE(CHILD(n, end - 1)) == SEMI)
3909             end--;
3910         /* loop by 2 to skip semi-colons */
3911         for (i = 0; i < end; i += 2) {
3912             ch = CHILD(n, i);
3913             s = ast_for_stmt(c, ch);
3914             if (!s)
3915                 return NULL;
3916             asdl_seq_SET(seq, pos++, s);
3917         }
3918     }
3919     else {
3920         i = 2;
3921         if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
3922             i += 2;
3923             REQ(CHILD(n, 2), NEWLINE);
3924         }
3925 
3926         for (; i < (NCH(n) - 1); i++) {
3927             ch = CHILD(n, i);
3928             REQ(ch, stmt);
3929             num = num_stmts(ch);
3930             if (num == 1) {
3931                 /* small_stmt or compound_stmt with only one child */
3932                 s = ast_for_stmt(c, ch);
3933                 if (!s)
3934                     return NULL;
3935                 asdl_seq_SET(seq, pos++, s);
3936             }
3937             else {
3938                 int j;
3939                 ch = CHILD(ch, 0);
3940                 REQ(ch, simple_stmt);
3941                 for (j = 0; j < NCH(ch); j += 2) {
3942                     /* statement terminates with a semi-colon ';' */
3943                     if (NCH(CHILD(ch, j)) == 0) {
3944                         assert((j + 1) == NCH(ch));
3945                         break;
3946                     }
3947                     s = ast_for_stmt(c, CHILD(ch, j));
3948                     if (!s)
3949                         return NULL;
3950                     asdl_seq_SET(seq, pos++, s);
3951                 }
3952             }
3953         }
3954     }
3955     assert(pos == seq->size);
3956     return seq;
3957 }
3958 
3959 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)3960 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
3961 {
3962     Py_ssize_t tot = asdl_seq_LEN(s);
3963     // There must be no empty suites.
3964     assert(tot > 0);
3965     stmt_ty last = asdl_seq_GET(s, tot - 1);
3966     *end_lineno = last->end_lineno;
3967     *end_col_offset = last->end_col_offset;
3968 }
3969 
3970 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3971 ast_for_if_stmt(struct compiling *c, const node *n)
3972 {
3973     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3974        ['else' ':' suite]
3975     */
3976     char *s;
3977     int end_lineno, end_col_offset;
3978 
3979     REQ(n, if_stmt);
3980 
3981     if (NCH(n) == 4) {
3982         expr_ty expression;
3983         asdl_seq *suite_seq;
3984 
3985         expression = ast_for_expr(c, CHILD(n, 1));
3986         if (!expression)
3987             return NULL;
3988         suite_seq = ast_for_suite(c, CHILD(n, 3));
3989         if (!suite_seq)
3990             return NULL;
3991         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
3992 
3993         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3994                   end_lineno, end_col_offset, c->c_arena);
3995     }
3996 
3997     s = STR(CHILD(n, 4));
3998     /* s[2], the third character in the string, will be
3999        's' for el_s_e, or
4000        'i' for el_i_f
4001     */
4002     if (s[2] == 's') {
4003         expr_ty expression;
4004         asdl_seq *seq1, *seq2;
4005 
4006         expression = ast_for_expr(c, CHILD(n, 1));
4007         if (!expression)
4008             return NULL;
4009         seq1 = ast_for_suite(c, CHILD(n, 3));
4010         if (!seq1)
4011             return NULL;
4012         seq2 = ast_for_suite(c, CHILD(n, 6));
4013         if (!seq2)
4014             return NULL;
4015         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4016 
4017         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4018                   end_lineno, end_col_offset, c->c_arena);
4019     }
4020     else if (s[2] == 'i') {
4021         int i, n_elif, has_else = 0;
4022         expr_ty expression;
4023         asdl_seq *suite_seq;
4024         asdl_seq *orelse = NULL;
4025         n_elif = NCH(n) - 4;
4026         /* must reference the child n_elif+1 since 'else' token is third,
4027            not fourth, child from the end. */
4028         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
4029             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
4030             has_else = 1;
4031             n_elif -= 3;
4032         }
4033         n_elif /= 4;
4034 
4035         if (has_else) {
4036             asdl_seq *suite_seq2;
4037 
4038             orelse = _Py_asdl_seq_new(1, c->c_arena);
4039             if (!orelse)
4040                 return NULL;
4041             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
4042             if (!expression)
4043                 return NULL;
4044             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
4045             if (!suite_seq)
4046                 return NULL;
4047             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4048             if (!suite_seq2)
4049                 return NULL;
4050             get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
4051 
4052             asdl_seq_SET(orelse, 0,
4053                          If(expression, suite_seq, suite_seq2,
4054                             LINENO(CHILD(n, NCH(n) - 7)),
4055                             CHILD(n, NCH(n) - 7)->n_col_offset,
4056                             end_lineno, end_col_offset, c->c_arena));
4057             /* the just-created orelse handled the last elif */
4058             n_elif--;
4059         }
4060 
4061         for (i = 0; i < n_elif; i++) {
4062             int off = 5 + (n_elif - i - 1) * 4;
4063             asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
4064             if (!newobj)
4065                 return NULL;
4066             expression = ast_for_expr(c, CHILD(n, off));
4067             if (!expression)
4068                 return NULL;
4069             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
4070             if (!suite_seq)
4071                 return NULL;
4072 
4073             if (orelse != NULL) {
4074                 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4075             } else {
4076                 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4077             }
4078             asdl_seq_SET(newobj, 0,
4079                          If(expression, suite_seq, orelse,
4080                             LINENO(CHILD(n, off - 1)),
4081                             CHILD(n, off - 1)->n_col_offset,
4082                             end_lineno, end_col_offset, c->c_arena));
4083             orelse = newobj;
4084         }
4085         expression = ast_for_expr(c, CHILD(n, 1));
4086         if (!expression)
4087             return NULL;
4088         suite_seq = ast_for_suite(c, CHILD(n, 3));
4089         if (!suite_seq)
4090             return NULL;
4091         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4092         return If(expression, suite_seq, orelse,
4093                   LINENO(n), n->n_col_offset,
4094                   end_lineno, end_col_offset, c->c_arena);
4095     }
4096 
4097     PyErr_Format(PyExc_SystemError,
4098                  "unexpected token in 'if' statement: %s", s);
4099     return NULL;
4100 }
4101 
4102 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)4103 ast_for_while_stmt(struct compiling *c, const node *n)
4104 {
4105     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
4106     REQ(n, while_stmt);
4107     int end_lineno, end_col_offset;
4108 
4109     if (NCH(n) == 4) {
4110         expr_ty expression;
4111         asdl_seq *suite_seq;
4112 
4113         expression = ast_for_expr(c, CHILD(n, 1));
4114         if (!expression)
4115             return NULL;
4116         suite_seq = ast_for_suite(c, CHILD(n, 3));
4117         if (!suite_seq)
4118             return NULL;
4119         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4120         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4121                      end_lineno, end_col_offset, c->c_arena);
4122     }
4123     else if (NCH(n) == 7) {
4124         expr_ty expression;
4125         asdl_seq *seq1, *seq2;
4126 
4127         expression = ast_for_expr(c, CHILD(n, 1));
4128         if (!expression)
4129             return NULL;
4130         seq1 = ast_for_suite(c, CHILD(n, 3));
4131         if (!seq1)
4132             return NULL;
4133         seq2 = ast_for_suite(c, CHILD(n, 6));
4134         if (!seq2)
4135             return NULL;
4136         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4137 
4138         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4139                      end_lineno, end_col_offset, c->c_arena);
4140     }
4141 
4142     PyErr_Format(PyExc_SystemError,
4143                  "wrong number of tokens for 'while' statement: %d",
4144                  NCH(n));
4145     return NULL;
4146 }
4147 
4148 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4149 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4150 {
4151     const node * const n = is_async ? CHILD(n0, 1) : n0;
4152     asdl_seq *_target, *seq = NULL, *suite_seq;
4153     expr_ty expression;
4154     expr_ty target, first;
4155     const node *node_target;
4156     int end_lineno, end_col_offset;
4157     int has_type_comment;
4158     string type_comment;
4159 
4160     if (is_async && c->c_feature_version < 5) {
4161         ast_error(c, n,
4162                   "Async for loops are only supported in Python 3.5 and greater");
4163         return NULL;
4164     }
4165 
4166     /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4167     REQ(n, for_stmt);
4168 
4169     has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4170 
4171     if (NCH(n) == 9 + has_type_comment) {
4172         seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4173         if (!seq)
4174             return NULL;
4175     }
4176 
4177     node_target = CHILD(n, 1);
4178     _target = ast_for_exprlist(c, node_target, Store);
4179     if (!_target)
4180         return NULL;
4181     /* Check the # of children rather than the length of _target, since
4182        for x, in ... has 1 element in _target, but still requires a Tuple. */
4183     first = (expr_ty)asdl_seq_GET(_target, 0);
4184     if (NCH(node_target) == 1)
4185         target = first;
4186     else
4187         target = Tuple(_target, Store, first->lineno, first->col_offset,
4188                        node_target->n_end_lineno, node_target->n_end_col_offset,
4189                        c->c_arena);
4190 
4191     expression = ast_for_testlist(c, CHILD(n, 3));
4192     if (!expression)
4193         return NULL;
4194     suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4195     if (!suite_seq)
4196         return NULL;
4197 
4198     if (seq != NULL) {
4199         get_last_end_pos(seq, &end_lineno, &end_col_offset);
4200     } else {
4201         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4202     }
4203 
4204     if (has_type_comment) {
4205         type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4206         if (!type_comment)
4207             return NULL;
4208     }
4209     else
4210         type_comment = NULL;
4211 
4212     if (is_async)
4213         return AsyncFor(target, expression, suite_seq, seq, type_comment,
4214                         LINENO(n0), n0->n_col_offset,
4215                         end_lineno, end_col_offset, c->c_arena);
4216     else
4217         return For(target, expression, suite_seq, seq, type_comment,
4218                    LINENO(n), n->n_col_offset,
4219                    end_lineno, end_col_offset, c->c_arena);
4220 }
4221 
4222 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4223 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4224 {
4225     /* except_clause: 'except' [test ['as' test]] */
4226     int end_lineno, end_col_offset;
4227     REQ(exc, except_clause);
4228     REQ(body, suite);
4229 
4230     if (NCH(exc) == 1) {
4231         asdl_seq *suite_seq = ast_for_suite(c, body);
4232         if (!suite_seq)
4233             return NULL;
4234         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4235 
4236         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4237                              exc->n_col_offset,
4238                              end_lineno, end_col_offset, c->c_arena);
4239     }
4240     else if (NCH(exc) == 2) {
4241         expr_ty expression;
4242         asdl_seq *suite_seq;
4243 
4244         expression = ast_for_expr(c, CHILD(exc, 1));
4245         if (!expression)
4246             return NULL;
4247         suite_seq = ast_for_suite(c, body);
4248         if (!suite_seq)
4249             return NULL;
4250         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4251 
4252         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4253                              exc->n_col_offset,
4254                              end_lineno, end_col_offset, c->c_arena);
4255     }
4256     else if (NCH(exc) == 4) {
4257         asdl_seq *suite_seq;
4258         expr_ty expression;
4259         identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4260         if (!e)
4261             return NULL;
4262         if (forbidden_name(c, e, CHILD(exc, 3), 0))
4263             return NULL;
4264         expression = ast_for_expr(c, CHILD(exc, 1));
4265         if (!expression)
4266             return NULL;
4267         suite_seq = ast_for_suite(c, body);
4268         if (!suite_seq)
4269             return NULL;
4270         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4271 
4272         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4273                              exc->n_col_offset,
4274                              end_lineno, end_col_offset, c->c_arena);
4275     }
4276 
4277     PyErr_Format(PyExc_SystemError,
4278                  "wrong number of children for 'except' clause: %d",
4279                  NCH(exc));
4280     return NULL;
4281 }
4282 
4283 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)4284 ast_for_try_stmt(struct compiling *c, const node *n)
4285 {
4286     const int nch = NCH(n);
4287     int end_lineno, end_col_offset, n_except = (nch - 3)/3;
4288     asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
4289     excepthandler_ty last_handler;
4290 
4291     REQ(n, try_stmt);
4292 
4293     body = ast_for_suite(c, CHILD(n, 2));
4294     if (body == NULL)
4295         return NULL;
4296 
4297     if (TYPE(CHILD(n, nch - 3)) == NAME) {
4298         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
4299             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
4300                 /* we can assume it's an "else",
4301                    because nch >= 9 for try-else-finally and
4302                    it would otherwise have a type of except_clause */
4303                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
4304                 if (orelse == NULL)
4305                     return NULL;
4306                 n_except--;
4307             }
4308 
4309             finally = ast_for_suite(c, CHILD(n, nch - 1));
4310             if (finally == NULL)
4311                 return NULL;
4312             n_except--;
4313         }
4314         else {
4315             /* we can assume it's an "else",
4316                otherwise it would have a type of except_clause */
4317             orelse = ast_for_suite(c, CHILD(n, nch - 1));
4318             if (orelse == NULL)
4319                 return NULL;
4320             n_except--;
4321         }
4322     }
4323     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
4324         ast_error(c, n, "malformed 'try' statement");
4325         return NULL;
4326     }
4327 
4328     if (n_except > 0) {
4329         int i;
4330         /* process except statements to create a try ... except */
4331         handlers = _Py_asdl_seq_new(n_except, c->c_arena);
4332         if (handlers == NULL)
4333             return NULL;
4334 
4335         for (i = 0; i < n_except; i++) {
4336             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
4337                                                        CHILD(n, 5 + i * 3));
4338             if (!e)
4339                 return NULL;
4340             asdl_seq_SET(handlers, i, e);
4341         }
4342     }
4343 
4344     assert(finally != NULL || asdl_seq_LEN(handlers));
4345         if (finally != NULL) {
4346         // finally is always last
4347         get_last_end_pos(finally, &end_lineno, &end_col_offset);
4348     } else if (orelse != NULL) {
4349         // otherwise else is last
4350         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4351     } else {
4352         // inline the get_last_end_pos logic due to layout mismatch
4353         last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
4354         end_lineno = last_handler->end_lineno;
4355         end_col_offset = last_handler->end_col_offset;
4356     }
4357     return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
4358                end_lineno, end_col_offset, c->c_arena);
4359 }
4360 
4361 /* with_item: test ['as' expr] */
4362 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4363 ast_for_with_item(struct compiling *c, const node *n)
4364 {
4365     expr_ty context_expr, optional_vars = NULL;
4366 
4367     REQ(n, with_item);
4368     context_expr = ast_for_expr(c, CHILD(n, 0));
4369     if (!context_expr)
4370         return NULL;
4371     if (NCH(n) == 3) {
4372         optional_vars = ast_for_expr(c, CHILD(n, 2));
4373 
4374         if (!optional_vars) {
4375             return NULL;
4376         }
4377         if (!set_context(c, optional_vars, Store, n)) {
4378             return NULL;
4379         }
4380     }
4381 
4382     return withitem(context_expr, optional_vars, c->c_arena);
4383 }
4384 
4385 /* with_stmt: 'with' with_item (',' with_item)*  ':' [TYPE_COMMENT] suite */
4386 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4387 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4388 {
4389     const node * const n = is_async ? CHILD(n0, 1) : n0;
4390     int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4391     asdl_seq *items, *body;
4392     string type_comment;
4393 
4394     if (is_async && c->c_feature_version < 5) {
4395         ast_error(c, n,
4396                   "Async with statements are only supported in Python 3.5 and greater");
4397         return NULL;
4398     }
4399 
4400     REQ(n, with_stmt);
4401 
4402     has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4403     nch_minus_type = NCH(n) - has_type_comment;
4404 
4405     n_items = (nch_minus_type - 2) / 2;
4406     items = _Py_asdl_seq_new(n_items, c->c_arena);
4407     if (!items)
4408         return NULL;
4409     for (i = 1; i < nch_minus_type - 2; i += 2) {
4410         withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4411         if (!item)
4412             return NULL;
4413         asdl_seq_SET(items, (i - 1) / 2, item);
4414     }
4415 
4416     body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4417     if (!body)
4418         return NULL;
4419     get_last_end_pos(body, &end_lineno, &end_col_offset);
4420 
4421     if (has_type_comment) {
4422         type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4423         if (!type_comment)
4424             return NULL;
4425     }
4426     else
4427         type_comment = NULL;
4428 
4429     if (is_async)
4430         return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4431                          end_lineno, end_col_offset, c->c_arena);
4432     else
4433         return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4434                     end_lineno, end_col_offset, c->c_arena);
4435 }
4436 
4437 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4438 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4439 {
4440     /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4441     PyObject *classname;
4442     asdl_seq *s;
4443     expr_ty call;
4444     int end_lineno, end_col_offset;
4445 
4446     REQ(n, classdef);
4447 
4448     if (NCH(n) == 4) { /* class NAME ':' suite */
4449         s = ast_for_suite(c, CHILD(n, 3));
4450         if (!s)
4451             return NULL;
4452         get_last_end_pos(s, &end_lineno, &end_col_offset);
4453 
4454         classname = NEW_IDENTIFIER(CHILD(n, 1));
4455         if (!classname)
4456             return NULL;
4457         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4458             return NULL;
4459         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4460                         LINENO(n), n->n_col_offset,
4461                         end_lineno, end_col_offset, c->c_arena);
4462     }
4463 
4464     if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4465         s = ast_for_suite(c, CHILD(n, 5));
4466         if (!s)
4467             return NULL;
4468         get_last_end_pos(s, &end_lineno, &end_col_offset);
4469 
4470         classname = NEW_IDENTIFIER(CHILD(n, 1));
4471         if (!classname)
4472             return NULL;
4473         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4474             return NULL;
4475         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4476                         LINENO(n), n->n_col_offset,
4477                         end_lineno, end_col_offset, c->c_arena);
4478     }
4479 
4480     /* class NAME '(' arglist ')' ':' suite */
4481     /* build up a fake Call node so we can extract its pieces */
4482     {
4483         PyObject *dummy_name;
4484         expr_ty dummy;
4485         dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4486         if (!dummy_name)
4487             return NULL;
4488         dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4489                      CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4490                      c->c_arena);
4491         call = ast_for_call(c, CHILD(n, 3), dummy, NULL, CHILD(n, 4));
4492         if (!call)
4493             return NULL;
4494     }
4495     s = ast_for_suite(c, CHILD(n, 6));
4496     if (!s)
4497         return NULL;
4498     get_last_end_pos(s, &end_lineno, &end_col_offset);
4499 
4500     classname = NEW_IDENTIFIER(CHILD(n, 1));
4501     if (!classname)
4502         return NULL;
4503     if (forbidden_name(c, classname, CHILD(n, 1), 0))
4504         return NULL;
4505 
4506     return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4507                     decorator_seq, LINENO(n), n->n_col_offset,
4508                     end_lineno, end_col_offset, c->c_arena);
4509 }
4510 
4511 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4512 ast_for_stmt(struct compiling *c, const node *n)
4513 {
4514     if (TYPE(n) == stmt) {
4515         assert(NCH(n) == 1);
4516         n = CHILD(n, 0);
4517     }
4518     if (TYPE(n) == simple_stmt) {
4519         assert(num_stmts(n) == 1);
4520         n = CHILD(n, 0);
4521     }
4522     if (TYPE(n) == small_stmt) {
4523         n = CHILD(n, 0);
4524         /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4525                   | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4526         */
4527         switch (TYPE(n)) {
4528             case expr_stmt:
4529                 return ast_for_expr_stmt(c, n);
4530             case del_stmt:
4531                 return ast_for_del_stmt(c, n);
4532             case pass_stmt:
4533                 return Pass(LINENO(n), n->n_col_offset,
4534                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4535             case flow_stmt:
4536                 return ast_for_flow_stmt(c, n);
4537             case import_stmt:
4538                 return ast_for_import_stmt(c, n);
4539             case global_stmt:
4540                 return ast_for_global_stmt(c, n);
4541             case nonlocal_stmt:
4542                 return ast_for_nonlocal_stmt(c, n);
4543             case assert_stmt:
4544                 return ast_for_assert_stmt(c, n);
4545             default:
4546                 PyErr_Format(PyExc_SystemError,
4547                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
4548                              TYPE(n), NCH(n));
4549                 return NULL;
4550         }
4551     }
4552     else {
4553         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4554                         | funcdef | classdef | decorated | async_stmt
4555         */
4556         node *ch = CHILD(n, 0);
4557         REQ(n, compound_stmt);
4558         switch (TYPE(ch)) {
4559             case if_stmt:
4560                 return ast_for_if_stmt(c, ch);
4561             case while_stmt:
4562                 return ast_for_while_stmt(c, ch);
4563             case for_stmt:
4564                 return ast_for_for_stmt(c, ch, 0);
4565             case try_stmt:
4566                 return ast_for_try_stmt(c, ch);
4567             case with_stmt:
4568                 return ast_for_with_stmt(c, ch, 0);
4569             case funcdef:
4570                 return ast_for_funcdef(c, ch, NULL);
4571             case classdef:
4572                 return ast_for_classdef(c, ch, NULL);
4573             case decorated:
4574                 return ast_for_decorated(c, ch);
4575             case async_stmt:
4576                 return ast_for_async_stmt(c, ch);
4577             default:
4578                 PyErr_Format(PyExc_SystemError,
4579                              "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4580                              TYPE(n), NCH(n));
4581                 return NULL;
4582         }
4583     }
4584 }
4585 
4586 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4587 parsenumber_raw(struct compiling *c, const char *s)
4588 {
4589     const char *end;
4590     long x;
4591     double dx;
4592     Py_complex compl;
4593     int imflag;
4594 
4595     assert(s != NULL);
4596     errno = 0;
4597     end = s + strlen(s) - 1;
4598     imflag = *end == 'j' || *end == 'J';
4599     if (s[0] == '0') {
4600         x = (long) PyOS_strtoul(s, (char **)&end, 0);
4601         if (x < 0 && errno == 0) {
4602             return PyLong_FromString(s, (char **)0, 0);
4603         }
4604     }
4605     else
4606         x = PyOS_strtol(s, (char **)&end, 0);
4607     if (*end == '\0') {
4608         if (errno != 0)
4609             return PyLong_FromString(s, (char **)0, 0);
4610         return PyLong_FromLong(x);
4611     }
4612     /* XXX Huge floats may silently fail */
4613     if (imflag) {
4614         compl.real = 0.;
4615         compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4616         if (compl.imag == -1.0 && PyErr_Occurred())
4617             return NULL;
4618         return PyComplex_FromCComplex(compl);
4619     }
4620     else
4621     {
4622         dx = PyOS_string_to_double(s, NULL, NULL);
4623         if (dx == -1.0 && PyErr_Occurred())
4624             return NULL;
4625         return PyFloat_FromDouble(dx);
4626     }
4627 }
4628 
4629 static PyObject *
parsenumber(struct compiling * c,const char * s)4630 parsenumber(struct compiling *c, const char *s)
4631 {
4632     char *dup, *end;
4633     PyObject *res = NULL;
4634 
4635     assert(s != NULL);
4636 
4637     if (strchr(s, '_') == NULL) {
4638         return parsenumber_raw(c, s);
4639     }
4640     /* Create a duplicate without underscores. */
4641     dup = PyMem_Malloc(strlen(s) + 1);
4642     if (dup == NULL) {
4643         return PyErr_NoMemory();
4644     }
4645     end = dup;
4646     for (; *s; s++) {
4647         if (*s != '_') {
4648             *end++ = *s;
4649         }
4650     }
4651     *end = '\0';
4652     res = parsenumber_raw(c, dup);
4653     PyMem_Free(dup);
4654     return res;
4655 }
4656 
4657 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4658 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4659 {
4660     const char *s, *t;
4661     t = s = *sPtr;
4662     /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4663     while (s < end && (*s & 0x80)) s++;
4664     *sPtr = s;
4665     return PyUnicode_DecodeUTF8(t, s - t, NULL);
4666 }
4667 
4668 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4669 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4670                              unsigned char first_invalid_escape_char)
4671 {
4672     PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4673                                          first_invalid_escape_char);
4674     if (msg == NULL) {
4675         return -1;
4676     }
4677     if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4678                                    c->c_filename, LINENO(n),
4679                                    NULL, NULL) < 0)
4680     {
4681         if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4682             /* Replace the DeprecationWarning exception with a SyntaxError
4683                to get a more accurate error report */
4684             PyErr_Clear();
4685             ast_error(c, n, "%U", msg);
4686         }
4687         Py_DECREF(msg);
4688         return -1;
4689     }
4690     Py_DECREF(msg);
4691     return 0;
4692 }
4693 
4694 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4695 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4696                             size_t len)
4697 {
4698     PyObject *v, *u;
4699     char *buf;
4700     char *p;
4701     const char *end;
4702 
4703     /* check for integer overflow */
4704     if (len > SIZE_MAX / 6)
4705         return NULL;
4706     /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4707        "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4708     u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4709     if (u == NULL)
4710         return NULL;
4711     p = buf = PyBytes_AsString(u);
4712     end = s + len;
4713     while (s < end) {
4714         if (*s == '\\') {
4715             *p++ = *s++;
4716             if (s >= end || *s & 0x80) {
4717                 strcpy(p, "u005c");
4718                 p += 5;
4719                 if (s >= end)
4720                     break;
4721             }
4722         }
4723         if (*s & 0x80) { /* XXX inefficient */
4724             PyObject *w;
4725             int kind;
4726             void *data;
4727             Py_ssize_t len, i;
4728             w = decode_utf8(c, &s, end);
4729             if (w == NULL) {
4730                 Py_DECREF(u);
4731                 return NULL;
4732             }
4733             kind = PyUnicode_KIND(w);
4734             data = PyUnicode_DATA(w);
4735             len = PyUnicode_GET_LENGTH(w);
4736             for (i = 0; i < len; i++) {
4737                 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4738                 sprintf(p, "\\U%08x", chr);
4739                 p += 10;
4740             }
4741             /* Should be impossible to overflow */
4742             assert(p - buf <= PyBytes_GET_SIZE(u));
4743             Py_DECREF(w);
4744         } else {
4745             *p++ = *s++;
4746         }
4747     }
4748     len = p - buf;
4749     s = buf;
4750 
4751     const char *first_invalid_escape;
4752     v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4753 
4754     if (v != NULL && first_invalid_escape != NULL) {
4755         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4756             /* We have not decref u before because first_invalid_escape points
4757                inside u. */
4758             Py_XDECREF(u);
4759             Py_DECREF(v);
4760             return NULL;
4761         }
4762     }
4763     Py_XDECREF(u);
4764     return v;
4765 }
4766 
4767 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4768 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4769                           size_t len)
4770 {
4771     const char *first_invalid_escape;
4772     PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4773                                              &first_invalid_escape);
4774     if (result == NULL)
4775         return NULL;
4776 
4777     if (first_invalid_escape != NULL) {
4778         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4779             Py_DECREF(result);
4780             return NULL;
4781         }
4782     }
4783     return result;
4784 }
4785 
4786 /* Shift locations for the given node and all its children by adding `lineno`
4787    and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4788 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4789 {
4790     n->n_col_offset = n->n_col_offset + col_offset;
4791     n->n_end_col_offset = n->n_end_col_offset + col_offset;
4792     for (int i = 0; i < NCH(n); ++i) {
4793         if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4794             /* Shifting column offsets unnecessary if there's been newlines. */
4795             col_offset = 0;
4796         }
4797         fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4798     }
4799     n->n_lineno = n->n_lineno + lineno;
4800     n->n_end_lineno = n->n_end_lineno + lineno;
4801 }
4802 
4803 /* Fix locations for the given node and its children.
4804 
4805    `parent` is the enclosing node.
4806    `n` is the node which locations are going to be fixed relative to parent.
4807    `expr_str` is the child node's string representation, including braces.
4808 */
4809 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4810 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4811 {
4812     char *substr = NULL;
4813     char *start;
4814     int lines = LINENO(parent) - 1;
4815     int cols = parent->n_col_offset;
4816     /* Find the full fstring to fix location information in `n`. */
4817     while (parent && parent->n_type != STRING)
4818         parent = parent->n_child;
4819     if (parent && parent->n_str) {
4820         substr = strstr(parent->n_str, expr_str);
4821         if (substr) {
4822             start = substr;
4823             while (start > parent->n_str) {
4824                 if (start[0] == '\n')
4825                     break;
4826                 start--;
4827             }
4828             cols += (int)(substr - start);
4829             /* adjust the start based on the number of newlines encountered
4830                before the f-string expression */
4831             for (char* p = parent->n_str; p < substr; p++) {
4832                 if (*p == '\n') {
4833                     lines++;
4834                 }
4835             }
4836         }
4837     }
4838     fstring_shift_node_locations(n, lines, cols);
4839 }
4840 
4841 /* Compile this expression in to an expr_ty.  Add parens around the
4842    expression, in order to allow leading spaces in the expression. */
4843 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4844 fstring_compile_expr(const char *expr_start, const char *expr_end,
4845                      struct compiling *c, const node *n)
4846 
4847 {
4848     node *mod_n;
4849     mod_ty mod;
4850     char *str;
4851     Py_ssize_t len;
4852     const char *s;
4853 
4854     assert(expr_end >= expr_start);
4855     assert(*(expr_start-1) == '{');
4856     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4857            *expr_end == '=');
4858 
4859     /* If the substring is all whitespace, it's an error.  We need to catch this
4860        here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4861        because turning the expression '' in to '()' would go from being invalid
4862        to valid. */
4863     for (s = expr_start; s != expr_end; s++) {
4864         char c = *s;
4865         /* The Python parser ignores only the following whitespace
4866            characters (\r already is converted to \n). */
4867         if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4868             break;
4869         }
4870     }
4871     if (s == expr_end) {
4872         ast_error(c, n, "f-string: empty expression not allowed");
4873         return NULL;
4874     }
4875 
4876     len = expr_end - expr_start;
4877     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4878     str = PyMem_RawMalloc(len + 3);
4879     if (str == NULL) {
4880         PyErr_NoMemory();
4881         return NULL;
4882     }
4883 
4884     str[0] = '(';
4885     memcpy(str+1, expr_start, len);
4886     str[len+1] = ')';
4887     str[len+2] = 0;
4888 
4889     PyCompilerFlags cf = _PyCompilerFlags_INIT;
4890     cf.cf_flags = PyCF_ONLY_AST;
4891     mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4892                                                     Py_eval_input, 0);
4893     if (!mod_n) {
4894         PyMem_RawFree(str);
4895         return NULL;
4896     }
4897     /* Reuse str to find the correct column offset. */
4898     str[0] = '{';
4899     str[len+1] = '}';
4900     fstring_fix_node_location(n, mod_n, str);
4901     mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4902     PyMem_RawFree(str);
4903     PyNode_Free(mod_n);
4904     if (!mod)
4905         return NULL;
4906     return mod->v.Expression.body;
4907 }
4908 
4909 /* Return -1 on error.
4910 
4911    Return 0 if we reached the end of the literal.
4912 
4913    Return 1 if we haven't reached the end of the literal, but we want
4914    the caller to process the literal up to this point. Used for
4915    doubled braces.
4916 */
4917 static int
fstring_find_literal(const char ** str,const char * end,int raw,PyObject ** literal,int recurse_lvl,struct compiling * c,const node * n)4918 fstring_find_literal(const char **str, const char *end, int raw,
4919                      PyObject **literal, int recurse_lvl,
4920                      struct compiling *c, const node *n)
4921 {
4922     /* Get any literal string. It ends when we hit an un-doubled left
4923        brace (which isn't part of a unicode name escape such as
4924        "\N{EULER CONSTANT}"), or the end of the string. */
4925 
4926     const char *s = *str;
4927     const char *literal_start = s;
4928     int result = 0;
4929 
4930     assert(*literal == NULL);
4931     while (s < end) {
4932         char ch = *s++;
4933         if (!raw && ch == '\\' && s < end) {
4934             ch = *s++;
4935             if (ch == 'N') {
4936                 if (s < end && *s++ == '{') {
4937                     while (s < end && *s++ != '}') {
4938                     }
4939                     continue;
4940                 }
4941                 break;
4942             }
4943             if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4944                 return -1;
4945             }
4946         }
4947         if (ch == '{' || ch == '}') {
4948             /* Check for doubled braces, but only at the top level. If
4949                we checked at every level, then f'{0:{3}}' would fail
4950                with the two closing braces. */
4951             if (recurse_lvl == 0) {
4952                 if (s < end && *s == ch) {
4953                     /* We're going to tell the caller that the literal ends
4954                        here, but that they should continue scanning. But also
4955                        skip over the second brace when we resume scanning. */
4956                     *str = s + 1;
4957                     result = 1;
4958                     goto done;
4959                 }
4960 
4961                 /* Where a single '{' is the start of a new expression, a
4962                    single '}' is not allowed. */
4963                 if (ch == '}') {
4964                     *str = s - 1;
4965                     ast_error(c, n, "f-string: single '}' is not allowed");
4966                     return -1;
4967                 }
4968             }
4969             /* We're either at a '{', which means we're starting another
4970                expression; or a '}', which means we're at the end of this
4971                f-string (for a nested format_spec). */
4972             s--;
4973             break;
4974         }
4975     }
4976     *str = s;
4977     assert(s <= end);
4978     assert(s == end || *s == '{' || *s == '}');
4979 done:
4980     if (literal_start != s) {
4981         if (raw)
4982             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4983                                                     s - literal_start,
4984                                                     NULL, NULL);
4985         else
4986             *literal = decode_unicode_with_escapes(c, n, literal_start,
4987                                                    s - literal_start);
4988         if (!*literal)
4989             return -1;
4990     }
4991     return result;
4992 }
4993 
4994 /* Forward declaration because parsing is recursive. */
4995 static expr_ty
4996 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
4997               struct compiling *c, const node *n);
4998 
4999 /* Parse the f-string at *str, ending at end.  We know *str starts an
5000    expression (so it must be a '{'). Returns the FormattedValue node, which
5001    includes the expression, conversion character, format_spec expression, and
5002    optionally the text of the expression (if = is used).
5003 
5004    Note that I don't do a perfect job here: I don't make sure that a
5005    closing brace doesn't match an opening paren, for example. It
5006    doesn't need to error on all invalid expressions, just correctly
5007    find the end of all valid ones. Any errors inside the expression
5008    will be caught when we parse it later.
5009 
5010    *expression is set to the expression.  For an '=' "debug" expression,
5011    *expr_text is set to the debug text (the original text of the expression,
5012    including the '=' and any whitespace around it, as a string object).  If
5013    not a debug expression, *expr_text set to NULL. */
5014 static int
fstring_find_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5015 fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
5016                   PyObject **expr_text, expr_ty *expression,
5017                   struct compiling *c, const node *n)
5018 {
5019     /* Return -1 on error, else 0. */
5020 
5021     const char *expr_start;
5022     const char *expr_end;
5023     expr_ty simple_expression;
5024     expr_ty format_spec = NULL; /* Optional format specifier. */
5025     int conversion = -1; /* The conversion char.  Use default if not
5026                             specified, or !r if using = and no format
5027                             spec. */
5028 
5029     /* 0 if we're not in a string, else the quote char we're trying to
5030        match (single or double quote). */
5031     char quote_char = 0;
5032 
5033     /* If we're inside a string, 1=normal, 3=triple-quoted. */
5034     int string_type = 0;
5035 
5036     /* Keep track of nesting level for braces/parens/brackets in
5037        expressions. */
5038     Py_ssize_t nested_depth = 0;
5039     char parenstack[MAXLEVEL];
5040 
5041     *expr_text = NULL;
5042 
5043     /* Can only nest one level deep. */
5044     if (recurse_lvl >= 2) {
5045         ast_error(c, n, "f-string: expressions nested too deeply");
5046         goto error;
5047     }
5048 
5049     /* The first char must be a left brace, or we wouldn't have gotten
5050        here. Skip over it. */
5051     assert(**str == '{');
5052     *str += 1;
5053 
5054     expr_start = *str;
5055     for (; *str < end; (*str)++) {
5056         char ch;
5057 
5058         /* Loop invariants. */
5059         assert(nested_depth >= 0);
5060         assert(*str >= expr_start && *str < end);
5061         if (quote_char)
5062             assert(string_type == 1 || string_type == 3);
5063         else
5064             assert(string_type == 0);
5065 
5066         ch = **str;
5067         /* Nowhere inside an expression is a backslash allowed. */
5068         if (ch == '\\') {
5069             /* Error: can't include a backslash character, inside
5070                parens or strings or not. */
5071             ast_error(c, n,
5072                       "f-string expression part "
5073                       "cannot include a backslash");
5074             goto error;
5075         }
5076         if (quote_char) {
5077             /* We're inside a string. See if we're at the end. */
5078             /* This code needs to implement the same non-error logic
5079                as tok_get from tokenizer.c, at the letter_quote
5080                label. To actually share that code would be a
5081                nightmare. But, it's unlikely to change and is small,
5082                so duplicate it here. Note we don't need to catch all
5083                of the errors, since they'll be caught when parsing the
5084                expression. We just need to match the non-error
5085                cases. Thus we can ignore \n in single-quoted strings,
5086                for example. Or non-terminated strings. */
5087             if (ch == quote_char) {
5088                 /* Does this match the string_type (single or triple
5089                    quoted)? */
5090                 if (string_type == 3) {
5091                     if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5092                         /* We're at the end of a triple quoted string. */
5093                         *str += 2;
5094                         string_type = 0;
5095                         quote_char = 0;
5096                         continue;
5097                     }
5098                 } else {
5099                     /* We're at the end of a normal string. */
5100                     quote_char = 0;
5101                     string_type = 0;
5102                     continue;
5103                 }
5104             }
5105         } else if (ch == '\'' || ch == '"') {
5106             /* Is this a triple quoted string? */
5107             if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5108                 string_type = 3;
5109                 *str += 2;
5110             } else {
5111                 /* Start of a normal string. */
5112                 string_type = 1;
5113             }
5114             /* Start looking for the end of the string. */
5115             quote_char = ch;
5116         } else if (ch == '[' || ch == '{' || ch == '(') {
5117             if (nested_depth >= MAXLEVEL) {
5118                 ast_error(c, n, "f-string: too many nested parenthesis");
5119                 goto error;
5120             }
5121             parenstack[nested_depth] = ch;
5122             nested_depth++;
5123         } else if (ch == '#') {
5124             /* Error: can't include a comment character, inside parens
5125                or not. */
5126             ast_error(c, n, "f-string expression part cannot include '#'");
5127             goto error;
5128         } else if (nested_depth == 0 &&
5129                    (ch == '!' || ch == ':' || ch == '}' ||
5130                     ch == '=' || ch == '>' || ch == '<')) {
5131             /* See if there's a next character. */
5132             if (*str+1 < end) {
5133                 char next = *(*str+1);
5134 
5135                 /* For "!=". since '=' is not an allowed conversion character,
5136                    nothing is lost in this test. */
5137                 if ((ch == '!' && next == '=') ||   /* != */
5138                     (ch == '=' && next == '=') ||   /* == */
5139                     (ch == '<' && next == '=') ||   /* <= */
5140                     (ch == '>' && next == '=')      /* >= */
5141                     ) {
5142                     *str += 1;
5143                     continue;
5144                 }
5145                 /* Don't get out of the loop for these, if they're single
5146                    chars (not part of 2-char tokens). If by themselves, they
5147                    don't end an expression (unlike say '!'). */
5148                 if (ch == '>' || ch == '<') {
5149                     continue;
5150                 }
5151             }
5152 
5153             /* Normal way out of this loop. */
5154             break;
5155         } else if (ch == ']' || ch == '}' || ch == ')') {
5156             if (!nested_depth) {
5157                 ast_error(c, n, "f-string: unmatched '%c'", ch);
5158                 goto error;
5159             }
5160             nested_depth--;
5161             int opening = parenstack[nested_depth];
5162             if (!((opening == '(' && ch == ')') ||
5163                   (opening == '[' && ch == ']') ||
5164                   (opening == '{' && ch == '}')))
5165             {
5166                 ast_error(c, n,
5167                           "f-string: closing parenthesis '%c' "
5168                           "does not match opening parenthesis '%c'",
5169                           ch, opening);
5170                 goto error;
5171             }
5172         } else {
5173             /* Just consume this char and loop around. */
5174         }
5175     }
5176     expr_end = *str;
5177     /* If we leave this loop in a string or with mismatched parens, we
5178        don't care. We'll get a syntax error when compiling the
5179        expression. But, we can produce a better error message, so
5180        let's just do that.*/
5181     if (quote_char) {
5182         ast_error(c, n, "f-string: unterminated string");
5183         goto error;
5184     }
5185     if (nested_depth) {
5186         int opening = parenstack[nested_depth - 1];
5187         ast_error(c, n, "f-string: unmatched '%c'", opening);
5188         goto error;
5189     }
5190 
5191     if (*str >= end)
5192         goto unexpected_end_of_string;
5193 
5194     /* Compile the expression as soon as possible, so we show errors
5195        related to the expression before errors related to the
5196        conversion or format_spec. */
5197     simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
5198     if (!simple_expression)
5199         goto error;
5200 
5201     /* Check for =, which puts the text value of the expression in
5202        expr_text. */
5203     if (**str == '=') {
5204         *str += 1;
5205 
5206         /* Skip over ASCII whitespace.  No need to test for end of string
5207            here, since we know there's at least a trailing quote somewhere
5208            ahead. */
5209         while (Py_ISSPACE(**str)) {
5210             *str += 1;
5211         }
5212 
5213         /* Set *expr_text to the text of the expression. */
5214         *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
5215         if (!*expr_text) {
5216             goto error;
5217         }
5218     }
5219 
5220     /* Check for a conversion char, if present. */
5221     if (**str == '!') {
5222         *str += 1;
5223         if (*str >= end)
5224             goto unexpected_end_of_string;
5225 
5226         conversion = **str;
5227         *str += 1;
5228 
5229         /* Validate the conversion. */
5230         if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
5231             ast_error(c, n,
5232                       "f-string: invalid conversion character: "
5233                       "expected 's', 'r', or 'a'");
5234             goto error;
5235         }
5236 
5237     }
5238 
5239     /* Check for the format spec, if present. */
5240     if (*str >= end)
5241         goto unexpected_end_of_string;
5242     if (**str == ':') {
5243         *str += 1;
5244         if (*str >= end)
5245             goto unexpected_end_of_string;
5246 
5247         /* Parse the format spec. */
5248         format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
5249         if (!format_spec)
5250             goto error;
5251     }
5252 
5253     if (*str >= end || **str != '}')
5254         goto unexpected_end_of_string;
5255 
5256     /* We're at a right brace. Consume it. */
5257     assert(*str < end);
5258     assert(**str == '}');
5259     *str += 1;
5260 
5261     /* If we're in = mode (detected by non-NULL expr_text), and have no format
5262        spec and no explict conversion, set the conversion to 'r'. */
5263     if (*expr_text && format_spec == NULL && conversion == -1) {
5264         conversion = 'r';
5265     }
5266 
5267     /* And now create the FormattedValue node that represents this
5268        entire expression with the conversion and format spec. */
5269     *expression = FormattedValue(simple_expression, conversion,
5270                                  format_spec, LINENO(n),
5271                                  n->n_col_offset, n->n_end_lineno,
5272                                  n->n_end_col_offset, c->c_arena);
5273     if (!*expression)
5274         goto error;
5275 
5276     return 0;
5277 
5278 unexpected_end_of_string:
5279     ast_error(c, n, "f-string: expecting '}'");
5280     /* Falls through to error. */
5281 
5282 error:
5283     Py_XDECREF(*expr_text);
5284     return -1;
5285 
5286 }
5287 
5288 /* Return -1 on error.
5289 
5290    Return 0 if we have a literal (possible zero length) and an
5291    expression (zero length if at the end of the string.
5292 
5293    Return 1 if we have a literal, but no expression, and we want the
5294    caller to call us again. This is used to deal with doubled
5295    braces.
5296 
5297    When called multiple times on the string 'a{{b{0}c', this function
5298    will return:
5299 
5300    1. the literal 'a{' with no expression, and a return value
5301       of 1. Despite the fact that there's no expression, the return
5302       value of 1 means we're not finished yet.
5303 
5304    2. the literal 'b' and the expression '0', with a return value of
5305       0. The fact that there's an expression means we're not finished.
5306 
5307    3. literal 'c' with no expression and a return value of 0. The
5308       combination of the return value of 0 with no expression means
5309       we're finished.
5310 */
5311 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5312 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5313                               int recurse_lvl, PyObject **literal,
5314                               PyObject **expr_text, expr_ty *expression,
5315                               struct compiling *c, const node *n)
5316 {
5317     int result;
5318 
5319     assert(*literal == NULL && *expression == NULL);
5320 
5321     /* Get any literal string. */
5322     result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5323     if (result < 0)
5324         goto error;
5325 
5326     assert(result == 0 || result == 1);
5327 
5328     if (result == 1)
5329         /* We have a literal, but don't look at the expression. */
5330         return 1;
5331 
5332     if (*str >= end || **str == '}')
5333         /* We're at the end of the string or the end of a nested
5334            f-string: no expression. The top-level error case where we
5335            expect to be at the end of the string but we're at a '}' is
5336            handled later. */
5337         return 0;
5338 
5339     /* We must now be the start of an expression, on a '{'. */
5340     assert(**str == '{');
5341 
5342     if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5343                           expression, c, n) < 0)
5344         goto error;
5345 
5346     return 0;
5347 
5348 error:
5349     Py_CLEAR(*literal);
5350     return -1;
5351 }
5352 
/* Number of expr_ty slots embedded directly in an ExprList. */
#define EXPRLIST_N_CACHED  64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* Embedded storage used before
                                            any dynamic allocation. */
} ExprList;
5371 
5372 #ifdef NDEBUG
5373 #define ExprList_check_invariants(l)
5374 #else
5375 static void
ExprList_check_invariants(ExprList * l)5376 ExprList_check_invariants(ExprList *l)
5377 {
5378     /* Check our invariants. Make sure this object is "live", and
5379        hasn't been deallocated. */
5380     assert(l->size >= 0);
5381     assert(l->p != NULL);
5382     if (l->size <= EXPRLIST_N_CACHED)
5383         assert(l->data == l->p);
5384 }
5385 #endif
5386 
5387 static void
ExprList_Init(ExprList * l)5388 ExprList_Init(ExprList *l)
5389 {
5390     l->allocated = EXPRLIST_N_CACHED;
5391     l->size = 0;
5392 
5393     /* Until we start allocating dynamically, p points to data. */
5394     l->p = l->data;
5395 
5396     ExprList_check_invariants(l);
5397 }
5398 
5399 static int
ExprList_Append(ExprList * l,expr_ty exp)5400 ExprList_Append(ExprList *l, expr_ty exp)
5401 {
5402     ExprList_check_invariants(l);
5403     if (l->size >= l->allocated) {
5404         /* We need to alloc (or realloc) the memory. */
5405         Py_ssize_t new_size = l->allocated * 2;
5406 
5407         /* See if we've ever allocated anything dynamically. */
5408         if (l->p == l->data) {
5409             Py_ssize_t i;
5410             /* We're still using the cached data. Switch to
5411                alloc-ing. */
5412             l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
5413             if (!l->p)
5414                 return -1;
5415             /* Copy the cached data into the new buffer. */
5416             for (i = 0; i < l->size; i++)
5417                 l->p[i] = l->data[i];
5418         } else {
5419             /* Just realloc. */
5420             expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size);
5421             if (!tmp) {
5422                 PyMem_RawFree(l->p);
5423                 l->p = NULL;
5424                 return -1;
5425             }
5426             l->p = tmp;
5427         }
5428 
5429         l->allocated = new_size;
5430         assert(l->allocated == 2 * l->size);
5431     }
5432 
5433     l->p[l->size++] = exp;
5434 
5435     ExprList_check_invariants(l);
5436     return 0;
5437 }
5438 
5439 static void
ExprList_Dealloc(ExprList * l)5440 ExprList_Dealloc(ExprList *l)
5441 {
5442     ExprList_check_invariants(l);
5443 
5444     /* If there's been an error, or we've never dynamically allocated,
5445        do nothing. */
5446     if (!l->p || l->p == l->data) {
5447         /* Do nothing. */
5448     } else {
5449         /* We have dynamically allocated. Free the memory. */
5450         PyMem_RawFree(l->p);
5451     }
5452     l->p = NULL;
5453     l->size = -1;
5454 }
5455 
5456 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5457 ExprList_Finish(ExprList *l, PyArena *arena)
5458 {
5459     asdl_seq *seq;
5460 
5461     ExprList_check_invariants(l);
5462 
5463     /* Allocate the asdl_seq and copy the expressions in to it. */
5464     seq = _Py_asdl_seq_new(l->size, arena);
5465     if (seq) {
5466         Py_ssize_t i;
5467         for (i = 0; i < l->size; i++)
5468             asdl_seq_SET(seq, i, l->p[i]);
5469     }
5470     ExprList_Dealloc(l);
5471     return seq;
5472 }
5473 
/* The FstringParser is designed to add a mix of strings and
   f-strings, and concat them together as needed. Ultimately, it
   generates an expr_ty. */
typedef struct {
    PyObject *last_str;   /* Literal text accumulated so far that has not
                             yet been turned into a Constant node, or
                             NULL if there is none. */
    ExprList expr_list;   /* Nodes collected so far, in order. */
    int fmode;            /* Set to 1 once an f-string has been
                             concatenated; while 0, the result is a
                             plain Constant. */
} FstringParser;
5482 
5483 #ifdef NDEBUG
5484 #define FstringParser_check_invariants(state)
5485 #else
5486 static void
FstringParser_check_invariants(FstringParser * state)5487 FstringParser_check_invariants(FstringParser *state)
5488 {
5489     if (state->last_str)
5490         assert(PyUnicode_CheckExact(state->last_str));
5491     ExprList_check_invariants(&state->expr_list);
5492 }
5493 #endif
5494 
5495 static void
FstringParser_Init(FstringParser * state)5496 FstringParser_Init(FstringParser *state)
5497 {
5498     state->last_str = NULL;
5499     state->fmode = 0;
5500     ExprList_Init(&state->expr_list);
5501     FstringParser_check_invariants(state);
5502 }
5503 
5504 static void
FstringParser_Dealloc(FstringParser * state)5505 FstringParser_Dealloc(FstringParser *state)
5506 {
5507     FstringParser_check_invariants(state);
5508 
5509     Py_XDECREF(state->last_str);
5510     ExprList_Dealloc(&state->expr_list);
5511 }
5512 
5513 /* Constants for the following */
5514 static PyObject *u_kind;
5515 
5516 /* Compute 'kind' field for string Constant (either 'u' or None) */
5517 static PyObject *
make_kind(struct compiling * c,const node * n)5518 make_kind(struct compiling *c, const node *n)
5519 {
5520     char *s = NULL;
5521     PyObject *kind = NULL;
5522 
5523     /* Find the first string literal, if any */
5524     while (TYPE(n) != STRING) {
5525         if (NCH(n) == 0)
5526             return NULL;
5527         n = CHILD(n, 0);
5528     }
5529     REQ(n, STRING);
5530 
5531     /* If it starts with 'u', return a PyUnicode "u" string */
5532     s = STR(n);
5533     if (s && *s == 'u') {
5534         if (!u_kind) {
5535             u_kind = PyUnicode_InternFromString("u");
5536             if (!u_kind)
5537                 return NULL;
5538         }
5539         kind = u_kind;
5540         if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5541             return NULL;
5542         }
5543         Py_INCREF(kind);
5544     }
5545     return kind;
5546 }
5547 
5548 /* Make a Constant node, but decref the PyUnicode object being added. */
5549 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5550 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5551 {
5552     PyObject *s = *str;
5553     PyObject *kind = NULL;
5554     *str = NULL;
5555     assert(PyUnicode_CheckExact(s));
5556     if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5557         Py_DECREF(s);
5558         return NULL;
5559     }
5560     kind = make_kind(c, n);
5561     if (kind == NULL && PyErr_Occurred())
5562         return NULL;
5563     return Constant(s, kind, LINENO(n), n->n_col_offset,
5564                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5565 }
5566 
5567 /* Add a non-f-string (that is, a regular literal string). str is
5568    decref'd. */
5569 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5570 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5571 {
5572     FstringParser_check_invariants(state);
5573 
5574     assert(PyUnicode_CheckExact(str));
5575 
5576     if (PyUnicode_GET_LENGTH(str) == 0) {
5577         Py_DECREF(str);
5578         return 0;
5579     }
5580 
5581     if (!state->last_str) {
5582         /* We didn't have a string before, so just remember this one. */
5583         state->last_str = str;
5584     } else {
5585         /* Concatenate this with the previous string. */
5586         PyUnicode_AppendAndDel(&state->last_str, str);
5587         if (!state->last_str)
5588             return -1;
5589     }
5590     FstringParser_check_invariants(state);
5591     return 0;
5592 }
5593 
5594 /* Parse an f-string. The f-string is in *str to end, with no
5595    'f' or quotes. */
5596 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5597 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5598                             const char *end, int raw, int recurse_lvl,
5599                             struct compiling *c, const node *n)
5600 {
5601     FstringParser_check_invariants(state);
5602     state->fmode = 1;
5603 
5604     /* Parse the f-string. */
5605     while (1) {
5606         PyObject *literal = NULL;
5607         PyObject *expr_text = NULL;
5608         expr_ty expression = NULL;
5609 
5610         /* If there's a zero length literal in front of the
5611            expression, literal will be NULL. If we're at the end of
5612            the f-string, expression will be NULL (unless result == 1,
5613            see below). */
5614         int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5615                                                    &literal, &expr_text,
5616                                                    &expression, c, n);
5617         if (result < 0)
5618             return -1;
5619 
5620         /* Add the literal, if any. */
5621         if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5622             Py_XDECREF(expr_text);
5623             return -1;
5624         }
5625         /* Add the expr_text, if any. */
5626         if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5627             return -1;
5628         }
5629 
5630         /* We've dealt with the literal and expr_text, their ownership has
5631            been transferred to the state object.  Don't look at them again. */
5632 
5633         /* See if we should just loop around to get the next literal
5634            and expression, while ignoring the expression this
5635            time. This is used for un-doubling braces, as an
5636            optimization. */
5637         if (result == 1)
5638             continue;
5639 
5640         if (!expression)
5641             /* We're done with this f-string. */
5642             break;
5643 
5644         /* We know we have an expression. Convert any existing string
5645            to a Constant node. */
5646         if (!state->last_str) {
5647             /* Do nothing. No previous literal. */
5648         } else {
5649             /* Convert the existing last_str literal to a Constant node. */
5650             expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5651             if (!str || ExprList_Append(&state->expr_list, str) < 0)
5652                 return -1;
5653         }
5654 
5655         if (ExprList_Append(&state->expr_list, expression) < 0)
5656             return -1;
5657     }
5658 
5659     /* If recurse_lvl is zero, then we must be at the end of the
5660        string. Otherwise, we must be at a right brace. */
5661 
5662     if (recurse_lvl == 0 && *str < end-1) {
5663         ast_error(c, n, "f-string: unexpected end of string");
5664         return -1;
5665     }
5666     if (recurse_lvl != 0 && **str != '}') {
5667         ast_error(c, n, "f-string: expecting '}'");
5668         return -1;
5669     }
5670 
5671     FstringParser_check_invariants(state);
5672     return 0;
5673 }
5674 
5675 /* Convert the partial state reflected in last_str and expr_list to an
5676    expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5677 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5678 FstringParser_Finish(FstringParser *state, struct compiling *c,
5679                      const node *n)
5680 {
5681     asdl_seq *seq;
5682 
5683     FstringParser_check_invariants(state);
5684 
5685     /* If we're just a constant string with no expressions, return
5686        that. */
5687     if (!state->fmode) {
5688         assert(!state->expr_list.size);
5689         if (!state->last_str) {
5690             /* Create a zero length string. */
5691             state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5692             if (!state->last_str)
5693                 goto error;
5694         }
5695         return make_str_node_and_del(&state->last_str, c, n);
5696     }
5697 
5698     /* Create a Constant node out of last_str, if needed. It will be the
5699        last node in our expression list. */
5700     if (state->last_str) {
5701         expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5702         if (!str || ExprList_Append(&state->expr_list, str) < 0)
5703             goto error;
5704     }
5705     /* This has already been freed. */
5706     assert(state->last_str == NULL);
5707 
5708     seq = ExprList_Finish(&state->expr_list, c->c_arena);
5709     if (!seq)
5710         goto error;
5711 
5712     return JoinedStr(seq, LINENO(n), n->n_col_offset,
5713                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5714 
5715 error:
5716     FstringParser_Dealloc(state);
5717     return NULL;
5718 }
5719 
5720 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5721    at end, parse it into an expr_ty.  Return NULL on error.  Adjust
5722    str to point past the parsed portion. */
5723 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5724 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5725               struct compiling *c, const node *n)
5726 {
5727     FstringParser state;
5728 
5729     FstringParser_Init(&state);
5730     if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5731                                     c, n) < 0) {
5732         FstringParser_Dealloc(&state);
5733         return NULL;
5734     }
5735 
5736     return FstringParser_Finish(&state, c, n);
5737 }
5738 
5739 /* n is a Python string literal, including the bracketing quote
5740    characters, and r, b, u, &/or f prefixes (if any), and embedded
5741    escape sequences (if any). parsestr parses it, and sets *result to
5742    decoded Python string object.  If the string is an f-string, set
5743    *fstr and *fstrlen to the unparsed string object.  Return 0 if no
5744    errors occurred.
5745 */
5746 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5747 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5748          PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5749 {
5750     size_t len;
5751     const char *s = STR(n);
5752     int quote = Py_CHARMASK(*s);
5753     int fmode = 0;
5754     *bytesmode = 0;
5755     *rawmode = 0;
5756     *result = NULL;
5757     *fstr = NULL;
5758     if (Py_ISALPHA(quote)) {
5759         while (!*bytesmode || !*rawmode) {
5760             if (quote == 'b' || quote == 'B') {
5761                 quote = *++s;
5762                 *bytesmode = 1;
5763             }
5764             else if (quote == 'u' || quote == 'U') {
5765                 quote = *++s;
5766             }
5767             else if (quote == 'r' || quote == 'R') {
5768                 quote = *++s;
5769                 *rawmode = 1;
5770             }
5771             else if (quote == 'f' || quote == 'F') {
5772                 quote = *++s;
5773                 fmode = 1;
5774             }
5775             else {
5776                 break;
5777             }
5778         }
5779     }
5780 
5781     /* fstrings are only allowed in Python 3.6 and greater */
5782     if (fmode && c->c_feature_version < 6) {
5783         ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5784         return -1;
5785     }
5786 
5787     if (fmode && *bytesmode) {
5788         PyErr_BadInternalCall();
5789         return -1;
5790     }
5791     if (quote != '\'' && quote != '\"') {
5792         PyErr_BadInternalCall();
5793         return -1;
5794     }
5795     /* Skip the leading quote char. */
5796     s++;
5797     len = strlen(s);
5798     if (len > INT_MAX) {
5799         PyErr_SetString(PyExc_OverflowError,
5800                         "string to parse is too long");
5801         return -1;
5802     }
5803     if (s[--len] != quote) {
5804         /* Last quote char must match the first. */
5805         PyErr_BadInternalCall();
5806         return -1;
5807     }
5808     if (len >= 4 && s[0] == quote && s[1] == quote) {
5809         /* A triple quoted string. We've already skipped one quote at
5810            the start and one at the end of the string. Now skip the
5811            two at the start. */
5812         s += 2;
5813         len -= 2;
5814         /* And check that the last two match. */
5815         if (s[--len] != quote || s[--len] != quote) {
5816             PyErr_BadInternalCall();
5817             return -1;
5818         }
5819     }
5820 
5821     if (fmode) {
5822         /* Just return the bytes. The caller will parse the resulting
5823            string. */
5824         *fstr = s;
5825         *fstrlen = len;
5826         return 0;
5827     }
5828 
5829     /* Not an f-string. */
5830     /* Avoid invoking escape decoding routines if possible. */
5831     *rawmode = *rawmode || strchr(s, '\\') == NULL;
5832     if (*bytesmode) {
5833         /* Disallow non-ASCII characters. */
5834         const char *ch;
5835         for (ch = s; *ch; ch++) {
5836             if (Py_CHARMASK(*ch) >= 0x80) {
5837                 ast_error(c, n,
5838                           "bytes can only contain ASCII "
5839                           "literal characters.");
5840                 return -1;
5841             }
5842         }
5843         if (*rawmode)
5844             *result = PyBytes_FromStringAndSize(s, len);
5845         else
5846             *result = decode_bytes_with_escapes(c, n, s, len);
5847     } else {
5848         if (*rawmode)
5849             *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5850         else
5851             *result = decode_unicode_with_escapes(c, n, s, len);
5852     }
5853     return *result == NULL ? -1 : 0;
5854 }
5855 
5856 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5857    each STRING atom, and process it as needed. For bytes, just
5858    concatenate them together, and the result will be a Constant node. For
5859    normal strings and f-strings, concatenate them together. The result
5860    will be a Constant node if there were no f-strings; a FormattedValue
5861    node if there's just an f-string (with no leading or trailing
5862    literals), or a JoinedStr node if there are multiple f-strings or
5863    any literals involved. */
5864 static expr_ty
parsestrplus(struct compiling * c,const node * n)5865 parsestrplus(struct compiling *c, const node *n)
5866 {
5867     int bytesmode = 0;
5868     PyObject *bytes_str = NULL;
5869     int i;
5870 
5871     FstringParser state;
5872     FstringParser_Init(&state);
5873 
5874     for (i = 0; i < NCH(n); i++) {
5875         int this_bytesmode;
5876         int this_rawmode;
5877         PyObject *s;
5878         const char *fstr;
5879         Py_ssize_t fstrlen = -1;  /* Silence a compiler warning. */
5880 
5881         REQ(CHILD(n, i), STRING);
5882         if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5883                      &fstr, &fstrlen) != 0)
5884             goto error;
5885 
5886         /* Check that we're not mixing bytes with unicode. */
5887         if (i != 0 && bytesmode != this_bytesmode) {
5888             ast_error(c, n, "cannot mix bytes and nonbytes literals");
5889             /* s is NULL if the current string part is an f-string. */
5890             Py_XDECREF(s);
5891             goto error;
5892         }
5893         bytesmode = this_bytesmode;
5894 
5895         if (fstr != NULL) {
5896             int result;
5897             assert(s == NULL && !bytesmode);
5898             /* This is an f-string. Parse and concatenate it. */
5899             result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5900                                                  this_rawmode, 0, c, n);
5901             if (result < 0)
5902                 goto error;
5903         } else {
5904             /* A string or byte string. */
5905             assert(s != NULL && fstr == NULL);
5906 
5907             assert(bytesmode ? PyBytes_CheckExact(s) :
5908                    PyUnicode_CheckExact(s));
5909 
5910             if (bytesmode) {
5911                 /* For bytes, concat as we go. */
5912                 if (i == 0) {
5913                     /* First time, just remember this value. */
5914                     bytes_str = s;
5915                 } else {
5916                     PyBytes_ConcatAndDel(&bytes_str, s);
5917                     if (!bytes_str)
5918                         goto error;
5919                 }
5920             } else {
5921                 /* This is a regular string. Concatenate it. */
5922                 if (FstringParser_ConcatAndDel(&state, s) < 0)
5923                     goto error;
5924             }
5925         }
5926     }
5927     if (bytesmode) {
5928         /* Just return the bytes object and we're done. */
5929         if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5930             goto error;
5931         return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5932                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5933     }
5934 
5935     /* We're not a bytes string, bytes_str should never have been set. */
5936     assert(bytes_str == NULL);
5937 
5938     return FstringParser_Finish(&state, c, n);
5939 
5940 error:
5941     Py_XDECREF(bytes_str);
5942     FstringParser_Dealloc(&state);
5943     return NULL;
5944 }
5945 
5946 PyObject *
_PyAST_GetDocString(asdl_seq * body)5947 _PyAST_GetDocString(asdl_seq *body)
5948 {
5949     if (!asdl_seq_LEN(body)) {
5950         return NULL;
5951     }
5952     stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
5953     if (st->kind != Expr_kind) {
5954         return NULL;
5955     }
5956     expr_ty e = st->v.Expr.value;
5957     if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
5958         return e->v.Constant.value;
5959     }
5960     return NULL;
5961 }
5962