• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12 
13 #include <assert.h>
14 #include <stdbool.h>
15 
16 #define MAXLEVEL 200    /* Max parentheses level */
17 
18 static int validate_stmts(asdl_seq *);
19 static int validate_exprs(asdl_seq *, expr_context_ty, int);
20 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
21 static int validate_stmt(stmt_ty);
22 static int validate_expr(expr_ty, expr_context_ty);
23 
24 static int
validate_name(PyObject * name)25 validate_name(PyObject *name)
26 {
27     assert(PyUnicode_Check(name));
28     static const char * const forbidden[] = {
29         "None",
30         "True",
31         "False",
32         NULL
33     };
34     for (int i = 0; forbidden[i] != NULL; i++) {
35         if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
36             PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
37             return 0;
38         }
39     }
40     return 1;
41 }
42 
43 static int
validate_comprehension(asdl_seq * gens)44 validate_comprehension(asdl_seq *gens)
45 {
46     Py_ssize_t i;
47     if (!asdl_seq_LEN(gens)) {
48         PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
49         return 0;
50     }
51     for (i = 0; i < asdl_seq_LEN(gens); i++) {
52         comprehension_ty comp = asdl_seq_GET(gens, i);
53         if (!validate_expr(comp->target, Store) ||
54             !validate_expr(comp->iter, Load) ||
55             !validate_exprs(comp->ifs, Load, 0))
56             return 0;
57     }
58     return 1;
59 }
60 
61 static int
validate_slice(slice_ty slice)62 validate_slice(slice_ty slice)
63 {
64     switch (slice->kind) {
65     case Slice_kind:
66         return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
67             (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
68             (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
69     case ExtSlice_kind: {
70         Py_ssize_t i;
71         if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
72             return 0;
73         for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
74             if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
75                 return 0;
76         return 1;
77     }
78     case Index_kind:
79         return validate_expr(slice->v.Index.value, Load);
80     default:
81         PyErr_SetString(PyExc_SystemError, "unknown slice node");
82         return 0;
83     }
84 }
85 
86 static int
validate_keywords(asdl_seq * keywords)87 validate_keywords(asdl_seq *keywords)
88 {
89     Py_ssize_t i;
90     for (i = 0; i < asdl_seq_LEN(keywords); i++)
91         if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
92             return 0;
93     return 1;
94 }
95 
96 static int
validate_args(asdl_seq * args)97 validate_args(asdl_seq *args)
98 {
99     Py_ssize_t i;
100     for (i = 0; i < asdl_seq_LEN(args); i++) {
101         arg_ty arg = asdl_seq_GET(args, i);
102         if (arg->annotation && !validate_expr(arg->annotation, Load))
103             return 0;
104     }
105     return 1;
106 }
107 
108 static const char *
expr_context_name(expr_context_ty ctx)109 expr_context_name(expr_context_ty ctx)
110 {
111     switch (ctx) {
112     case Load:
113         return "Load";
114     case Store:
115         return "Store";
116     case Del:
117         return "Del";
118     case AugLoad:
119         return "AugLoad";
120     case AugStore:
121         return "AugStore";
122     case Param:
123         return "Param";
124     default:
125         Py_UNREACHABLE();
126     }
127 }
128 
129 static int
validate_arguments(arguments_ty args)130 validate_arguments(arguments_ty args)
131 {
132     if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
133         return 0;
134     }
135     if (args->vararg && args->vararg->annotation
136         && !validate_expr(args->vararg->annotation, Load)) {
137             return 0;
138     }
139     if (!validate_args(args->kwonlyargs))
140         return 0;
141     if (args->kwarg && args->kwarg->annotation
142         && !validate_expr(args->kwarg->annotation, Load)) {
143             return 0;
144     }
145     if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
146         PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
147         return 0;
148     }
149     if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
150         PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
151                         "kw_defaults on arguments");
152         return 0;
153     }
154     return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
155 }
156 
157 static int
validate_constant(PyObject * value)158 validate_constant(PyObject *value)
159 {
160     if (value == Py_None || value == Py_Ellipsis)
161         return 1;
162 
163     if (PyLong_CheckExact(value)
164             || PyFloat_CheckExact(value)
165             || PyComplex_CheckExact(value)
166             || PyBool_Check(value)
167             || PyUnicode_CheckExact(value)
168             || PyBytes_CheckExact(value))
169         return 1;
170 
171     if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
172         PyObject *it;
173 
174         it = PyObject_GetIter(value);
175         if (it == NULL)
176             return 0;
177 
178         while (1) {
179             PyObject *item = PyIter_Next(it);
180             if (item == NULL) {
181                 if (PyErr_Occurred()) {
182                     Py_DECREF(it);
183                     return 0;
184                 }
185                 break;
186             }
187 
188             if (!validate_constant(item)) {
189                 Py_DECREF(it);
190                 Py_DECREF(item);
191                 return 0;
192             }
193             Py_DECREF(item);
194         }
195 
196         Py_DECREF(it);
197         return 1;
198     }
199 
200     return 0;
201 }
202 
203 static int
validate_expr(expr_ty exp,expr_context_ty ctx)204 validate_expr(expr_ty exp, expr_context_ty ctx)
205 {
206     int check_ctx = 1;
207     expr_context_ty actual_ctx;
208 
209     /* First check expression context. */
210     switch (exp->kind) {
211     case Attribute_kind:
212         actual_ctx = exp->v.Attribute.ctx;
213         break;
214     case Subscript_kind:
215         actual_ctx = exp->v.Subscript.ctx;
216         break;
217     case Starred_kind:
218         actual_ctx = exp->v.Starred.ctx;
219         break;
220     case Name_kind:
221         if (!validate_name(exp->v.Name.id)) {
222             return 0;
223         }
224         actual_ctx = exp->v.Name.ctx;
225         break;
226     case List_kind:
227         actual_ctx = exp->v.List.ctx;
228         break;
229     case Tuple_kind:
230         actual_ctx = exp->v.Tuple.ctx;
231         break;
232     default:
233         if (ctx != Load) {
234             PyErr_Format(PyExc_ValueError, "expression which can't be "
235                          "assigned to in %s context", expr_context_name(ctx));
236             return 0;
237         }
238         check_ctx = 0;
239         /* set actual_ctx to prevent gcc warning */
240         actual_ctx = 0;
241     }
242     if (check_ctx && actual_ctx != ctx) {
243         PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
244                      expr_context_name(ctx), expr_context_name(actual_ctx));
245         return 0;
246     }
247 
248     /* Now validate expression. */
249     switch (exp->kind) {
250     case BoolOp_kind:
251         if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
252             PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
253             return 0;
254         }
255         return validate_exprs(exp->v.BoolOp.values, Load, 0);
256     case BinOp_kind:
257         return validate_expr(exp->v.BinOp.left, Load) &&
258             validate_expr(exp->v.BinOp.right, Load);
259     case UnaryOp_kind:
260         return validate_expr(exp->v.UnaryOp.operand, Load);
261     case Lambda_kind:
262         return validate_arguments(exp->v.Lambda.args) &&
263             validate_expr(exp->v.Lambda.body, Load);
264     case IfExp_kind:
265         return validate_expr(exp->v.IfExp.test, Load) &&
266             validate_expr(exp->v.IfExp.body, Load) &&
267             validate_expr(exp->v.IfExp.orelse, Load);
268     case Dict_kind:
269         if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
270             PyErr_SetString(PyExc_ValueError,
271                             "Dict doesn't have the same number of keys as values");
272             return 0;
273         }
274         /* null_ok=1 for keys expressions to allow dict unpacking to work in
275            dict literals, i.e. ``{**{a:b}}`` */
276         return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
277             validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
278     case Set_kind:
279         return validate_exprs(exp->v.Set.elts, Load, 0);
280 #define COMP(NAME) \
281         case NAME ## _kind: \
282             return validate_comprehension(exp->v.NAME.generators) && \
283                 validate_expr(exp->v.NAME.elt, Load);
284     COMP(ListComp)
285     COMP(SetComp)
286     COMP(GeneratorExp)
287 #undef COMP
288     case DictComp_kind:
289         return validate_comprehension(exp->v.DictComp.generators) &&
290             validate_expr(exp->v.DictComp.key, Load) &&
291             validate_expr(exp->v.DictComp.value, Load);
292     case Yield_kind:
293         return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
294     case YieldFrom_kind:
295         return validate_expr(exp->v.YieldFrom.value, Load);
296     case Await_kind:
297         return validate_expr(exp->v.Await.value, Load);
298     case Compare_kind:
299         if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
300             PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
301             return 0;
302         }
303         if (asdl_seq_LEN(exp->v.Compare.comparators) !=
304             asdl_seq_LEN(exp->v.Compare.ops)) {
305             PyErr_SetString(PyExc_ValueError, "Compare has a different number "
306                             "of comparators and operands");
307             return 0;
308         }
309         return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
310             validate_expr(exp->v.Compare.left, Load);
311     case Call_kind:
312         return validate_expr(exp->v.Call.func, Load) &&
313             validate_exprs(exp->v.Call.args, Load, 0) &&
314             validate_keywords(exp->v.Call.keywords);
315     case Constant_kind:
316         if (!validate_constant(exp->v.Constant.value)) {
317             PyErr_Format(PyExc_TypeError,
318                          "got an invalid type in Constant: %s",
319                          Py_TYPE(exp->v.Constant.value)->tp_name);
320             return 0;
321         }
322         return 1;
323     case JoinedStr_kind:
324         return validate_exprs(exp->v.JoinedStr.values, Load, 0);
325     case FormattedValue_kind:
326         if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
327             return 0;
328         if (exp->v.FormattedValue.format_spec)
329             return validate_expr(exp->v.FormattedValue.format_spec, Load);
330         return 1;
331     case Attribute_kind:
332         return validate_expr(exp->v.Attribute.value, Load);
333     case Subscript_kind:
334         return validate_slice(exp->v.Subscript.slice) &&
335             validate_expr(exp->v.Subscript.value, Load);
336     case Starred_kind:
337         return validate_expr(exp->v.Starred.value, ctx);
338     case List_kind:
339         return validate_exprs(exp->v.List.elts, ctx, 0);
340     case Tuple_kind:
341         return validate_exprs(exp->v.Tuple.elts, ctx, 0);
342     case NamedExpr_kind:
343         return validate_expr(exp->v.NamedExpr.value, Load);
344     /* This last case doesn't have any checking. */
345     case Name_kind:
346         return 1;
347     }
348     PyErr_SetString(PyExc_SystemError, "unexpected expression");
349     return 0;
350 }
351 
352 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)353 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
354 {
355     if (asdl_seq_LEN(seq))
356         return 1;
357     PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
358     return 0;
359 }
360 
361 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)362 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
363 {
364     return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
365         validate_exprs(targets, ctx, 0);
366 }
367 
368 static int
validate_body(asdl_seq * body,const char * owner)369 validate_body(asdl_seq *body, const char *owner)
370 {
371     return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
372 }
373 
374 static int
validate_stmt(stmt_ty stmt)375 validate_stmt(stmt_ty stmt)
376 {
377     Py_ssize_t i;
378     switch (stmt->kind) {
379     case FunctionDef_kind:
380         return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
381             validate_arguments(stmt->v.FunctionDef.args) &&
382             validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
383             (!stmt->v.FunctionDef.returns ||
384              validate_expr(stmt->v.FunctionDef.returns, Load));
385     case ClassDef_kind:
386         return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
387             validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
388             validate_keywords(stmt->v.ClassDef.keywords) &&
389             validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
390     case Return_kind:
391         return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
392     case Delete_kind:
393         return validate_assignlist(stmt->v.Delete.targets, Del);
394     case Assign_kind:
395         return validate_assignlist(stmt->v.Assign.targets, Store) &&
396             validate_expr(stmt->v.Assign.value, Load);
397     case AugAssign_kind:
398         return validate_expr(stmt->v.AugAssign.target, Store) &&
399             validate_expr(stmt->v.AugAssign.value, Load);
400     case AnnAssign_kind:
401         if (stmt->v.AnnAssign.target->kind != Name_kind &&
402             stmt->v.AnnAssign.simple) {
403             PyErr_SetString(PyExc_TypeError,
404                             "AnnAssign with simple non-Name target");
405             return 0;
406         }
407         return validate_expr(stmt->v.AnnAssign.target, Store) &&
408                (!stmt->v.AnnAssign.value ||
409                 validate_expr(stmt->v.AnnAssign.value, Load)) &&
410                validate_expr(stmt->v.AnnAssign.annotation, Load);
411     case For_kind:
412         return validate_expr(stmt->v.For.target, Store) &&
413             validate_expr(stmt->v.For.iter, Load) &&
414             validate_body(stmt->v.For.body, "For") &&
415             validate_stmts(stmt->v.For.orelse);
416     case AsyncFor_kind:
417         return validate_expr(stmt->v.AsyncFor.target, Store) &&
418             validate_expr(stmt->v.AsyncFor.iter, Load) &&
419             validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
420             validate_stmts(stmt->v.AsyncFor.orelse);
421     case While_kind:
422         return validate_expr(stmt->v.While.test, Load) &&
423             validate_body(stmt->v.While.body, "While") &&
424             validate_stmts(stmt->v.While.orelse);
425     case If_kind:
426         return validate_expr(stmt->v.If.test, Load) &&
427             validate_body(stmt->v.If.body, "If") &&
428             validate_stmts(stmt->v.If.orelse);
429     case With_kind:
430         if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
431             return 0;
432         for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
433             withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
434             if (!validate_expr(item->context_expr, Load) ||
435                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
436                 return 0;
437         }
438         return validate_body(stmt->v.With.body, "With");
439     case AsyncWith_kind:
440         if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
441             return 0;
442         for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
443             withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
444             if (!validate_expr(item->context_expr, Load) ||
445                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
446                 return 0;
447         }
448         return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
449     case Raise_kind:
450         if (stmt->v.Raise.exc) {
451             return validate_expr(stmt->v.Raise.exc, Load) &&
452                 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
453         }
454         if (stmt->v.Raise.cause) {
455             PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
456             return 0;
457         }
458         return 1;
459     case Try_kind:
460         if (!validate_body(stmt->v.Try.body, "Try"))
461             return 0;
462         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
463             !asdl_seq_LEN(stmt->v.Try.finalbody)) {
464             PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
465             return 0;
466         }
467         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
468             asdl_seq_LEN(stmt->v.Try.orelse)) {
469             PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
470             return 0;
471         }
472         for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
473             excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
474             if ((handler->v.ExceptHandler.type &&
475                  !validate_expr(handler->v.ExceptHandler.type, Load)) ||
476                 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
477                 return 0;
478         }
479         return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
480                 validate_stmts(stmt->v.Try.finalbody)) &&
481             (!asdl_seq_LEN(stmt->v.Try.orelse) ||
482              validate_stmts(stmt->v.Try.orelse));
483     case Assert_kind:
484         return validate_expr(stmt->v.Assert.test, Load) &&
485             (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
486     case Import_kind:
487         return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
488     case ImportFrom_kind:
489         if (stmt->v.ImportFrom.level < 0) {
490             PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
491             return 0;
492         }
493         return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
494     case Global_kind:
495         return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
496     case Nonlocal_kind:
497         return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
498     case Expr_kind:
499         return validate_expr(stmt->v.Expr.value, Load);
500     case AsyncFunctionDef_kind:
501         return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
502             validate_arguments(stmt->v.AsyncFunctionDef.args) &&
503             validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
504             (!stmt->v.AsyncFunctionDef.returns ||
505              validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
506     case Pass_kind:
507     case Break_kind:
508     case Continue_kind:
509         return 1;
510     default:
511         PyErr_SetString(PyExc_SystemError, "unexpected statement");
512         return 0;
513     }
514 }
515 
516 static int
validate_stmts(asdl_seq * seq)517 validate_stmts(asdl_seq *seq)
518 {
519     Py_ssize_t i;
520     for (i = 0; i < asdl_seq_LEN(seq); i++) {
521         stmt_ty stmt = asdl_seq_GET(seq, i);
522         if (stmt) {
523             if (!validate_stmt(stmt))
524                 return 0;
525         }
526         else {
527             PyErr_SetString(PyExc_ValueError,
528                             "None disallowed in statement list");
529             return 0;
530         }
531     }
532     return 1;
533 }
534 
535 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)536 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
537 {
538     Py_ssize_t i;
539     for (i = 0; i < asdl_seq_LEN(exprs); i++) {
540         expr_ty expr = asdl_seq_GET(exprs, i);
541         if (expr) {
542             if (!validate_expr(expr, ctx))
543                 return 0;
544         }
545         else if (!null_ok) {
546             PyErr_SetString(PyExc_ValueError,
547                             "None disallowed in expression list");
548             return 0;
549         }
550 
551     }
552     return 1;
553 }
554 
555 int
PyAST_Validate(mod_ty mod)556 PyAST_Validate(mod_ty mod)
557 {
558     int res = 0;
559 
560     switch (mod->kind) {
561     case Module_kind:
562         res = validate_stmts(mod->v.Module.body);
563         break;
564     case Interactive_kind:
565         res = validate_stmts(mod->v.Interactive.body);
566         break;
567     case Expression_kind:
568         res = validate_expr(mod->v.Expression.body, Load);
569         break;
570     case Suite_kind:
571         PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
572         break;
573     default:
574         PyErr_SetString(PyExc_SystemError, "impossible module node");
575         res = 0;
576         break;
577     }
578     return res;
579 }
580 
581 /* This is done here, so defines like "test" don't interfere with AST use above. */
582 #include "grammar.h"
583 #include "parsetok.h"
584 #include "graminit.h"
585 
/* Data structure used internally: the state handed to the CST-to-AST
   helper functions declared below. */
struct compiling {
    PyArena *c_arena; /* Arena for allocating memory; identifiers and type
                         comments created during conversion are registered
                         with it. */
    PyObject *c_filename; /* Filename, used when building SyntaxError
                             locations in ast_error(). */
    PyObject *c_normalize; /* Normalization function from unicodedata.
                              Looked up by init_normalization() the first
                              time a non-ASCII identifier is seen. */
    int c_feature_version; /* Latest minor version of Python for allowed features */
};
593 
594 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
595 static expr_ty ast_for_expr(struct compiling *, const node *);
596 static stmt_ty ast_for_stmt(struct compiling *, const node *);
597 static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
598 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
599                                   expr_context_ty);
600 static expr_ty ast_for_testlist(struct compiling *, const node *);
601 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
602 
603 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
604 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
605 
606 /* Note different signature for ast_for_call */
607 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
608                             const node *, const node *, const node *);
609 
610 static PyObject *parsenumber(struct compiling *, const char *);
611 static expr_ty parsestrplus(struct compiling *, const node *n);
612 static void get_last_end_pos(asdl_seq *, int *, int *);
613 
614 #define COMP_GENEXP   0
615 #define COMP_LISTCOMP 1
616 #define COMP_SETCOMP  2
617 
618 static int
init_normalization(struct compiling * c)619 init_normalization(struct compiling *c)
620 {
621     PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
622     if (!m)
623         return 0;
624     c->c_normalize = PyObject_GetAttrString(m, "normalize");
625     Py_DECREF(m);
626     if (!c->c_normalize)
627         return 0;
628     return 1;
629 }
630 
631 static identifier
new_identifier(const char * n,struct compiling * c)632 new_identifier(const char *n, struct compiling *c)
633 {
634     PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
635     if (!id)
636         return NULL;
637     /* PyUnicode_DecodeUTF8 should always return a ready string. */
638     assert(PyUnicode_IS_READY(id));
639     /* Check whether there are non-ASCII characters in the
640        identifier; if so, normalize to NFKC. */
641     if (!PyUnicode_IS_ASCII(id)) {
642         PyObject *id2;
643         _Py_IDENTIFIER(NFKC);
644         if (!c->c_normalize && !init_normalization(c)) {
645             Py_DECREF(id);
646             return NULL;
647         }
648         PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
649         if (form == NULL) {
650             Py_DECREF(id);
651             return NULL;
652         }
653         PyObject *args[2] = {form, id};
654         id2 = _PyObject_FastCall(c->c_normalize, args, 2);
655         Py_DECREF(id);
656         if (!id2)
657             return NULL;
658         if (!PyUnicode_Check(id2)) {
659             PyErr_Format(PyExc_TypeError,
660                          "unicodedata.normalize() must return a string, not "
661                          "%.200s",
662                          Py_TYPE(id2)->tp_name);
663             Py_DECREF(id2);
664             return NULL;
665         }
666         id = id2;
667     }
668     PyUnicode_InternInPlace(&id);
669     if (PyArena_AddPyObject(c->c_arena, id) < 0) {
670         Py_DECREF(id);
671         return NULL;
672     }
673     return id;
674 }
675 
676 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
677 
678 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)679 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
680 {
681     PyObject *value, *errstr, *loc, *tmp;
682     va_list va;
683 
684     va_start(va, errmsg);
685     errstr = PyUnicode_FromFormatV(errmsg, va);
686     va_end(va);
687     if (!errstr) {
688         return 0;
689     }
690     loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
691     if (!loc) {
692         Py_INCREF(Py_None);
693         loc = Py_None;
694     }
695     tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
696     if (!tmp) {
697         Py_DECREF(errstr);
698         return 0;
699     }
700     value = PyTuple_Pack(2, errstr, tmp);
701     Py_DECREF(errstr);
702     Py_DECREF(tmp);
703     if (value) {
704         PyErr_SetObject(PyExc_SyntaxError, value);
705         Py_DECREF(value);
706     }
707     return 0;
708 }
709 
710 /* num_stmts() returns number of contained statements.
711 
712    Use this routine to determine how big a sequence is needed for
713    the statements in a parse tree.  Its raison d'etre is this bit of
714    grammar:
715 
716    stmt: simple_stmt | compound_stmt
717    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
718 
719    A simple_stmt can contain multiple small_stmt elements joined
720    by semicolons.  If the arg is a simple_stmt, the number of
721    small_stmt elements is returned.
722 */
723 
724 static string
new_type_comment(const char * s,struct compiling * c)725 new_type_comment(const char *s, struct compiling *c)
726 {
727     PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
728     if (res == NULL)
729         return NULL;
730     if (PyArena_AddPyObject(c->c_arena, res) < 0) {
731         Py_DECREF(res);
732         return NULL;
733     }
734     return res;
735 }
736 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
737 
738 static int
num_stmts(const node * n)739 num_stmts(const node *n)
740 {
741     int i, l;
742     node *ch;
743 
744     switch (TYPE(n)) {
745         case single_input:
746             if (TYPE(CHILD(n, 0)) == NEWLINE)
747                 return 0;
748             else
749                 return num_stmts(CHILD(n, 0));
750         case file_input:
751             l = 0;
752             for (i = 0; i < NCH(n); i++) {
753                 ch = CHILD(n, i);
754                 if (TYPE(ch) == stmt)
755                     l += num_stmts(ch);
756             }
757             return l;
758         case stmt:
759             return num_stmts(CHILD(n, 0));
760         case compound_stmt:
761             return 1;
762         case simple_stmt:
763             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
764         case suite:
765         case func_body_suite:
766             /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
767             /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
768             if (NCH(n) == 1)
769                 return num_stmts(CHILD(n, 0));
770             else {
771                 i = 2;
772                 l = 0;
773                 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
774                     i += 2;
775                 for (; i < (NCH(n) - 1); i++)
776                     l += num_stmts(CHILD(n, i));
777                 return l;
778             }
779         default: {
780             char buf[128];
781 
782             sprintf(buf, "Non-statement found: %d %d",
783                     TYPE(n), NCH(n));
784             Py_FatalError(buf);
785         }
786     }
787     Py_UNREACHABLE();
788 }
789 
/* Transform the CST rooted at n into the corresponding AST.

   The type of the root node selects what is built:

     file_input      -> Module(stmts, type_ignores)
     eval_input      -> Expression(testlist_ast)
     single_input    -> Interactive(stmts)
     func_type_input -> FunctionType(argtypes, ret)

   An encoding_decl root is first replaced by its first child.  Any other
   root type raises SystemError.

   filename is stored in the local compiling state as a borrowed reference,
   and arena is handed to every sequence and node constructor called here.

   Returns the new mod_ty, or NULL on failure.  Only the default case sets an
   exception itself; the other failure paths rely on the failing callee
   (presumably it has already set one).
*/

mod_ty
PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
                     PyObject *filename, PyArena *arena)
{
    int i, j, k, num;
    asdl_seq *stmts = NULL;
    asdl_seq *type_ignores = NULL;
    stmt_ty s;
    node *ch;
    struct compiling c;
    mod_ty res = NULL;
    asdl_seq *argtypes = NULL;
    expr_ty ret, arg;

    c.c_arena = arena;
    /* borrowed reference */
    c.c_filename = filename;
    c.c_normalize = NULL;
    /* The caller-supplied feature version is honoured only when flags is
       non-NULL and PyCF_ONLY_AST is set; otherwise PY_MINOR_VERSION is used. */
    c.c_feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
        flags->cf_feature_version : PY_MINOR_VERSION;

    /* An encoding_decl node wraps the real root: step into its first child. */
    if (TYPE(n) == encoding_decl)
        n = CHILD(n, 0);

    /* k is the next free slot in stmts; only the file_input case uses it. */
    k = 0;
    switch (TYPE(n)) {
        case file_input:
            stmts = _Py_asdl_seq_new(num_stmts(n), arena);
            if (!stmts)
                goto out;
            /* Visit every child except the last one (the ENDMARKER, which is
               handled after this loop). */
            for (i = 0; i < NCH(n) - 1; i++) {
                ch = CHILD(n, i);
                if (TYPE(ch) == NEWLINE)
                    continue;
                REQ(ch, stmt);
                num = num_stmts(ch);
                if (num == 1) {
                    s = ast_for_stmt(&c, ch);
                    if (!s)
                        goto out;
                    asdl_seq_SET(stmts, k++, s);
                }
                else {
                    /* Several statements under one stmt: its first child must
                       be a simple_stmt, and the statements are read from its
                       even child indices. */
                    ch = CHILD(ch, 0);
                    REQ(ch, simple_stmt);
                    for (j = 0; j < num; j++) {
                        s = ast_for_stmt(&c, CHILD(ch, j * 2));
                        if (!s)
                            goto out;
                        asdl_seq_SET(stmts, k++, s);
                    }
                }
            }

            /* Type ignores are stored under the ENDMARKER in file_input. */
            ch = CHILD(n, NCH(n) - 1);
            REQ(ch, ENDMARKER);
            num = NCH(ch);
            type_ignores = _Py_asdl_seq_new(num, arena);
            if (!type_ignores)
                goto out;

            /* One TypeIgnore per child of the ENDMARKER, built from that
               child's string and line number. */
            for (i = 0; i < num; i++) {
                string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
                if (!type_comment)
                    goto out;
                type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
                if (!ti)
                   goto out;
               asdl_seq_SET(type_ignores, i, ti);
            }

            res = Module(stmts, type_ignores, arena);
            break;
        case eval_input: {
            expr_ty testlist_ast;

            /* XXX Why not comp_for here? */
            testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
            if (!testlist_ast)
                goto out;
            res = Expression(testlist_ast, arena);
            break;
        }
        case single_input:
            if (TYPE(CHILD(n, 0)) == NEWLINE) {
                /* The first child is a NEWLINE: build a one-element body
                   holding a Pass statement positioned at n. */
                stmts = _Py_asdl_seq_new(1, arena);
                if (!stmts)
                    goto out;
                asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
                                            n->n_end_lineno, n->n_end_col_offset,
                                            arena));
                if (!asdl_seq_GET(stmts, 0))
                    goto out;
                res = Interactive(stmts, arena);
            }
            else {
                n = CHILD(n, 0);
                num = num_stmts(n);
                stmts = _Py_asdl_seq_new(num, arena);
                if (!stmts)
                    goto out;
                if (num == 1) {
                    s = ast_for_stmt(&c, n);
                    if (!s)
                        goto out;
                    asdl_seq_SET(stmts, 0, s);
                }
                else {
                    /* Only a simple_stmt can contain multiple statements. */
                    REQ(n, simple_stmt);
                    /* Statements are read from even child indices; stop at
                       the NEWLINE child. */
                    for (i = 0; i < NCH(n); i += 2) {
                        if (TYPE(CHILD(n, i)) == NEWLINE)
                            break;
                        s = ast_for_stmt(&c, CHILD(n, i));
                        if (!s)
                            goto out;
                        asdl_seq_SET(stmts, i / 2, s);
                    }
                }

                res = Interactive(stmts, arena);
            }
            break;
        case func_type_input:
            n = CHILD(n, 0);
            REQ(n, func_type);

            if (TYPE(CHILD(n, 1)) == typelist) {
                ch = CHILD(n, 1);
                /* this is overly permissive -- we don't pay any attention to
                 * stars on the args -- just parse them into an ordered list */
                /* First pass: count the test children to size argtypes. */
                num = 0;
                for (i = 0; i < NCH(ch); i++) {
                    if (TYPE(CHILD(ch, i)) == test) {
                        num++;
                    }
                }

                argtypes = _Py_asdl_seq_new(num, arena);
                if (!argtypes)
                    goto out;

                /* Second pass: convert each test child, in order. */
                j = 0;
                for (i = 0; i < NCH(ch); i++) {
                    if (TYPE(CHILD(ch, i)) == test) {
                        arg = ast_for_expr(&c, CHILD(ch, i));
                        if (!arg)
                            goto out;
                        asdl_seq_SET(argtypes, j++, arg);
                    }
                }
            }
            else {
                /* No typelist: use an empty argument-type sequence. */
                argtypes = _Py_asdl_seq_new(0, arena);
                if (!argtypes)
                    goto out;
            }

            /* The last child of func_type is converted and passed as ret. */
            ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
            if (!ret)
                goto out;
            res = FunctionType(argtypes, ret, arena);
            break;
        default:
            PyErr_Format(PyExc_SystemError,
                         "invalid node %d for PyAST_FromNode", TYPE(n));
            goto out;
    }
 out:
    /* c_normalize started out NULL; release it if something set it during
       the conversion. */
    if (c.c_normalize) {
        Py_DECREF(c.c_normalize);
    }
    return res;
}
968 
969 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)970 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
971                PyArena *arena)
972 {
973     mod_ty mod;
974     PyObject *filename;
975     filename = PyUnicode_DecodeFSDefault(filename_str);
976     if (filename == NULL)
977         return NULL;
978     mod = PyAST_FromNodeObject(n, flags, filename, arena);
979     Py_DECREF(filename);
980     return mod;
981 
982 }
983 
984 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
985 */
986 
987 static operator_ty
get_operator(struct compiling * c,const node * n)988 get_operator(struct compiling *c, const node *n)
989 {
990     switch (TYPE(n)) {
991         case VBAR:
992             return BitOr;
993         case CIRCUMFLEX:
994             return BitXor;
995         case AMPER:
996             return BitAnd;
997         case LEFTSHIFT:
998             return LShift;
999         case RIGHTSHIFT:
1000             return RShift;
1001         case PLUS:
1002             return Add;
1003         case MINUS:
1004             return Sub;
1005         case STAR:
1006             return Mult;
1007         case AT:
1008             if (c->c_feature_version < 5) {
1009                 ast_error(c, n,
1010                           "The '@' operator is only supported in Python 3.5 and greater");
1011                 return (operator_ty)0;
1012             }
1013             return MatMult;
1014         case SLASH:
1015             return Div;
1016         case DOUBLESLASH:
1017             return FloorDiv;
1018         case PERCENT:
1019             return Mod;
1020         default:
1021             return (operator_ty)0;
1022     }
1023 }
1024 
1025 static const char * const FORBIDDEN[] = {
1026     "None",
1027     "True",
1028     "False",
1029     "__debug__",
1030     NULL,
1031 };
1032 
1033 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1034 forbidden_name(struct compiling *c, identifier name, const node *n,
1035                int full_checks)
1036 {
1037     assert(PyUnicode_Check(name));
1038     const char * const *p = FORBIDDEN;
1039     if (!full_checks) {
1040         /* In most cases, the parser will protect True, False, and None
1041            from being assign to. */
1042         p += 3;
1043     }
1044     for (; *p; p++) {
1045         if (_PyUnicode_EqualToASCIIString(name, *p)) {
1046             ast_error(c, n, "cannot assign to %U", name);
1047             return 1;
1048         }
1049     }
1050     return 0;
1051 }
1052 
1053 static expr_ty
copy_location(expr_ty e,const node * n,const node * end)1054 copy_location(expr_ty e, const node *n, const node *end)
1055 {
1056     if (e) {
1057         e->lineno = LINENO(n);
1058         e->col_offset = n->n_col_offset;
1059         e->end_lineno = end->n_end_lineno;
1060         e->end_col_offset = end->n_end_col_offset;
1061     }
1062     return e;
1063 }
1064 
1065 static const char *
get_expr_name(expr_ty e)1066 get_expr_name(expr_ty e)
1067 {
1068     switch (e->kind) {
1069         case Attribute_kind:
1070             return "attribute";
1071         case Subscript_kind:
1072             return "subscript";
1073         case Starred_kind:
1074             return "starred";
1075         case Name_kind:
1076             return "name";
1077         case List_kind:
1078             return "list";
1079         case Tuple_kind:
1080             return "tuple";
1081         case Lambda_kind:
1082             return "lambda";
1083         case Call_kind:
1084             return "function call";
1085         case BoolOp_kind:
1086         case BinOp_kind:
1087         case UnaryOp_kind:
1088             return "operator";
1089         case GeneratorExp_kind:
1090             return "generator expression";
1091         case Yield_kind:
1092         case YieldFrom_kind:
1093             return "yield expression";
1094         case Await_kind:
1095             return "await expression";
1096         case ListComp_kind:
1097             return "list comprehension";
1098         case SetComp_kind:
1099             return "set comprehension";
1100         case DictComp_kind:
1101             return "dict comprehension";
1102         case Dict_kind:
1103             return "dict display";
1104         case Set_kind:
1105             return "set display";
1106         case JoinedStr_kind:
1107         case FormattedValue_kind:
1108             return "f-string expression";
1109         case Constant_kind: {
1110             PyObject *value = e->v.Constant.value;
1111             if (value == Py_None) {
1112                 return "None";
1113             }
1114             if (value == Py_False) {
1115                 return "False";
1116             }
1117             if (value == Py_True) {
1118                 return "True";
1119             }
1120             if (value == Py_Ellipsis) {
1121                 return "Ellipsis";
1122             }
1123             return "literal";
1124         }
1125         case Compare_kind:
1126             return "comparison";
1127         case IfExp_kind:
1128             return "conditional expression";
1129         case NamedExpr_kind:
1130             return "named expression";
1131         default:
1132             PyErr_Format(PyExc_SystemError,
1133                          "unexpected expression in assignment %d (line %d)",
1134                          e->kind, e->lineno);
1135             return NULL;
1136     }
1137 }
1138 
1139 /* Set the context ctx for expr_ty e, recursively traversing e.
1140 
1141    Only sets context for expr kinds that "can appear in assignment context"
1142    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
1143    an appropriate syntax error and returns false.
1144 */
1145 
1146 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1147 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1148 {
1149     asdl_seq *s = NULL;
1150 
1151     /* The ast defines augmented store and load contexts, but the
1152        implementation here doesn't actually use them.  The code may be
1153        a little more complex than necessary as a result.  It also means
1154        that expressions in an augmented assignment have a Store context.
1155        Consider restructuring so that augmented assignment uses
1156        set_context(), too.
1157     */
1158     assert(ctx != AugStore && ctx != AugLoad);
1159 
1160     switch (e->kind) {
1161         case Attribute_kind:
1162             e->v.Attribute.ctx = ctx;
1163             if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1164                 return 0;
1165             break;
1166         case Subscript_kind:
1167             e->v.Subscript.ctx = ctx;
1168             break;
1169         case Starred_kind:
1170             e->v.Starred.ctx = ctx;
1171             if (!set_context(c, e->v.Starred.value, ctx, n))
1172                 return 0;
1173             break;
1174         case Name_kind:
1175             if (ctx == Store) {
1176                 if (forbidden_name(c, e->v.Name.id, n, 0))
1177                     return 0; /* forbidden_name() calls ast_error() */
1178             }
1179             e->v.Name.ctx = ctx;
1180             break;
1181         case List_kind:
1182             e->v.List.ctx = ctx;
1183             s = e->v.List.elts;
1184             break;
1185         case Tuple_kind:
1186             e->v.Tuple.ctx = ctx;
1187             s = e->v.Tuple.elts;
1188             break;
1189         default: {
1190             const char *expr_name = get_expr_name(e);
1191             if (expr_name != NULL) {
1192                 ast_error(c, n, "cannot %s %s",
1193                           ctx == Store ? "assign to" : "delete",
1194                           expr_name);
1195             }
1196             return 0;
1197         }
1198     }
1199 
1200     /* If the LHS is a list or tuple, we need to set the assignment
1201        context for all the contained elements.
1202     */
1203     if (s) {
1204         Py_ssize_t i;
1205 
1206         for (i = 0; i < asdl_seq_LEN(s); i++) {
1207             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1208                 return 0;
1209         }
1210     }
1211     return 1;
1212 }
1213 
1214 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1215 ast_for_augassign(struct compiling *c, const node *n)
1216 {
1217     REQ(n, augassign);
1218     n = CHILD(n, 0);
1219     switch (STR(n)[0]) {
1220         case '+':
1221             return Add;
1222         case '-':
1223             return Sub;
1224         case '/':
1225             if (STR(n)[1] == '/')
1226                 return FloorDiv;
1227             else
1228                 return Div;
1229         case '%':
1230             return Mod;
1231         case '<':
1232             return LShift;
1233         case '>':
1234             return RShift;
1235         case '&':
1236             return BitAnd;
1237         case '^':
1238             return BitXor;
1239         case '|':
1240             return BitOr;
1241         case '*':
1242             if (STR(n)[1] == '*')
1243                 return Pow;
1244             else
1245                 return Mult;
1246         case '@':
1247             if (c->c_feature_version < 5) {
1248                 ast_error(c, n,
1249                           "The '@' operator is only supported in Python 3.5 and greater");
1250                 return (operator_ty)0;
1251             }
1252             return MatMult;
1253         default:
1254             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1255             return (operator_ty)0;
1256     }
1257 }
1258 
1259 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1260 ast_for_comp_op(struct compiling *c, const node *n)
1261 {
1262     /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1263                |'is' 'not'
1264     */
1265     REQ(n, comp_op);
1266     if (NCH(n) == 1) {
1267         n = CHILD(n, 0);
1268         switch (TYPE(n)) {
1269             case LESS:
1270                 return Lt;
1271             case GREATER:
1272                 return Gt;
1273             case EQEQUAL:                       /* == */
1274                 return Eq;
1275             case LESSEQUAL:
1276                 return LtE;
1277             case GREATEREQUAL:
1278                 return GtE;
1279             case NOTEQUAL:
1280                 return NotEq;
1281             case NAME:
1282                 if (strcmp(STR(n), "in") == 0)
1283                     return In;
1284                 if (strcmp(STR(n), "is") == 0)
1285                     return Is;
1286                 /* fall through */
1287             default:
1288                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1289                              STR(n));
1290                 return (cmpop_ty)0;
1291         }
1292     }
1293     else if (NCH(n) == 2) {
1294         /* handle "not in" and "is not" */
1295         switch (TYPE(CHILD(n, 0))) {
1296             case NAME:
1297                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1298                     return NotIn;
1299                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1300                     return IsNot;
1301                 /* fall through */
1302             default:
1303                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1304                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1305                 return (cmpop_ty)0;
1306         }
1307     }
1308     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1309                  NCH(n));
1310     return (cmpop_ty)0;
1311 }
1312 
1313 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1314 seq_for_testlist(struct compiling *c, const node *n)
1315 {
1316     /* testlist: test (',' test)* [',']
1317        testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1318     */
1319     asdl_seq *seq;
1320     expr_ty expression;
1321     int i;
1322     assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1323 
1324     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1325     if (!seq)
1326         return NULL;
1327 
1328     for (i = 0; i < NCH(n); i += 2) {
1329         const node *ch = CHILD(n, i);
1330         assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1331 
1332         expression = ast_for_expr(c, ch);
1333         if (!expression)
1334             return NULL;
1335 
1336         assert(i / 2 < seq->size);
1337         asdl_seq_SET(seq, i / 2, expression);
1338     }
1339     return seq;
1340 }
1341 
1342 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1343 ast_for_arg(struct compiling *c, const node *n)
1344 {
1345     identifier name;
1346     expr_ty annotation = NULL;
1347     node *ch;
1348     arg_ty ret;
1349 
1350     assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1351     ch = CHILD(n, 0);
1352     name = NEW_IDENTIFIER(ch);
1353     if (!name)
1354         return NULL;
1355     if (forbidden_name(c, name, ch, 0))
1356         return NULL;
1357 
1358     if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1359         annotation = ast_for_expr(c, CHILD(n, 2));
1360         if (!annotation)
1361             return NULL;
1362     }
1363 
1364     ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1365               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1366     if (!ret)
1367         return NULL;
1368     return ret;
1369 }
1370 
1371 /* returns -1 if failed to handle keyword only arguments
1372    returns new position to keep processing if successful
1373                (',' tfpdef ['=' test])*
1374                      ^^^
1375    start pointing here
1376  */
1377 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1378 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1379                         asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1380 {
1381     PyObject *argname;
1382     node *ch;
1383     expr_ty expression, annotation;
1384     arg_ty arg = NULL;
1385     int i = start;
1386     int j = 0; /* index for kwdefaults and kwonlyargs */
1387 
1388     if (kwonlyargs == NULL) {
1389         ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1390         return -1;
1391     }
1392     assert(kwdefaults != NULL);
1393     while (i < NCH(n)) {
1394         ch = CHILD(n, i);
1395         switch (TYPE(ch)) {
1396             case vfpdef:
1397             case tfpdef:
1398                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1399                     expression = ast_for_expr(c, CHILD(n, i + 2));
1400                     if (!expression)
1401                         goto error;
1402                     asdl_seq_SET(kwdefaults, j, expression);
1403                     i += 2; /* '=' and test */
1404                 }
1405                 else { /* setting NULL if no default value exists */
1406                     asdl_seq_SET(kwdefaults, j, NULL);
1407                 }
1408                 if (NCH(ch) == 3) {
1409                     /* ch is NAME ':' test */
1410                     annotation = ast_for_expr(c, CHILD(ch, 2));
1411                     if (!annotation)
1412                         goto error;
1413                 }
1414                 else {
1415                     annotation = NULL;
1416                 }
1417                 ch = CHILD(ch, 0);
1418                 argname = NEW_IDENTIFIER(ch);
1419                 if (!argname)
1420                     goto error;
1421                 if (forbidden_name(c, argname, ch, 0))
1422                     goto error;
1423                 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1424                           ch->n_end_lineno, ch->n_end_col_offset,
1425                           c->c_arena);
1426                 if (!arg)
1427                     goto error;
1428                 asdl_seq_SET(kwonlyargs, j++, arg);
1429                 i += 1; /* the name */
1430                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1431                     i += 1; /* the comma, if present */
1432                 break;
1433             case TYPE_COMMENT:
1434                 /* arg will be equal to the last argument processed */
1435                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1436                 if (!arg->type_comment)
1437                     goto error;
1438                 i += 1;
1439                 break;
1440             case DOUBLESTAR:
1441                 return i;
1442             default:
1443                 ast_error(c, ch, "unexpected node");
1444                 goto error;
1445         }
1446     }
1447     return i;
1448  error:
1449     return -1;
1450 }
1451 
1452 /* Create AST for argument list. */
1453 
1454 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)1455 ast_for_arguments(struct compiling *c, const node *n)
1456 {
1457     /* This function handles both typedargslist (function definition)
1458        and varargslist (lambda definition).
1459 
1460        parameters: '(' [typedargslist] ')'
1461 
1462        The following definition for typedarglist is equivalent to this set of rules:
1463 
1464          arguments = argument (',' [TYPE_COMMENT] argument)*
1465          argument = tfpdef ['=' test]
1466          kwargs = '**' tfpdef [','] [TYPE_COMMENT]
1467          args = '*' [tfpdef]
1468          kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
1469                          [TYPE_COMMENT] [kwargs]])
1470          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1471          poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
1472                                          [TYPE_COMMENT] [args_kwonly_kwargs]])
1473          typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1474          typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
1475                         typedargslist_no_posonly]])|(typedargslist_no_posonly)"
1476 
1477        typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1478            ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
1479            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1480            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1481            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1482            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1483            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1484            '**' tfpdef [','] [TYPE_COMMENT]]] ) |  (tfpdef ['=' test] (','
1485            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1486            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1487            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1488            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1489            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1490            '**' tfpdef [','] [TYPE_COMMENT]))
1491 
1492        tfpdef: NAME [':' test]
1493 
1494        The following definition for varargslist is equivalent to this set of rules:
1495 
1496          arguments = argument (',' argument )*
1497          argument = vfpdef ['=' test]
1498          kwargs = '**' vfpdef [',']
1499          args = '*' [vfpdef]
1500          kwonly_kwargs = (',' argument )* [',' [kwargs]]
1501          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1502          poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
1503          vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1504          varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
1505                        (vararglist_no_posonly)
1506 
1507        varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
1508            test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
1509            ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
1510            [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
1511            ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1512            | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
1513            [',']]] | '**' vfpdef [','])
1514 
1515        vfpdef: NAME
1516 
1517     */
1518     int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
1519     int nposdefaults = 0, found_default = 0;
1520     asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1521     arg_ty vararg = NULL, kwarg = NULL;
1522     arg_ty arg = NULL;
1523     node *ch;
1524 
1525     if (TYPE(n) == parameters) {
1526         if (NCH(n) == 2) /* () as argument list */
1527             return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1528         n = CHILD(n, 1);
1529     }
1530     assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1531 
1532     /* First count the number of positional args & defaults.  The
1533        variable i is the loop index for this for loop and the next.
1534        The next loop picks up where the first leaves off.
1535     */
1536     for (i = 0; i < NCH(n); i++) {
1537         ch = CHILD(n, i);
1538         if (TYPE(ch) == STAR) {
1539             /* skip star */
1540             i++;
1541             if (i < NCH(n) && /* skip argument following star */
1542                 (TYPE(CHILD(n, i)) == tfpdef ||
1543                  TYPE(CHILD(n, i)) == vfpdef)) {
1544                 i++;
1545             }
1546             break;
1547         }
1548         if (TYPE(ch) == DOUBLESTAR) break;
1549         if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1550         if (TYPE(ch) == EQUAL) nposdefaults++;
1551         if (TYPE(ch) == SLASH ) {
1552             nposonlyargs = nposargs;
1553             nposargs = 0;
1554         }
1555     }
1556     /* count the number of keyword only args &
1557        defaults for keyword only args */
1558     for ( ; i < NCH(n); ++i) {
1559         ch = CHILD(n, i);
1560         if (TYPE(ch) == DOUBLESTAR) break;
1561         if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1562     }
1563     posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
1564     if (!posonlyargs && nposonlyargs) {
1565         return NULL;
1566     }
1567     posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1568     if (!posargs && nposargs)
1569         return NULL;
1570     kwonlyargs = (nkwonlyargs ?
1571                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1572     if (!kwonlyargs && nkwonlyargs)
1573         return NULL;
1574     posdefaults = (nposdefaults ?
1575                     _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1576     if (!posdefaults && nposdefaults)
1577         return NULL;
1578     /* The length of kwonlyargs and kwdefaults are same
1579        since we set NULL as default for keyword only argument w/o default
1580        - we have sequence data structure, but no dictionary */
1581     kwdefaults = (nkwonlyargs ?
1582                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1583     if (!kwdefaults && nkwonlyargs)
1584         return NULL;
1585 
1586     /* tfpdef: NAME [':' test]
1587        vfpdef: NAME
1588     */
1589     i = 0;
1590     j = 0;  /* index for defaults */
1591     k = 0;  /* index for args */
1592     l = 0;  /* index for posonlyargs */
1593     while (i < NCH(n)) {
1594         ch = CHILD(n, i);
1595         switch (TYPE(ch)) {
1596             case tfpdef:
1597             case vfpdef:
1598                 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1599                    anything other than EQUAL or a comma? */
1600                 /* XXX Should NCH(n) check be made a separate check? */
1601                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1602                     expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1603                     if (!expression)
1604                         return NULL;
1605                     assert(posdefaults != NULL);
1606                     asdl_seq_SET(posdefaults, j++, expression);
1607                     i += 2;
1608                     found_default = 1;
1609                 }
1610                 else if (found_default) {
1611                     ast_error(c, n,
1612                               "non-default argument follows default argument");
1613                     return NULL;
1614                 }
1615                 arg = ast_for_arg(c, ch);
1616                 if (!arg)
1617                     return NULL;
1618                 if (l < nposonlyargs) {
1619                     asdl_seq_SET(posonlyargs, l++, arg);
1620                 } else {
1621                     asdl_seq_SET(posargs, k++, arg);
1622                 }
1623                 i += 1; /* the name */
1624                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1625                     i += 1; /* the comma, if present */
1626                 break;
1627              case SLASH:
1628                 /* Advance the slash and the comma. If there are more names
1629                  * after the slash there will be a comma so we are advancing
1630                  * the correct number of nodes. If the slash is the last item,
1631                  * we will be advancing an extra token but then * i > NCH(n)
1632                  * and the enclosing while will finish correctly. */
1633                 i += 2;
1634                 break;
1635             case STAR:
1636                 if (i+1 >= NCH(n) ||
1637                     (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
1638                                        || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
1639                     ast_error(c, CHILD(n, i),
1640                               "named arguments must follow bare *");
1641                     return NULL;
1642                 }
1643                 ch = CHILD(n, i+1);  /* tfpdef or COMMA */
1644                 if (TYPE(ch) == COMMA) {
1645                     int res = 0;
1646                     i += 2; /* now follows keyword only arguments */
1647 
1648                     if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1649                         ast_error(c, CHILD(n, i),
1650                                   "bare * has associated type comment");
1651                         return NULL;
1652                     }
1653 
1654                     res = handle_keywordonly_args(c, n, i,
1655                                                   kwonlyargs, kwdefaults);
1656                     if (res == -1) return NULL;
1657                     i = res; /* res has new position to process */
1658                 }
1659                 else {
1660                     vararg = ast_for_arg(c, ch);
1661                     if (!vararg)
1662                         return NULL;
1663 
1664                 i += 2; /* the star and the name */
1665                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1666                     i += 1; /* the comma, if present */
1667 
1668                 if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1669                         vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
1670                         if (!vararg->type_comment)
1671                             return NULL;
1672                         i += 1;
1673                     }
1674 
1675                     if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1676                                     || TYPE(CHILD(n, i)) == vfpdef)) {
1677                         int res = 0;
1678                         res = handle_keywordonly_args(c, n, i,
1679                                                       kwonlyargs, kwdefaults);
1680                         if (res == -1) return NULL;
1681                         i = res; /* res has new position to process */
1682                     }
1683                 }
1684                 break;
1685             case DOUBLESTAR:
1686                 ch = CHILD(n, i+1);  /* tfpdef */
1687                 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1688                 kwarg = ast_for_arg(c, ch);
1689                 if (!kwarg)
1690                     return NULL;
1691                 i += 2; /* the double star and the name */
1692                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1693                     i += 1; /* the comma, if present */
1694                 break;
1695             case TYPE_COMMENT:
1696                 assert(i);
1697 
1698                 if (kwarg)
1699                     arg = kwarg;
1700 
1701                 /* arg will be equal to the last argument processed */
1702                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1703                 if (!arg->type_comment)
1704                     return NULL;
1705                 i += 1;
1706                 break;
1707             default:
1708                 PyErr_Format(PyExc_SystemError,
1709                              "unexpected node in varargslist: %d @ %d",
1710                              TYPE(ch), i);
1711                 return NULL;
1712         }
1713     }
1714     return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1715 }
1716 
1717 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1718 ast_for_dotted_name(struct compiling *c, const node *n)
1719 {
1720     expr_ty e;
1721     identifier id;
1722     int lineno, col_offset;
1723     int i;
1724     node *ch;
1725 
1726     REQ(n, dotted_name);
1727 
1728     lineno = LINENO(n);
1729     col_offset = n->n_col_offset;
1730 
1731     ch = CHILD(n, 0);
1732     id = NEW_IDENTIFIER(ch);
1733     if (!id)
1734         return NULL;
1735     e = Name(id, Load, lineno, col_offset,
1736              ch->n_end_lineno, ch->n_end_col_offset, c->c_arena);
1737     if (!e)
1738         return NULL;
1739 
1740     for (i = 2; i < NCH(n); i+=2) {
1741         const node *child = CHILD(n, i);
1742         id = NEW_IDENTIFIER(child);
1743         if (!id)
1744             return NULL;
1745         e = Attribute(e, id, Load, lineno, col_offset,
1746                       child->n_end_lineno, child->n_end_col_offset, c->c_arena);
1747         if (!e)
1748             return NULL;
1749     }
1750 
1751     return e;
1752 }
1753 
1754 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1755 ast_for_decorator(struct compiling *c, const node *n)
1756 {
1757     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1758     expr_ty d = NULL;
1759     expr_ty name_expr;
1760 
1761     REQ(n, decorator);
1762     REQ(CHILD(n, 0), AT);
1763     REQ(RCHILD(n, -1), NEWLINE);
1764 
1765     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1766     if (!name_expr)
1767         return NULL;
1768 
1769     if (NCH(n) == 3) { /* No arguments */
1770         d = name_expr;
1771         name_expr = NULL;
1772     }
1773     else if (NCH(n) == 5) { /* Call with no arguments */
1774         d = Call(name_expr, NULL, NULL,
1775                  name_expr->lineno, name_expr->col_offset,
1776                  CHILD(n, 3)->n_end_lineno, CHILD(n, 3)->n_end_col_offset,
1777                  c->c_arena);
1778         if (!d)
1779             return NULL;
1780         name_expr = NULL;
1781     }
1782     else {
1783         d = ast_for_call(c, CHILD(n, 3), name_expr,
1784                          CHILD(n, 1), CHILD(n, 2), CHILD(n, 4));
1785         if (!d)
1786             return NULL;
1787         name_expr = NULL;
1788     }
1789 
1790     return d;
1791 }
1792 
1793 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1794 ast_for_decorators(struct compiling *c, const node *n)
1795 {
1796     asdl_seq* decorator_seq;
1797     expr_ty d;
1798     int i;
1799 
1800     REQ(n, decorators);
1801     decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1802     if (!decorator_seq)
1803         return NULL;
1804 
1805     for (i = 0; i < NCH(n); i++) {
1806         d = ast_for_decorator(c, CHILD(n, i));
1807         if (!d)
1808             return NULL;
1809         asdl_seq_SET(decorator_seq, i, d);
1810     }
1811     return decorator_seq;
1812 }
1813 
1814 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1815 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1816                      asdl_seq *decorator_seq, bool is_async)
1817 {
1818     /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1819     const node * const n = is_async ? CHILD(n0, 1) : n0;
1820     identifier name;
1821     arguments_ty args;
1822     asdl_seq *body;
1823     expr_ty returns = NULL;
1824     int name_i = 1;
1825     int end_lineno, end_col_offset;
1826     node *tc;
1827     string type_comment = NULL;
1828 
1829     if (is_async && c->c_feature_version < 5) {
1830         ast_error(c, n,
1831                   "Async functions are only supported in Python 3.5 and greater");
1832         return NULL;
1833     }
1834 
1835     REQ(n, funcdef);
1836 
1837     name = NEW_IDENTIFIER(CHILD(n, name_i));
1838     if (!name)
1839         return NULL;
1840     if (forbidden_name(c, name, CHILD(n, name_i), 0))
1841         return NULL;
1842     args = ast_for_arguments(c, CHILD(n, name_i + 1));
1843     if (!args)
1844         return NULL;
1845     if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1846         returns = ast_for_expr(c, CHILD(n, name_i + 3));
1847         if (!returns)
1848             return NULL;
1849         name_i += 2;
1850     }
1851     if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1852         type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1853         if (!type_comment)
1854             return NULL;
1855         name_i += 1;
1856     }
1857     body = ast_for_suite(c, CHILD(n, name_i + 3));
1858     if (!body)
1859         return NULL;
1860     get_last_end_pos(body, &end_lineno, &end_col_offset);
1861 
1862     if (NCH(CHILD(n, name_i + 3)) > 1) {
1863         /* Check if the suite has a type comment in it. */
1864         tc = CHILD(CHILD(n, name_i + 3), 1);
1865 
1866         if (TYPE(tc) == TYPE_COMMENT) {
1867             if (type_comment != NULL) {
1868                 ast_error(c, n, "Cannot have two type comments on def");
1869                 return NULL;
1870             }
1871             type_comment = NEW_TYPE_COMMENT(tc);
1872             if (!type_comment)
1873                 return NULL;
1874         }
1875     }
1876 
1877     if (is_async)
1878         return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1879                                 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1880     else
1881         return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1882                            LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1883 }
1884 
1885 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1886 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1887 {
1888     /* async_funcdef: ASYNC funcdef */
1889     REQ(n, async_funcdef);
1890     REQ(CHILD(n, 0), ASYNC);
1891     REQ(CHILD(n, 1), funcdef);
1892 
1893     return ast_for_funcdef_impl(c, n, decorator_seq,
1894                                 true /* is_async */);
1895 }
1896 
1897 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1898 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1899 {
1900     /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1901     return ast_for_funcdef_impl(c, n, decorator_seq,
1902                                 false /* is_async */);
1903 }
1904 
1905 
1906 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1907 ast_for_async_stmt(struct compiling *c, const node *n)
1908 {
1909     /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1910     REQ(n, async_stmt);
1911     REQ(CHILD(n, 0), ASYNC);
1912 
1913     switch (TYPE(CHILD(n, 1))) {
1914         case funcdef:
1915             return ast_for_funcdef_impl(c, n, NULL,
1916                                         true /* is_async */);
1917         case with_stmt:
1918             return ast_for_with_stmt(c, n,
1919                                      true /* is_async */);
1920 
1921         case for_stmt:
1922             return ast_for_for_stmt(c, n,
1923                                     true /* is_async */);
1924 
1925         default:
1926             PyErr_Format(PyExc_SystemError,
1927                          "invalid async stament: %s",
1928                          STR(CHILD(n, 1)));
1929             return NULL;
1930     }
1931 }
1932 
1933 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1934 ast_for_decorated(struct compiling *c, const node *n)
1935 {
1936     /* decorated: decorators (classdef | funcdef | async_funcdef) */
1937     stmt_ty thing = NULL;
1938     asdl_seq *decorator_seq = NULL;
1939 
1940     REQ(n, decorated);
1941 
1942     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1943     if (!decorator_seq)
1944       return NULL;
1945 
1946     assert(TYPE(CHILD(n, 1)) == funcdef ||
1947            TYPE(CHILD(n, 1)) == async_funcdef ||
1948            TYPE(CHILD(n, 1)) == classdef);
1949 
1950     if (TYPE(CHILD(n, 1)) == funcdef) {
1951       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1952     } else if (TYPE(CHILD(n, 1)) == classdef) {
1953       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1954     } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1955       thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1956     }
1957     return thing;
1958 }
1959 
1960 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1961 ast_for_namedexpr(struct compiling *c, const node *n)
1962 {
1963     /* namedexpr_test: test [':=' test]
1964        argument: ( test [comp_for] |
1965             test ':=' test |
1966             test '=' test |
1967             '**' test |
1968             '*' test )
1969     */
1970     expr_ty target, value;
1971 
1972     target = ast_for_expr(c, CHILD(n, 0));
1973     if (!target)
1974         return NULL;
1975 
1976     value = ast_for_expr(c, CHILD(n, 2));
1977     if (!value)
1978         return NULL;
1979 
1980     if (target->kind != Name_kind) {
1981         const char *expr_name = get_expr_name(target);
1982         if (expr_name != NULL) {
1983             ast_error(c, n, "cannot use assignment expressions with %s", expr_name);
1984         }
1985         return NULL;
1986     }
1987 
1988     if (!set_context(c, target, Store, n))
1989         return NULL;
1990 
1991     return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1992                      n->n_end_col_offset, c->c_arena);
1993 }
1994 
1995 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1996 ast_for_lambdef(struct compiling *c, const node *n)
1997 {
1998     /* lambdef: 'lambda' [varargslist] ':' test
1999        lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
2000     arguments_ty args;
2001     expr_ty expression;
2002 
2003     if (NCH(n) == 3) {
2004         args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
2005         if (!args)
2006             return NULL;
2007         expression = ast_for_expr(c, CHILD(n, 2));
2008         if (!expression)
2009             return NULL;
2010     }
2011     else {
2012         args = ast_for_arguments(c, CHILD(n, 1));
2013         if (!args)
2014             return NULL;
2015         expression = ast_for_expr(c, CHILD(n, 3));
2016         if (!expression)
2017             return NULL;
2018     }
2019 
2020     return Lambda(args, expression, LINENO(n), n->n_col_offset,
2021                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2022 }
2023 
2024 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)2025 ast_for_ifexpr(struct compiling *c, const node *n)
2026 {
2027     /* test: or_test 'if' or_test 'else' test */
2028     expr_ty expression, body, orelse;
2029 
2030     assert(NCH(n) == 5);
2031     body = ast_for_expr(c, CHILD(n, 0));
2032     if (!body)
2033         return NULL;
2034     expression = ast_for_expr(c, CHILD(n, 2));
2035     if (!expression)
2036         return NULL;
2037     orelse = ast_for_expr(c, CHILD(n, 4));
2038     if (!orelse)
2039         return NULL;
2040     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
2041                  n->n_end_lineno, n->n_end_col_offset,
2042                  c->c_arena);
2043 }
2044 
2045 /*
2046    Count the number of 'for' loops in a comprehension.
2047 
2048    Helper for ast_for_comprehension().
2049 */
2050 
2051 static int
count_comp_fors(struct compiling * c,const node * n)2052 count_comp_fors(struct compiling *c, const node *n)
2053 {
2054     int n_fors = 0;
2055 
2056   count_comp_for:
2057     n_fors++;
2058     REQ(n, comp_for);
2059     if (NCH(n) == 2) {
2060         REQ(CHILD(n, 0), ASYNC);
2061         n = CHILD(n, 1);
2062     }
2063     else if (NCH(n) == 1) {
2064         n = CHILD(n, 0);
2065     }
2066     else {
2067         goto error;
2068     }
2069     if (NCH(n) == (5)) {
2070         n = CHILD(n, 4);
2071     }
2072     else {
2073         return n_fors;
2074     }
2075   count_comp_iter:
2076     REQ(n, comp_iter);
2077     n = CHILD(n, 0);
2078     if (TYPE(n) == comp_for)
2079         goto count_comp_for;
2080     else if (TYPE(n) == comp_if) {
2081         if (NCH(n) == 3) {
2082             n = CHILD(n, 2);
2083             goto count_comp_iter;
2084         }
2085         else
2086             return n_fors;
2087     }
2088 
2089   error:
2090     /* Should never be reached */
2091     PyErr_SetString(PyExc_SystemError,
2092                     "logic error in count_comp_fors");
2093     return -1;
2094 }
2095 
2096 /* Count the number of 'if' statements in a comprehension.
2097 
2098    Helper for ast_for_comprehension().
2099 */
2100 
2101 static int
count_comp_ifs(struct compiling * c,const node * n)2102 count_comp_ifs(struct compiling *c, const node *n)
2103 {
2104     int n_ifs = 0;
2105 
2106     while (1) {
2107         REQ(n, comp_iter);
2108         if (TYPE(CHILD(n, 0)) == comp_for)
2109             return n_ifs;
2110         n = CHILD(n, 0);
2111         REQ(n, comp_if);
2112         n_ifs++;
2113         if (NCH(n) == 2)
2114             return n_ifs;
2115         n = CHILD(n, 2);
2116     }
2117 }
2118 
2119 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)2120 ast_for_comprehension(struct compiling *c, const node *n)
2121 {
2122     int i, n_fors;
2123     asdl_seq *comps;
2124 
2125     n_fors = count_comp_fors(c, n);
2126     if (n_fors == -1)
2127         return NULL;
2128 
2129     comps = _Py_asdl_seq_new(n_fors, c->c_arena);
2130     if (!comps)
2131         return NULL;
2132 
2133     for (i = 0; i < n_fors; i++) {
2134         comprehension_ty comp;
2135         asdl_seq *t;
2136         expr_ty expression, first;
2137         node *for_ch;
2138         node *sync_n;
2139         int is_async = 0;
2140 
2141         REQ(n, comp_for);
2142 
2143         if (NCH(n) == 2) {
2144             is_async = 1;
2145             REQ(CHILD(n, 0), ASYNC);
2146             sync_n = CHILD(n, 1);
2147         }
2148         else {
2149             sync_n = CHILD(n, 0);
2150         }
2151         REQ(sync_n, sync_comp_for);
2152 
2153         /* Async comprehensions only allowed in Python 3.6 and greater */
2154         if (is_async && c->c_feature_version < 6) {
2155             ast_error(c, n,
2156                       "Async comprehensions are only supported in Python 3.6 and greater");
2157             return NULL;
2158         }
2159 
2160         for_ch = CHILD(sync_n, 1);
2161         t = ast_for_exprlist(c, for_ch, Store);
2162         if (!t)
2163             return NULL;
2164         expression = ast_for_expr(c, CHILD(sync_n, 3));
2165         if (!expression)
2166             return NULL;
2167 
2168         /* Check the # of children rather than the length of t, since
2169            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
2170         first = (expr_ty)asdl_seq_GET(t, 0);
2171         if (NCH(for_ch) == 1)
2172             comp = comprehension(first, expression, NULL,
2173                                  is_async, c->c_arena);
2174         else
2175             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
2176                                        for_ch->n_end_lineno, for_ch->n_end_col_offset,
2177                                        c->c_arena),
2178                                  expression, NULL, is_async, c->c_arena);
2179         if (!comp)
2180             return NULL;
2181 
2182         if (NCH(sync_n) == 5) {
2183             int j, n_ifs;
2184             asdl_seq *ifs;
2185 
2186             n = CHILD(sync_n, 4);
2187             n_ifs = count_comp_ifs(c, n);
2188             if (n_ifs == -1)
2189                 return NULL;
2190 
2191             ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
2192             if (!ifs)
2193                 return NULL;
2194 
2195             for (j = 0; j < n_ifs; j++) {
2196                 REQ(n, comp_iter);
2197                 n = CHILD(n, 0);
2198                 REQ(n, comp_if);
2199 
2200                 expression = ast_for_expr(c, CHILD(n, 1));
2201                 if (!expression)
2202                     return NULL;
2203                 asdl_seq_SET(ifs, j, expression);
2204                 if (NCH(n) == 3)
2205                     n = CHILD(n, 2);
2206             }
2207             /* on exit, must guarantee that n is a comp_for */
2208             if (TYPE(n) == comp_iter)
2209                 n = CHILD(n, 0);
2210             comp->ifs = ifs;
2211         }
2212         asdl_seq_SET(comps, i, comp);
2213     }
2214     return comps;
2215 }
2216 
2217 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2218 ast_for_itercomp(struct compiling *c, const node *n, int type)
2219 {
2220     /* testlist_comp: (test|star_expr)
2221      *                ( comp_for | (',' (test|star_expr))* [','] ) */
2222     expr_ty elt;
2223     asdl_seq *comps;
2224     node *ch;
2225 
2226     assert(NCH(n) > 1);
2227 
2228     ch = CHILD(n, 0);
2229     elt = ast_for_expr(c, ch);
2230     if (!elt)
2231         return NULL;
2232     if (elt->kind == Starred_kind) {
2233         ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2234         return NULL;
2235     }
2236 
2237     comps = ast_for_comprehension(c, CHILD(n, 1));
2238     if (!comps)
2239         return NULL;
2240 
2241     if (type == COMP_GENEXP)
2242         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2243                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2244     else if (type == COMP_LISTCOMP)
2245         return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2246                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2247     else if (type == COMP_SETCOMP)
2248         return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2249                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2250     else
2251         /* Should never happen */
2252         return NULL;
2253 }
2254 
2255 /* Fills in the key, value pair corresponding to the dict element.  In case
2256  * of an unpacking, key is NULL.  *i is advanced by the number of ast
2257  * elements.  Iff successful, nonzero is returned.
2258  */
2259 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2260 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2261                     expr_ty *key, expr_ty *value)
2262 {
2263     expr_ty expression;
2264     if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2265         assert(NCH(n) - *i >= 2);
2266 
2267         expression = ast_for_expr(c, CHILD(n, *i + 1));
2268         if (!expression)
2269             return 0;
2270         *key = NULL;
2271         *value = expression;
2272 
2273         *i += 2;
2274     }
2275     else {
2276         assert(NCH(n) - *i >= 3);
2277 
2278         expression = ast_for_expr(c, CHILD(n, *i));
2279         if (!expression)
2280             return 0;
2281         *key = expression;
2282 
2283         REQ(CHILD(n, *i + 1), COLON);
2284 
2285         expression = ast_for_expr(c, CHILD(n, *i + 2));
2286         if (!expression)
2287             return 0;
2288         *value = expression;
2289 
2290         *i += 3;
2291     }
2292     return 1;
2293 }
2294 
2295 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2296 ast_for_dictcomp(struct compiling *c, const node *n)
2297 {
2298     expr_ty key, value;
2299     asdl_seq *comps;
2300     int i = 0;
2301 
2302     if (!ast_for_dictelement(c, n, &i, &key, &value))
2303         return NULL;
2304     assert(key);
2305     assert(NCH(n) - i >= 1);
2306 
2307     comps = ast_for_comprehension(c, CHILD(n, i));
2308     if (!comps)
2309         return NULL;
2310 
2311     return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2312                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2313 }
2314 
2315 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2316 ast_for_dictdisplay(struct compiling *c, const node *n)
2317 {
2318     int i;
2319     int j;
2320     int size;
2321     asdl_seq *keys, *values;
2322 
2323     size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2324     keys = _Py_asdl_seq_new(size, c->c_arena);
2325     if (!keys)
2326         return NULL;
2327 
2328     values = _Py_asdl_seq_new(size, c->c_arena);
2329     if (!values)
2330         return NULL;
2331 
2332     j = 0;
2333     for (i = 0; i < NCH(n); i++) {
2334         expr_ty key, value;
2335 
2336         if (!ast_for_dictelement(c, n, &i, &key, &value))
2337             return NULL;
2338         asdl_seq_SET(keys, j, key);
2339         asdl_seq_SET(values, j, value);
2340 
2341         j++;
2342     }
2343     keys->size = j;
2344     values->size = j;
2345     return Dict(keys, values, LINENO(n), n->n_col_offset,
2346                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2347 }
2348 
2349 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2350 ast_for_genexp(struct compiling *c, const node *n)
2351 {
2352     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2353     return ast_for_itercomp(c, n, COMP_GENEXP);
2354 }
2355 
2356 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2357 ast_for_listcomp(struct compiling *c, const node *n)
2358 {
2359     assert(TYPE(n) == (testlist_comp));
2360     return ast_for_itercomp(c, n, COMP_LISTCOMP);
2361 }
2362 
2363 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2364 ast_for_setcomp(struct compiling *c, const node *n)
2365 {
2366     assert(TYPE(n) == (dictorsetmaker));
2367     return ast_for_itercomp(c, n, COMP_SETCOMP);
2368 }
2369 
2370 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2371 ast_for_setdisplay(struct compiling *c, const node *n)
2372 {
2373     int i;
2374     int size;
2375     asdl_seq *elts;
2376 
2377     assert(TYPE(n) == (dictorsetmaker));
2378     size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2379     elts = _Py_asdl_seq_new(size, c->c_arena);
2380     if (!elts)
2381         return NULL;
2382     for (i = 0; i < NCH(n); i += 2) {
2383         expr_ty expression;
2384         expression = ast_for_expr(c, CHILD(n, i));
2385         if (!expression)
2386             return NULL;
2387         asdl_seq_SET(elts, i / 2, expression);
2388     }
2389     return Set(elts, LINENO(n), n->n_col_offset,
2390                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2391 }
2392 
2393 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2394 ast_for_atom(struct compiling *c, const node *n)
2395 {
2396     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2397        | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2398        | '...' | 'None' | 'True' | 'False'
2399     */
2400     node *ch = CHILD(n, 0);
2401 
2402     switch (TYPE(ch)) {
2403     case NAME: {
2404         PyObject *name;
2405         const char *s = STR(ch);
2406         size_t len = strlen(s);
2407         if (len >= 4 && len <= 5) {
2408             if (!strcmp(s, "None"))
2409                 return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
2410                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2411             if (!strcmp(s, "True"))
2412                 return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
2413                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2414             if (!strcmp(s, "False"))
2415                 return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
2416                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2417         }
2418         name = new_identifier(s, c);
2419         if (!name)
2420             return NULL;
2421         /* All names start in Load context, but may later be changed. */
2422         return Name(name, Load, LINENO(n), n->n_col_offset,
2423                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2424     }
2425     case STRING: {
2426         expr_ty str = parsestrplus(c, n);
2427         if (!str) {
2428             const char *errtype = NULL;
2429             if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2430                 errtype = "unicode error";
2431             else if (PyErr_ExceptionMatches(PyExc_ValueError))
2432                 errtype = "value error";
2433             if (errtype) {
2434                 PyObject *type, *value, *tback, *errstr;
2435                 PyErr_Fetch(&type, &value, &tback);
2436                 errstr = PyObject_Str(value);
2437                 if (errstr) {
2438                     ast_error(c, n, "(%s) %U", errtype, errstr);
2439                     Py_DECREF(errstr);
2440                 }
2441                 else {
2442                     PyErr_Clear();
2443                     ast_error(c, n, "(%s) unknown error", errtype);
2444                 }
2445                 Py_DECREF(type);
2446                 Py_XDECREF(value);
2447                 Py_XDECREF(tback);
2448             }
2449             return NULL;
2450         }
2451         return str;
2452     }
2453     case NUMBER: {
2454         PyObject *pynum;
2455         /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
2456         /* Check for underscores here rather than in parse_number so we can report a line number on error */
2457         if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
2458             ast_error(c, ch,
2459                       "Underscores in numeric literals are only supported in Python 3.6 and greater");
2460             return NULL;
2461         }
2462         pynum = parsenumber(c, STR(ch));
2463         if (!pynum) {
2464             PyThreadState *tstate = PyThreadState_GET();
2465             // The only way a ValueError should happen in _this_ code is via
2466             // PyLong_FromString hitting a length limit.
2467             if (tstate->curexc_type == PyExc_ValueError &&
2468                 tstate->curexc_value != NULL) {
2469                 PyObject *type, *value, *tb;
2470                 // This acts as PyErr_Clear() as we're replacing curexc.
2471                 PyErr_Fetch(&type, &value, &tb);
2472                 Py_XDECREF(tb);
2473                 Py_DECREF(type);
2474                 ast_error(c, ch,
2475                     "%S - Consider hexadecimal for huge integer literals "
2476                     "to avoid decimal conversion limits.",
2477                     value);
2478                 Py_DECREF(value);
2479             }
2480             return NULL;
2481         }
2482 
2483         if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2484             Py_DECREF(pynum);
2485             return NULL;
2486         }
2487         return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
2488                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2489     }
2490     case ELLIPSIS: /* Ellipsis */
2491         return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
2492                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2493     case LPAR: /* some parenthesized expressions */
2494         ch = CHILD(n, 1);
2495 
2496         if (TYPE(ch) == RPAR)
2497             return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
2498                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2499 
2500         if (TYPE(ch) == yield_expr)
2501             return ast_for_expr(c, ch);
2502 
2503         /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2504         if (NCH(ch) == 1) {
2505             return ast_for_testlist(c, ch);
2506         }
2507 
2508         if (TYPE(CHILD(ch, 1)) == comp_for) {
2509             return copy_location(ast_for_genexp(c, ch), n, n);
2510         }
2511         else {
2512             return copy_location(ast_for_testlist(c, ch), n, n);
2513         }
2514     case LSQB: /* list (or list comprehension) */
2515         ch = CHILD(n, 1);
2516 
2517         if (TYPE(ch) == RSQB)
2518             return List(NULL, Load, LINENO(n), n->n_col_offset,
2519                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2520 
2521         REQ(ch, testlist_comp);
2522         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2523             asdl_seq *elts = seq_for_testlist(c, ch);
2524             if (!elts)
2525                 return NULL;
2526 
2527             return List(elts, Load, LINENO(n), n->n_col_offset,
2528                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2529         }
2530         else {
2531             return copy_location(ast_for_listcomp(c, ch), n, n);
2532         }
2533     case LBRACE: {
2534         /* dictorsetmaker: ( ((test ':' test | '**' test)
2535          *                    (comp_for | (',' (test ':' test | '**' test))* [','])) |
2536          *                   ((test | '*' test)
2537          *                    (comp_for | (',' (test | '*' test))* [','])) ) */
2538         expr_ty res;
2539         ch = CHILD(n, 1);
2540         if (TYPE(ch) == RBRACE) {
2541             /* It's an empty dict. */
2542             return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
2543                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2544         }
2545         else {
2546             int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2547             if (NCH(ch) == 1 ||
2548                     (NCH(ch) > 1 &&
2549                      TYPE(CHILD(ch, 1)) == COMMA)) {
2550                 /* It's a set display. */
2551                 res = ast_for_setdisplay(c, ch);
2552             }
2553             else if (NCH(ch) > 1 &&
2554                     TYPE(CHILD(ch, 1)) == comp_for) {
2555                 /* It's a set comprehension. */
2556                 res = ast_for_setcomp(c, ch);
2557             }
2558             else if (NCH(ch) > 3 - is_dict &&
2559                     TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2560                 /* It's a dictionary comprehension. */
2561                 if (is_dict) {
2562                     ast_error(c, n,
2563                               "dict unpacking cannot be used in dict comprehension");
2564                     return NULL;
2565                 }
2566                 res = ast_for_dictcomp(c, ch);
2567             }
2568             else {
2569                 /* It's a dictionary display. */
2570                 res = ast_for_dictdisplay(c, ch);
2571             }
2572             return copy_location(res, n, n);
2573         }
2574     }
2575     default:
2576         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2577         return NULL;
2578     }
2579 }
2580 
2581 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2582 ast_for_slice(struct compiling *c, const node *n)
2583 {
2584     node *ch;
2585     expr_ty lower = NULL, upper = NULL, step = NULL;
2586 
2587     REQ(n, subscript);
2588 
2589     /*
2590        subscript: test | [test] ':' [test] [sliceop]
2591        sliceop: ':' [test]
2592     */
2593     ch = CHILD(n, 0);
2594     if (NCH(n) == 1 && TYPE(ch) == test) {
2595         /* 'step' variable hold no significance in terms of being used over
2596            other vars */
2597         step = ast_for_expr(c, ch);
2598         if (!step)
2599             return NULL;
2600 
2601         return Index(step, c->c_arena);
2602     }
2603 
2604     if (TYPE(ch) == test) {
2605         lower = ast_for_expr(c, ch);
2606         if (!lower)
2607             return NULL;
2608     }
2609 
2610     /* If there's an upper bound it's in the second or third position. */
2611     if (TYPE(ch) == COLON) {
2612         if (NCH(n) > 1) {
2613             node *n2 = CHILD(n, 1);
2614 
2615             if (TYPE(n2) == test) {
2616                 upper = ast_for_expr(c, n2);
2617                 if (!upper)
2618                     return NULL;
2619             }
2620         }
2621     } else if (NCH(n) > 2) {
2622         node *n2 = CHILD(n, 2);
2623 
2624         if (TYPE(n2) == test) {
2625             upper = ast_for_expr(c, n2);
2626             if (!upper)
2627                 return NULL;
2628         }
2629     }
2630 
2631     ch = CHILD(n, NCH(n) - 1);
2632     if (TYPE(ch) == sliceop) {
2633         if (NCH(ch) != 1) {
2634             ch = CHILD(ch, 1);
2635             if (TYPE(ch) == test) {
2636                 step = ast_for_expr(c, ch);
2637                 if (!step)
2638                     return NULL;
2639             }
2640         }
2641     }
2642 
2643     return Slice(lower, upper, step, c->c_arena);
2644 }
2645 
2646 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2647 ast_for_binop(struct compiling *c, const node *n)
2648 {
2649     /* Must account for a sequence of expressions.
2650        How should A op B op C by represented?
2651        BinOp(BinOp(A, op, B), op, C).
2652     */
2653 
2654     int i, nops;
2655     expr_ty expr1, expr2, result;
2656     operator_ty newoperator;
2657 
2658     expr1 = ast_for_expr(c, CHILD(n, 0));
2659     if (!expr1)
2660         return NULL;
2661 
2662     expr2 = ast_for_expr(c, CHILD(n, 2));
2663     if (!expr2)
2664         return NULL;
2665 
2666     newoperator = get_operator(c, CHILD(n, 1));
2667     if (!newoperator)
2668         return NULL;
2669 
2670     result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2671                    CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2672                    c->c_arena);
2673     if (!result)
2674         return NULL;
2675 
2676     nops = (NCH(n) - 1) / 2;
2677     for (i = 1; i < nops; i++) {
2678         expr_ty tmp_result, tmp;
2679         const node* next_oper = CHILD(n, i * 2 + 1);
2680 
2681         newoperator = get_operator(c, next_oper);
2682         if (!newoperator)
2683             return NULL;
2684 
2685         tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2686         if (!tmp)
2687             return NULL;
2688 
2689         tmp_result = BinOp(result, newoperator, tmp,
2690                            LINENO(n), n->n_col_offset,
2691                            CHILD(n, i * 2 + 2)->n_end_lineno,
2692                            CHILD(n, i * 2 + 2)->n_end_col_offset,
2693                            c->c_arena);
2694         if (!tmp_result)
2695             return NULL;
2696         result = tmp_result;
2697     }
2698     return result;
2699 }
2700 
2701 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr,const node * start)2702 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start)
2703 {
2704     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2705        subscriptlist: subscript (',' subscript)* [',']
2706        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2707      */
2708     const node *n_copy = n;
2709     REQ(n, trailer);
2710     if (TYPE(CHILD(n, 0)) == LPAR) {
2711         if (NCH(n) == 2)
2712             return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset,
2713                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2714         else
2715             return ast_for_call(c, CHILD(n, 1), left_expr,
2716                                 start, CHILD(n, 0), CHILD(n, 2));
2717     }
2718     else if (TYPE(CHILD(n, 0)) == DOT) {
2719         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2720         if (!attr_id)
2721             return NULL;
2722         return Attribute(left_expr, attr_id, Load,
2723                          LINENO(start), start->n_col_offset,
2724                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2725     }
2726     else {
2727         REQ(CHILD(n, 0), LSQB);
2728         REQ(CHILD(n, 2), RSQB);
2729         n = CHILD(n, 1);
2730         if (NCH(n) == 1) {
2731             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2732             if (!slc)
2733                 return NULL;
2734             return Subscript(left_expr, slc, Load, LINENO(start), start->n_col_offset,
2735                              n_copy->n_end_lineno, n_copy->n_end_col_offset,
2736                              c->c_arena);
2737         }
2738         else {
2739             /* The grammar is ambiguous here. The ambiguity is resolved
2740                by treating the sequence as a tuple literal if there are
2741                no slice features.
2742             */
2743             Py_ssize_t j;
2744             slice_ty slc;
2745             expr_ty e;
2746             int simple = 1;
2747             asdl_seq *slices, *elts;
2748             slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2749             if (!slices)
2750                 return NULL;
2751             for (j = 0; j < NCH(n); j += 2) {
2752                 slc = ast_for_slice(c, CHILD(n, j));
2753                 if (!slc)
2754                     return NULL;
2755                 if (slc->kind != Index_kind)
2756                     simple = 0;
2757                 asdl_seq_SET(slices, j / 2, slc);
2758             }
2759             if (!simple) {
2760                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2761                                  Load, LINENO(start), start->n_col_offset,
2762                                  n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2763             }
2764             /* extract Index values and put them in a Tuple */
2765             elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2766             if (!elts)
2767                 return NULL;
2768             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2769                 slc = (slice_ty)asdl_seq_GET(slices, j);
2770                 assert(slc->kind == Index_kind  && slc->v.Index.value);
2771                 asdl_seq_SET(elts, j, slc->v.Index.value);
2772             }
2773             e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2774                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2775             if (!e)
2776                 return NULL;
2777             return Subscript(left_expr, Index(e, c->c_arena),
2778                              Load, LINENO(start), start->n_col_offset,
2779                              n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2780         }
2781     }
2782 }
2783 
2784 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2785 ast_for_factor(struct compiling *c, const node *n)
2786 {
2787     expr_ty expression;
2788 
2789     expression = ast_for_expr(c, CHILD(n, 1));
2790     if (!expression)
2791         return NULL;
2792 
2793     switch (TYPE(CHILD(n, 0))) {
2794         case PLUS:
2795             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2796                            n->n_end_lineno, n->n_end_col_offset,
2797                            c->c_arena);
2798         case MINUS:
2799             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2800                            n->n_end_lineno, n->n_end_col_offset,
2801                            c->c_arena);
2802         case TILDE:
2803             return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2804                            n->n_end_lineno, n->n_end_col_offset,
2805                            c->c_arena);
2806     }
2807     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2808                  TYPE(CHILD(n, 0)));
2809     return NULL;
2810 }
2811 
2812 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2813 ast_for_atom_expr(struct compiling *c, const node *n)
2814 {
2815     int i, nch, start = 0;
2816     expr_ty e;
2817 
2818     REQ(n, atom_expr);
2819     nch = NCH(n);
2820 
2821     if (TYPE(CHILD(n, 0)) == AWAIT) {
2822         if (c->c_feature_version < 5) {
2823             ast_error(c, n,
2824                       "Await expressions are only supported in Python 3.5 and greater");
2825             return NULL;
2826         }
2827         start = 1;
2828         assert(nch > 1);
2829     }
2830 
2831     e = ast_for_atom(c, CHILD(n, start));
2832     if (!e)
2833         return NULL;
2834     if (nch == 1)
2835         return e;
2836     if (start && nch == 2) {
2837         return Await(e, LINENO(n), n->n_col_offset,
2838                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2839     }
2840 
2841     for (i = start + 1; i < nch; i++) {
2842         node *ch = CHILD(n, i);
2843         if (TYPE(ch) != trailer)
2844             break;
2845         e = ast_for_trailer(c, ch, e, CHILD(n, start));
2846         if (!e)
2847             return NULL;
2848     }
2849 
2850     if (start) {
2851         /* there was an 'await' */
2852         return Await(e, LINENO(n), n->n_col_offset,
2853                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2854     }
2855     else {
2856         return e;
2857     }
2858 }
2859 
2860 static expr_ty
ast_for_power(struct compiling * c,const node * n)2861 ast_for_power(struct compiling *c, const node *n)
2862 {
2863     /* power: atom trailer* ('**' factor)*
2864      */
2865     expr_ty e;
2866     REQ(n, power);
2867     e = ast_for_atom_expr(c, CHILD(n, 0));
2868     if (!e)
2869         return NULL;
2870     if (NCH(n) == 1)
2871         return e;
2872     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2873         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2874         if (!f)
2875             return NULL;
2876         e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2877                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2878     }
2879     return e;
2880 }
2881 
2882 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2883 ast_for_starred(struct compiling *c, const node *n)
2884 {
2885     expr_ty tmp;
2886     REQ(n, star_expr);
2887 
2888     tmp = ast_for_expr(c, CHILD(n, 1));
2889     if (!tmp)
2890         return NULL;
2891 
2892     /* The Load context is changed later. */
2893     return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2894                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2895 }
2896 
2897 
2898 /* Do not name a variable 'expr'!  Will cause a compile error.
2899 */
2900 
2901 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2902 ast_for_expr(struct compiling *c, const node *n)
2903 {
2904     /* handle the full range of simple expressions
2905        namedexpr_test: test [':=' test]
2906        test: or_test ['if' or_test 'else' test] | lambdef
2907        test_nocond: or_test | lambdef_nocond
2908        or_test: and_test ('or' and_test)*
2909        and_test: not_test ('and' not_test)*
2910        not_test: 'not' not_test | comparison
2911        comparison: expr (comp_op expr)*
2912        expr: xor_expr ('|' xor_expr)*
2913        xor_expr: and_expr ('^' and_expr)*
2914        and_expr: shift_expr ('&' shift_expr)*
2915        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2916        arith_expr: term (('+'|'-') term)*
2917        term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2918        factor: ('+'|'-'|'~') factor | power
2919        power: atom_expr ['**' factor]
2920        atom_expr: [AWAIT] atom trailer*
2921        yield_expr: 'yield' [yield_arg]
2922     */
2923 
2924     asdl_seq *seq;
2925     int i;
2926 
2927  loop:
2928     switch (TYPE(n)) {
2929         case namedexpr_test:
2930             if (NCH(n) == 3)
2931                 return ast_for_namedexpr(c, n);
2932             /* Fallthrough */
2933         case test:
2934         case test_nocond:
2935             if (TYPE(CHILD(n, 0)) == lambdef ||
2936                 TYPE(CHILD(n, 0)) == lambdef_nocond)
2937                 return ast_for_lambdef(c, CHILD(n, 0));
2938             else if (NCH(n) > 1)
2939                 return ast_for_ifexpr(c, n);
2940             /* Fallthrough */
2941         case or_test:
2942         case and_test:
2943             if (NCH(n) == 1) {
2944                 n = CHILD(n, 0);
2945                 goto loop;
2946             }
2947             seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2948             if (!seq)
2949                 return NULL;
2950             for (i = 0; i < NCH(n); i += 2) {
2951                 expr_ty e = ast_for_expr(c, CHILD(n, i));
2952                 if (!e)
2953                     return NULL;
2954                 asdl_seq_SET(seq, i / 2, e);
2955             }
2956             if (!strcmp(STR(CHILD(n, 1)), "and"))
2957                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2958                               n->n_end_lineno, n->n_end_col_offset,
2959                               c->c_arena);
2960             assert(!strcmp(STR(CHILD(n, 1)), "or"));
2961             return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
2962                           n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2963         case not_test:
2964             if (NCH(n) == 1) {
2965                 n = CHILD(n, 0);
2966                 goto loop;
2967             }
2968             else {
2969                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2970                 if (!expression)
2971                     return NULL;
2972 
2973                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2974                                n->n_end_lineno, n->n_end_col_offset,
2975                                c->c_arena);
2976             }
2977         case comparison:
2978             if (NCH(n) == 1) {
2979                 n = CHILD(n, 0);
2980                 goto loop;
2981             }
2982             else {
2983                 expr_ty expression;
2984                 asdl_int_seq *ops;
2985                 asdl_seq *cmps;
2986                 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2987                 if (!ops)
2988                     return NULL;
2989                 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2990                 if (!cmps) {
2991                     return NULL;
2992                 }
2993                 for (i = 1; i < NCH(n); i += 2) {
2994                     cmpop_ty newoperator;
2995 
2996                     newoperator = ast_for_comp_op(c, CHILD(n, i));
2997                     if (!newoperator) {
2998                         return NULL;
2999                     }
3000 
3001                     expression = ast_for_expr(c, CHILD(n, i + 1));
3002                     if (!expression) {
3003                         return NULL;
3004                     }
3005 
3006                     asdl_seq_SET(ops, i / 2, newoperator);
3007                     asdl_seq_SET(cmps, i / 2, expression);
3008                 }
3009                 expression = ast_for_expr(c, CHILD(n, 0));
3010                 if (!expression) {
3011                     return NULL;
3012                 }
3013 
3014                 return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
3015                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3016             }
3017 
3018         case star_expr:
3019             return ast_for_starred(c, n);
3020         /* The next five cases all handle BinOps.  The main body of code
3021            is the same in each case, but the switch turned inside out to
3022            reuse the code for each type of operator.
3023          */
3024         case expr:
3025         case xor_expr:
3026         case and_expr:
3027         case shift_expr:
3028         case arith_expr:
3029         case term:
3030             if (NCH(n) == 1) {
3031                 n = CHILD(n, 0);
3032                 goto loop;
3033             }
3034             return ast_for_binop(c, n);
3035         case yield_expr: {
3036             node *an = NULL;
3037             node *en = NULL;
3038             int is_from = 0;
3039             expr_ty exp = NULL;
3040             if (NCH(n) > 1)
3041                 an = CHILD(n, 1); /* yield_arg */
3042             if (an) {
3043                 en = CHILD(an, NCH(an) - 1);
3044                 if (NCH(an) == 2) {
3045                     is_from = 1;
3046                     exp = ast_for_expr(c, en);
3047                 }
3048                 else
3049                     exp = ast_for_testlist(c, en);
3050                 if (!exp)
3051                     return NULL;
3052             }
3053             if (is_from)
3054                 return YieldFrom(exp, LINENO(n), n->n_col_offset,
3055                                  n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3056             return Yield(exp, LINENO(n), n->n_col_offset,
3057                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3058         }
3059         case factor:
3060             if (NCH(n) == 1) {
3061                 n = CHILD(n, 0);
3062                 goto loop;
3063             }
3064             return ast_for_factor(c, n);
3065         case power:
3066             return ast_for_power(c, n);
3067         default:
3068             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
3069             return NULL;
3070     }
3071     /* should never get here unless if error is set */
3072     return NULL;
3073 }
3074 
3075 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * start,const node * maybegenbeg,const node * closepar)3076 ast_for_call(struct compiling *c, const node *n, expr_ty func,
3077              const node *start, const node *maybegenbeg, const node *closepar)
3078 {
3079     /*
3080       arglist: argument (',' argument)*  [',']
3081       argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
3082     */
3083 
3084     int i, nargs, nkeywords;
3085     int ndoublestars;
3086     asdl_seq *args;
3087     asdl_seq *keywords;
3088 
3089     REQ(n, arglist);
3090 
3091     nargs = 0;
3092     nkeywords = 0;
3093     for (i = 0; i < NCH(n); i++) {
3094         node *ch = CHILD(n, i);
3095         if (TYPE(ch) == argument) {
3096             if (NCH(ch) == 1)
3097                 nargs++;
3098             else if (TYPE(CHILD(ch, 1)) == comp_for) {
3099                 nargs++;
3100                 if (!maybegenbeg) {
3101                     ast_error(c, ch, "invalid syntax");
3102                     return NULL;
3103                 }
3104                 if (NCH(n) > 1) {
3105                     ast_error(c, ch, "Generator expression must be parenthesized");
3106                     return NULL;
3107                 }
3108             }
3109             else if (TYPE(CHILD(ch, 0)) == STAR)
3110                 nargs++;
3111             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3112                 nargs++;
3113             }
3114             else
3115                 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
3116                 nkeywords++;
3117         }
3118     }
3119 
3120     args = _Py_asdl_seq_new(nargs, c->c_arena);
3121     if (!args)
3122         return NULL;
3123     keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
3124     if (!keywords)
3125         return NULL;
3126 
3127     nargs = 0;  /* positional arguments + iterable argument unpackings */
3128     nkeywords = 0;  /* keyword arguments + keyword argument unpackings */
3129     ndoublestars = 0;  /* just keyword argument unpackings */
3130     for (i = 0; i < NCH(n); i++) {
3131         node *ch = CHILD(n, i);
3132         if (TYPE(ch) == argument) {
3133             expr_ty e;
3134             node *chch = CHILD(ch, 0);
3135             if (NCH(ch) == 1) {
3136                 /* a positional argument */
3137                 if (nkeywords) {
3138                     if (ndoublestars) {
3139                         ast_error(c, chch,
3140                                   "positional argument follows "
3141                                   "keyword argument unpacking");
3142                     }
3143                     else {
3144                         ast_error(c, chch,
3145                                   "positional argument follows "
3146                                   "keyword argument");
3147                     }
3148                     return NULL;
3149                 }
3150                 e = ast_for_expr(c, chch);
3151                 if (!e)
3152                     return NULL;
3153                 asdl_seq_SET(args, nargs++, e);
3154             }
3155             else if (TYPE(chch) == STAR) {
3156                 /* an iterable argument unpacking */
3157                 expr_ty starred;
3158                 if (ndoublestars) {
3159                     ast_error(c, chch,
3160                               "iterable argument unpacking follows "
3161                               "keyword argument unpacking");
3162                     return NULL;
3163                 }
3164                 e = ast_for_expr(c, CHILD(ch, 1));
3165                 if (!e)
3166                     return NULL;
3167                 starred = Starred(e, Load, LINENO(chch),
3168                         chch->n_col_offset,
3169                         e->end_lineno, e->end_col_offset,
3170                         c->c_arena);
3171                 if (!starred)
3172                     return NULL;
3173                 asdl_seq_SET(args, nargs++, starred);
3174 
3175             }
3176             else if (TYPE(chch) == DOUBLESTAR) {
3177                 /* a keyword argument unpacking */
3178                 keyword_ty kw;
3179                 i++;
3180                 e = ast_for_expr(c, CHILD(ch, 1));
3181                 if (!e)
3182                     return NULL;
3183                 kw = keyword(NULL, e, c->c_arena);
3184                 asdl_seq_SET(keywords, nkeywords++, kw);
3185                 ndoublestars++;
3186             }
3187             else if (TYPE(CHILD(ch, 1)) == comp_for) {
3188                 /* the lone generator expression */
3189                 e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar);
3190                 if (!e)
3191                     return NULL;
3192                 asdl_seq_SET(args, nargs++, e);
3193             }
3194             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3195                 /* treat colon equal as positional argument */
3196                 if (nkeywords) {
3197                     if (ndoublestars) {
3198                         ast_error(c, chch,
3199                                   "positional argument follows "
3200                                   "keyword argument unpacking");
3201                     }
3202                     else {
3203                         ast_error(c, chch,
3204                                   "positional argument follows "
3205                                   "keyword argument");
3206                     }
3207                     return NULL;
3208                 }
3209                 e = ast_for_namedexpr(c, ch);
3210                 if (!e)
3211                     return NULL;
3212                 asdl_seq_SET(args, nargs++, e);
3213             }
3214             else {
3215                 /* a keyword argument */
3216                 keyword_ty kw;
3217                 identifier key, tmp;
3218                 int k;
3219 
3220                 // To remain LL(1), the grammar accepts any test (basically, any
3221                 // expression) in the keyword slot of a call site.  So, we need
3222                 // to manually enforce that the keyword is a NAME here.
3223                 static const int name_tree[] = {
3224                     test,
3225                     or_test,
3226                     and_test,
3227                     not_test,
3228                     comparison,
3229                     expr,
3230                     xor_expr,
3231                     and_expr,
3232                     shift_expr,
3233                     arith_expr,
3234                     term,
3235                     factor,
3236                     power,
3237                     atom_expr,
3238                     atom,
3239                     0,
3240                 };
3241                 node *expr_node = chch;
3242                 for (int i = 0; name_tree[i]; i++) {
3243                     if (TYPE(expr_node) != name_tree[i])
3244                         break;
3245                     if (NCH(expr_node) != 1)
3246                         break;
3247                     expr_node = CHILD(expr_node, 0);
3248                 }
3249                 if (TYPE(expr_node) != NAME) {
3250                     ast_error(c, chch,
3251                               "expression cannot contain assignment, "
3252                               "perhaps you meant \"==\"?");
3253                     return NULL;
3254                 }
3255                 key = new_identifier(STR(expr_node), c);
3256                 if (key == NULL) {
3257                     return NULL;
3258                 }
3259                 if (forbidden_name(c, key, chch, 1)) {
3260                     return NULL;
3261                 }
3262                 for (k = 0; k < nkeywords; k++) {
3263                     tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
3264                     if (tmp && !PyUnicode_Compare(tmp, key)) {
3265                         ast_error(c, chch,
3266                                   "keyword argument repeated");
3267                         return NULL;
3268                     }
3269                 }
3270                 e = ast_for_expr(c, CHILD(ch, 2));
3271                 if (!e)
3272                     return NULL;
3273                 kw = keyword(key, e, c->c_arena);
3274                 if (!kw)
3275                     return NULL;
3276                 asdl_seq_SET(keywords, nkeywords++, kw);
3277             }
3278         }
3279     }
3280 
3281     return Call(func, args, keywords, LINENO(start), start->n_col_offset,
3282                 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3283 }
3284 
3285 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3286 ast_for_testlist(struct compiling *c, const node* n)
3287 {
3288     /* testlist_comp: test (comp_for | (',' test)* [',']) */
3289     /* testlist: test (',' test)* [','] */
3290     assert(NCH(n) > 0);
3291     if (TYPE(n) == testlist_comp) {
3292         if (NCH(n) > 1)
3293             assert(TYPE(CHILD(n, 1)) != comp_for);
3294     }
3295     else {
3296         assert(TYPE(n) == testlist ||
3297                TYPE(n) == testlist_star_expr);
3298     }
3299     if (NCH(n) == 1)
3300         return ast_for_expr(c, CHILD(n, 0));
3301     else {
3302         asdl_seq *tmp = seq_for_testlist(c, n);
3303         if (!tmp)
3304             return NULL;
3305         return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3306                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3307     }
3308 }
3309 
3310 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)3311 ast_for_expr_stmt(struct compiling *c, const node *n)
3312 {
3313     REQ(n, expr_stmt);
3314     /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
3315                      [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
3316        annassign: ':' test ['=' (yield_expr|testlist)]
3317        testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
3318        augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
3319                    '<<=' | '>>=' | '**=' | '//=')
3320        test: ... here starts the operator precedence dance
3321      */
3322     int num = NCH(n);
3323 
3324     if (num == 1) {
3325         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
3326         if (!e)
3327             return NULL;
3328 
3329         return Expr(e, LINENO(n), n->n_col_offset,
3330                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3331     }
3332     else if (TYPE(CHILD(n, 1)) == augassign) {
3333         expr_ty expr1, expr2;
3334         operator_ty newoperator;
3335         node *ch = CHILD(n, 0);
3336 
3337         expr1 = ast_for_testlist(c, ch);
3338         if (!expr1)
3339             return NULL;
3340         if(!set_context(c, expr1, Store, ch))
3341             return NULL;
3342         /* set_context checks that most expressions are not the left side.
3343           Augmented assignments can only have a name, a subscript, or an
3344           attribute on the left, though, so we have to explicitly check for
3345           those. */
3346         switch (expr1->kind) {
3347             case Name_kind:
3348             case Attribute_kind:
3349             case Subscript_kind:
3350                 break;
3351             default:
3352                 ast_error(c, ch, "illegal expression for augmented assignment");
3353                 return NULL;
3354         }
3355 
3356         ch = CHILD(n, 2);
3357         if (TYPE(ch) == testlist)
3358             expr2 = ast_for_testlist(c, ch);
3359         else
3360             expr2 = ast_for_expr(c, ch);
3361         if (!expr2)
3362             return NULL;
3363 
3364         newoperator = ast_for_augassign(c, CHILD(n, 1));
3365         if (!newoperator)
3366             return NULL;
3367 
3368         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
3369                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3370     }
3371     else if (TYPE(CHILD(n, 1)) == annassign) {
3372         expr_ty expr1, expr2, expr3;
3373         node *ch = CHILD(n, 0);
3374         node *deep, *ann = CHILD(n, 1);
3375         int simple = 1;
3376 
3377         /* AnnAssigns are only allowed in Python 3.6 or greater */
3378         if (c->c_feature_version < 6) {
3379             ast_error(c, ch,
3380                       "Variable annotation syntax is only supported in Python 3.6 and greater");
3381             return NULL;
3382         }
3383 
3384         /* we keep track of parens to qualify (x) as expression not name */
3385         deep = ch;
3386         while (NCH(deep) == 1) {
3387             deep = CHILD(deep, 0);
3388         }
3389         if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
3390             simple = 0;
3391         }
3392         expr1 = ast_for_testlist(c, ch);
3393         if (!expr1) {
3394             return NULL;
3395         }
3396         switch (expr1->kind) {
3397             case Name_kind:
3398                 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
3399                     return NULL;
3400                 }
3401                 expr1->v.Name.ctx = Store;
3402                 break;
3403             case Attribute_kind:
3404                 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
3405                     return NULL;
3406                 }
3407                 expr1->v.Attribute.ctx = Store;
3408                 break;
3409             case Subscript_kind:
3410                 expr1->v.Subscript.ctx = Store;
3411                 break;
3412             case List_kind:
3413                 ast_error(c, ch,
3414                           "only single target (not list) can be annotated");
3415                 return NULL;
3416             case Tuple_kind:
3417                 ast_error(c, ch,
3418                           "only single target (not tuple) can be annotated");
3419                 return NULL;
3420             default:
3421                 ast_error(c, ch,
3422                           "illegal target for annotation");
3423                 return NULL;
3424         }
3425 
3426         if (expr1->kind != Name_kind) {
3427             simple = 0;
3428         }
3429         ch = CHILD(ann, 1);
3430         expr2 = ast_for_expr(c, ch);
3431         if (!expr2) {
3432             return NULL;
3433         }
3434         if (NCH(ann) == 2) {
3435             return AnnAssign(expr1, expr2, NULL, simple,
3436                              LINENO(n), n->n_col_offset,
3437                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3438         }
3439         else {
3440             ch = CHILD(ann, 3);
3441             if (TYPE(ch) == testlist_star_expr) {
3442                 expr3 = ast_for_testlist(c, ch);
3443             }
3444             else {
3445                 expr3 = ast_for_expr(c, ch);
3446             }
3447             if (!expr3) {
3448                 return NULL;
3449             }
3450             return AnnAssign(expr1, expr2, expr3, simple,
3451                              LINENO(n), n->n_col_offset,
3452                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3453         }
3454     }
3455     else {
3456         int i, nch_minus_type, has_type_comment;
3457         asdl_seq *targets;
3458         node *value;
3459         expr_ty expression;
3460         string type_comment;
3461 
3462         /* a normal assignment */
3463         REQ(CHILD(n, 1), EQUAL);
3464 
3465         has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
3466         nch_minus_type = num - has_type_comment;
3467 
3468         targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
3469         if (!targets)
3470             return NULL;
3471         for (i = 0; i < nch_minus_type - 2; i += 2) {
3472             expr_ty e;
3473             node *ch = CHILD(n, i);
3474             if (TYPE(ch) == yield_expr) {
3475                 ast_error(c, ch, "assignment to yield expression not possible");
3476                 return NULL;
3477             }
3478             e = ast_for_testlist(c, ch);
3479             if (!e)
3480               return NULL;
3481 
3482             /* set context to assign */
3483             if (!set_context(c, e, Store, CHILD(n, i)))
3484               return NULL;
3485 
3486             asdl_seq_SET(targets, i / 2, e);
3487         }
3488         value = CHILD(n, nch_minus_type - 1);
3489         if (TYPE(value) == testlist_star_expr)
3490             expression = ast_for_testlist(c, value);
3491         else
3492             expression = ast_for_expr(c, value);
3493         if (!expression)
3494             return NULL;
3495         if (has_type_comment) {
3496             type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
3497             if (!type_comment)
3498                 return NULL;
3499         }
3500         else
3501             type_comment = NULL;
3502         return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
3503                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3504     }
3505 }
3506 
3507 
3508 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3509 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3510 {
3511     asdl_seq *seq;
3512     int i;
3513     expr_ty e;
3514 
3515     REQ(n, exprlist);
3516 
3517     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3518     if (!seq)
3519         return NULL;
3520     for (i = 0; i < NCH(n); i += 2) {
3521         e = ast_for_expr(c, CHILD(n, i));
3522         if (!e)
3523             return NULL;
3524         asdl_seq_SET(seq, i / 2, e);
3525         if (context && !set_context(c, e, context, CHILD(n, i)))
3526             return NULL;
3527     }
3528     return seq;
3529 }
3530 
3531 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3532 ast_for_del_stmt(struct compiling *c, const node *n)
3533 {
3534     asdl_seq *expr_list;
3535 
3536     /* del_stmt: 'del' exprlist */
3537     REQ(n, del_stmt);
3538 
3539     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3540     if (!expr_list)
3541         return NULL;
3542     return Delete(expr_list, LINENO(n), n->n_col_offset,
3543                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3544 }
3545 
3546 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3547 ast_for_flow_stmt(struct compiling *c, const node *n)
3548 {
3549     /*
3550       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3551                  | yield_stmt
3552       break_stmt: 'break'
3553       continue_stmt: 'continue'
3554       return_stmt: 'return' [testlist]
3555       yield_stmt: yield_expr
3556       yield_expr: 'yield' testlist | 'yield' 'from' test
3557       raise_stmt: 'raise' [test [',' test [',' test]]]
3558     */
3559     node *ch;
3560 
3561     REQ(n, flow_stmt);
3562     ch = CHILD(n, 0);
3563     switch (TYPE(ch)) {
3564         case break_stmt:
3565             return Break(LINENO(n), n->n_col_offset,
3566                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3567         case continue_stmt:
3568             return Continue(LINENO(n), n->n_col_offset,
3569                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3570         case yield_stmt: { /* will reduce to yield_expr */
3571             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3572             if (!exp)
3573                 return NULL;
3574             return Expr(exp, LINENO(n), n->n_col_offset,
3575                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3576         }
3577         case return_stmt:
3578             if (NCH(ch) == 1)
3579                 return Return(NULL, LINENO(n), n->n_col_offset,
3580                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3581             else {
3582                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3583                 if (!expression)
3584                     return NULL;
3585                 return Return(expression, LINENO(n), n->n_col_offset,
3586                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3587             }
3588         case raise_stmt:
3589             if (NCH(ch) == 1)
3590                 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3591                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3592             else if (NCH(ch) >= 2) {
3593                 expr_ty cause = NULL;
3594                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3595                 if (!expression)
3596                     return NULL;
3597                 if (NCH(ch) == 4) {
3598                     cause = ast_for_expr(c, CHILD(ch, 3));
3599                     if (!cause)
3600                         return NULL;
3601                 }
3602                 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3603                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3604             }
3605             /* fall through */
3606         default:
3607             PyErr_Format(PyExc_SystemError,
3608                          "unexpected flow_stmt: %d", TYPE(ch));
3609             return NULL;
3610     }
3611 }
3612 
3613 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3614 alias_for_import_name(struct compiling *c, const node *n, int store)
3615 {
3616     /*
3617       import_as_name: NAME ['as' NAME]
3618       dotted_as_name: dotted_name ['as' NAME]
3619       dotted_name: NAME ('.' NAME)*
3620     */
3621     identifier str, name;
3622 
3623  loop:
3624     switch (TYPE(n)) {
3625         case import_as_name: {
3626             node *name_node = CHILD(n, 0);
3627             str = NULL;
3628             name = NEW_IDENTIFIER(name_node);
3629             if (!name)
3630                 return NULL;
3631             if (NCH(n) == 3) {
3632                 node *str_node = CHILD(n, 2);
3633                 str = NEW_IDENTIFIER(str_node);
3634                 if (!str)
3635                     return NULL;
3636                 if (store && forbidden_name(c, str, str_node, 0))
3637                     return NULL;
3638             }
3639             else {
3640                 if (forbidden_name(c, name, name_node, 0))
3641                     return NULL;
3642             }
3643             return alias(name, str, c->c_arena);
3644         }
3645         case dotted_as_name:
3646             if (NCH(n) == 1) {
3647                 n = CHILD(n, 0);
3648                 goto loop;
3649             }
3650             else {
3651                 node *asname_node = CHILD(n, 2);
3652                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3653                 if (!a)
3654                     return NULL;
3655                 assert(!a->asname);
3656                 a->asname = NEW_IDENTIFIER(asname_node);
3657                 if (!a->asname)
3658                     return NULL;
3659                 if (forbidden_name(c, a->asname, asname_node, 0))
3660                     return NULL;
3661                 return a;
3662             }
3663         case dotted_name:
3664             if (NCH(n) == 1) {
3665                 node *name_node = CHILD(n, 0);
3666                 name = NEW_IDENTIFIER(name_node);
3667                 if (!name)
3668                     return NULL;
3669                 if (store && forbidden_name(c, name, name_node, 0))
3670                     return NULL;
3671                 return alias(name, NULL, c->c_arena);
3672             }
3673             else {
3674                 /* Create a string of the form "a.b.c" */
3675                 int i;
3676                 size_t len;
3677                 char *s;
3678                 PyObject *uni;
3679 
3680                 len = 0;
3681                 for (i = 0; i < NCH(n); i += 2)
3682                     /* length of string plus one for the dot */
3683                     len += strlen(STR(CHILD(n, i))) + 1;
3684                 len--; /* the last name doesn't have a dot */
3685                 str = PyBytes_FromStringAndSize(NULL, len);
3686                 if (!str)
3687                     return NULL;
3688                 s = PyBytes_AS_STRING(str);
3689                 if (!s)
3690                     return NULL;
3691                 for (i = 0; i < NCH(n); i += 2) {
3692                     char *sch = STR(CHILD(n, i));
3693                     strcpy(s, STR(CHILD(n, i)));
3694                     s += strlen(sch);
3695                     *s++ = '.';
3696                 }
3697                 --s;
3698                 *s = '\0';
3699                 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3700                                            PyBytes_GET_SIZE(str),
3701                                            NULL);
3702                 Py_DECREF(str);
3703                 if (!uni)
3704                     return NULL;
3705                 str = uni;
3706                 PyUnicode_InternInPlace(&str);
3707                 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3708                     Py_DECREF(str);
3709                     return NULL;
3710                 }
3711                 return alias(str, NULL, c->c_arena);
3712             }
3713         case STAR:
3714             str = PyUnicode_InternFromString("*");
3715             if (!str)
3716                 return NULL;
3717             if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3718                 Py_DECREF(str);
3719                 return NULL;
3720             }
3721             return alias(str, NULL, c->c_arena);
3722         default:
3723             PyErr_Format(PyExc_SystemError,
3724                          "unexpected import name: %d", TYPE(n));
3725             return NULL;
3726     }
3727 
3728     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3729     return NULL;
3730 }
3731 
3732 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3733 ast_for_import_stmt(struct compiling *c, const node *n)
3734 {
3735     /*
3736       import_stmt: import_name | import_from
3737       import_name: 'import' dotted_as_names
3738       import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3739                    'import' ('*' | '(' import_as_names ')' | import_as_names)
3740     */
3741     int lineno;
3742     int col_offset;
3743     int i;
3744     asdl_seq *aliases;
3745 
3746     REQ(n, import_stmt);
3747     lineno = LINENO(n);
3748     col_offset = n->n_col_offset;
3749     n = CHILD(n, 0);
3750     if (TYPE(n) == import_name) {
3751         n = CHILD(n, 1);
3752         REQ(n, dotted_as_names);
3753         aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3754         if (!aliases)
3755                 return NULL;
3756         for (i = 0; i < NCH(n); i += 2) {
3757             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3758             if (!import_alias)
3759                 return NULL;
3760             asdl_seq_SET(aliases, i / 2, import_alias);
3761         }
3762         // Even though n is modified above, the end position is not changed
3763         return Import(aliases, lineno, col_offset,
3764                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3765     }
3766     else if (TYPE(n) == import_from) {
3767         int n_children;
3768         int idx, ndots = 0;
3769         const node *n_copy = n;
3770         alias_ty mod = NULL;
3771         identifier modname = NULL;
3772 
3773        /* Count the number of dots (for relative imports) and check for the
3774           optional module name */
3775         for (idx = 1; idx < NCH(n); idx++) {
3776             if (TYPE(CHILD(n, idx)) == dotted_name) {
3777                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3778                 if (!mod)
3779                     return NULL;
3780                 idx++;
3781                 break;
3782             } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3783                 /* three consecutive dots are tokenized as one ELLIPSIS */
3784                 ndots += 3;
3785                 continue;
3786             } else if (TYPE(CHILD(n, idx)) != DOT) {
3787                 break;
3788             }
3789             ndots++;
3790         }
3791         idx++; /* skip over the 'import' keyword */
3792         switch (TYPE(CHILD(n, idx))) {
3793         case STAR:
3794             /* from ... import * */
3795             n = CHILD(n, idx);
3796             n_children = 1;
3797             break;
3798         case LPAR:
3799             /* from ... import (x, y, z) */
3800             n = CHILD(n, idx + 1);
3801             n_children = NCH(n);
3802             break;
3803         case import_as_names:
3804             /* from ... import x, y, z */
3805             n = CHILD(n, idx);
3806             n_children = NCH(n);
3807             if (n_children % 2 == 0) {
3808                 ast_error(c, n,
3809                           "trailing comma not allowed without"
3810                           " surrounding parentheses");
3811                 return NULL;
3812             }
3813             break;
3814         default:
3815             ast_error(c, n, "Unexpected node-type in from-import");
3816             return NULL;
3817         }
3818 
3819         aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3820         if (!aliases)
3821             return NULL;
3822 
3823         /* handle "from ... import *" special b/c there's no children */
3824         if (TYPE(n) == STAR) {
3825             alias_ty import_alias = alias_for_import_name(c, n, 1);
3826             if (!import_alias)
3827                 return NULL;
3828             asdl_seq_SET(aliases, 0, import_alias);
3829         }
3830         else {
3831             for (i = 0; i < NCH(n); i += 2) {
3832                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3833                 if (!import_alias)
3834                     return NULL;
3835                 asdl_seq_SET(aliases, i / 2, import_alias);
3836             }
3837         }
3838         if (mod != NULL)
3839             modname = mod->name;
3840         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3841                           n_copy->n_end_lineno, n_copy->n_end_col_offset,
3842                           c->c_arena);
3843     }
3844     PyErr_Format(PyExc_SystemError,
3845                  "unknown import statement: starts with command '%s'",
3846                  STR(CHILD(n, 0)));
3847     return NULL;
3848 }
3849 
3850 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3851 ast_for_global_stmt(struct compiling *c, const node *n)
3852 {
3853     /* global_stmt: 'global' NAME (',' NAME)* */
3854     identifier name;
3855     asdl_seq *s;
3856     int i;
3857 
3858     REQ(n, global_stmt);
3859     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3860     if (!s)
3861         return NULL;
3862     for (i = 1; i < NCH(n); i += 2) {
3863         name = NEW_IDENTIFIER(CHILD(n, i));
3864         if (!name)
3865             return NULL;
3866         asdl_seq_SET(s, i / 2, name);
3867     }
3868     return Global(s, LINENO(n), n->n_col_offset,
3869                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3870 }
3871 
3872 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3873 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3874 {
3875     /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3876     identifier name;
3877     asdl_seq *s;
3878     int i;
3879 
3880     REQ(n, nonlocal_stmt);
3881     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3882     if (!s)
3883         return NULL;
3884     for (i = 1; i < NCH(n); i += 2) {
3885         name = NEW_IDENTIFIER(CHILD(n, i));
3886         if (!name)
3887             return NULL;
3888         asdl_seq_SET(s, i / 2, name);
3889     }
3890     return Nonlocal(s, LINENO(n), n->n_col_offset,
3891                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3892 }
3893 
3894 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3895 ast_for_assert_stmt(struct compiling *c, const node *n)
3896 {
3897     /* assert_stmt: 'assert' test [',' test] */
3898     REQ(n, assert_stmt);
3899     if (NCH(n) == 2) {
3900         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3901         if (!expression)
3902             return NULL;
3903         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3904                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3905     }
3906     else if (NCH(n) == 4) {
3907         expr_ty expr1, expr2;
3908 
3909         expr1 = ast_for_expr(c, CHILD(n, 1));
3910         if (!expr1)
3911             return NULL;
3912         expr2 = ast_for_expr(c, CHILD(n, 3));
3913         if (!expr2)
3914             return NULL;
3915 
3916         return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3917                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3918     }
3919     PyErr_Format(PyExc_SystemError,
3920                  "improper number of parts to 'assert' statement: %d",
3921                  NCH(n));
3922     return NULL;
3923 }
3924 
3925 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3926 ast_for_suite(struct compiling *c, const node *n)
3927 {
3928     /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
3929     asdl_seq *seq;
3930     stmt_ty s;
3931     int i, total, num, end, pos = 0;
3932     node *ch;
3933 
3934     if (TYPE(n) != func_body_suite) {
3935         REQ(n, suite);
3936     }
3937 
3938     total = num_stmts(n);
3939     seq = _Py_asdl_seq_new(total, c->c_arena);
3940     if (!seq)
3941         return NULL;
3942     if (TYPE(CHILD(n, 0)) == simple_stmt) {
3943         n = CHILD(n, 0);
3944         /* simple_stmt always ends with a NEWLINE,
3945            and may have a trailing SEMI
3946         */
3947         end = NCH(n) - 1;
3948         if (TYPE(CHILD(n, end - 1)) == SEMI)
3949             end--;
3950         /* loop by 2 to skip semi-colons */
3951         for (i = 0; i < end; i += 2) {
3952             ch = CHILD(n, i);
3953             s = ast_for_stmt(c, ch);
3954             if (!s)
3955                 return NULL;
3956             asdl_seq_SET(seq, pos++, s);
3957         }
3958     }
3959     else {
3960         i = 2;
3961         if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
3962             i += 2;
3963             REQ(CHILD(n, 2), NEWLINE);
3964         }
3965 
3966         for (; i < (NCH(n) - 1); i++) {
3967             ch = CHILD(n, i);
3968             REQ(ch, stmt);
3969             num = num_stmts(ch);
3970             if (num == 1) {
3971                 /* small_stmt or compound_stmt with only one child */
3972                 s = ast_for_stmt(c, ch);
3973                 if (!s)
3974                     return NULL;
3975                 asdl_seq_SET(seq, pos++, s);
3976             }
3977             else {
3978                 int j;
3979                 ch = CHILD(ch, 0);
3980                 REQ(ch, simple_stmt);
3981                 for (j = 0; j < NCH(ch); j += 2) {
3982                     /* statement terminates with a semi-colon ';' */
3983                     if (NCH(CHILD(ch, j)) == 0) {
3984                         assert((j + 1) == NCH(ch));
3985                         break;
3986                     }
3987                     s = ast_for_stmt(c, CHILD(ch, j));
3988                     if (!s)
3989                         return NULL;
3990                     asdl_seq_SET(seq, pos++, s);
3991                 }
3992             }
3993         }
3994     }
3995     assert(pos == seq->size);
3996     return seq;
3997 }
3998 
3999 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)4000 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
4001 {
4002     Py_ssize_t tot = asdl_seq_LEN(s);
4003     // There must be no empty suites.
4004     assert(tot > 0);
4005     stmt_ty last = asdl_seq_GET(s, tot - 1);
4006     *end_lineno = last->end_lineno;
4007     *end_col_offset = last->end_col_offset;
4008 }
4009 
4010 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)4011 ast_for_if_stmt(struct compiling *c, const node *n)
4012 {
4013     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
4014        ['else' ':' suite]
4015     */
4016     char *s;
4017     int end_lineno, end_col_offset;
4018 
4019     REQ(n, if_stmt);
4020 
4021     if (NCH(n) == 4) {
4022         expr_ty expression;
4023         asdl_seq *suite_seq;
4024 
4025         expression = ast_for_expr(c, CHILD(n, 1));
4026         if (!expression)
4027             return NULL;
4028         suite_seq = ast_for_suite(c, CHILD(n, 3));
4029         if (!suite_seq)
4030             return NULL;
4031         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4032 
4033         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4034                   end_lineno, end_col_offset, c->c_arena);
4035     }
4036 
4037     s = STR(CHILD(n, 4));
4038     /* s[2], the third character in the string, will be
4039        's' for el_s_e, or
4040        'i' for el_i_f
4041     */
4042     if (s[2] == 's') {
4043         expr_ty expression;
4044         asdl_seq *seq1, *seq2;
4045 
4046         expression = ast_for_expr(c, CHILD(n, 1));
4047         if (!expression)
4048             return NULL;
4049         seq1 = ast_for_suite(c, CHILD(n, 3));
4050         if (!seq1)
4051             return NULL;
4052         seq2 = ast_for_suite(c, CHILD(n, 6));
4053         if (!seq2)
4054             return NULL;
4055         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4056 
4057         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4058                   end_lineno, end_col_offset, c->c_arena);
4059     }
4060     else if (s[2] == 'i') {
4061         int i, n_elif, has_else = 0;
4062         expr_ty expression;
4063         asdl_seq *suite_seq;
4064         asdl_seq *orelse = NULL;
4065         n_elif = NCH(n) - 4;
4066         /* must reference the child n_elif+1 since 'else' token is third,
4067            not fourth, child from the end. */
4068         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
4069             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
4070             has_else = 1;
4071             n_elif -= 3;
4072         }
4073         n_elif /= 4;
4074 
4075         if (has_else) {
4076             asdl_seq *suite_seq2;
4077 
4078             orelse = _Py_asdl_seq_new(1, c->c_arena);
4079             if (!orelse)
4080                 return NULL;
4081             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
4082             if (!expression)
4083                 return NULL;
4084             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
4085             if (!suite_seq)
4086                 return NULL;
4087             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4088             if (!suite_seq2)
4089                 return NULL;
4090             get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
4091 
4092             asdl_seq_SET(orelse, 0,
4093                          If(expression, suite_seq, suite_seq2,
4094                             LINENO(CHILD(n, NCH(n) - 7)),
4095                             CHILD(n, NCH(n) - 7)->n_col_offset,
4096                             end_lineno, end_col_offset, c->c_arena));
4097             /* the just-created orelse handled the last elif */
4098             n_elif--;
4099         }
4100 
4101         for (i = 0; i < n_elif; i++) {
4102             int off = 5 + (n_elif - i - 1) * 4;
4103             asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
4104             if (!newobj)
4105                 return NULL;
4106             expression = ast_for_expr(c, CHILD(n, off));
4107             if (!expression)
4108                 return NULL;
4109             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
4110             if (!suite_seq)
4111                 return NULL;
4112 
4113             if (orelse != NULL) {
4114                 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4115             } else {
4116                 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4117             }
4118             asdl_seq_SET(newobj, 0,
4119                          If(expression, suite_seq, orelse,
4120                             LINENO(CHILD(n, off - 1)),
4121                             CHILD(n, off - 1)->n_col_offset,
4122                             end_lineno, end_col_offset, c->c_arena));
4123             orelse = newobj;
4124         }
4125         expression = ast_for_expr(c, CHILD(n, 1));
4126         if (!expression)
4127             return NULL;
4128         suite_seq = ast_for_suite(c, CHILD(n, 3));
4129         if (!suite_seq)
4130             return NULL;
4131         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4132         return If(expression, suite_seq, orelse,
4133                   LINENO(n), n->n_col_offset,
4134                   end_lineno, end_col_offset, c->c_arena);
4135     }
4136 
4137     PyErr_Format(PyExc_SystemError,
4138                  "unexpected token in 'if' statement: %s", s);
4139     return NULL;
4140 }
4141 
4142 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)4143 ast_for_while_stmt(struct compiling *c, const node *n)
4144 {
4145     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
4146     REQ(n, while_stmt);
4147     int end_lineno, end_col_offset;
4148 
4149     if (NCH(n) == 4) {
4150         expr_ty expression;
4151         asdl_seq *suite_seq;
4152 
4153         expression = ast_for_expr(c, CHILD(n, 1));
4154         if (!expression)
4155             return NULL;
4156         suite_seq = ast_for_suite(c, CHILD(n, 3));
4157         if (!suite_seq)
4158             return NULL;
4159         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4160         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4161                      end_lineno, end_col_offset, c->c_arena);
4162     }
4163     else if (NCH(n) == 7) {
4164         expr_ty expression;
4165         asdl_seq *seq1, *seq2;
4166 
4167         expression = ast_for_expr(c, CHILD(n, 1));
4168         if (!expression)
4169             return NULL;
4170         seq1 = ast_for_suite(c, CHILD(n, 3));
4171         if (!seq1)
4172             return NULL;
4173         seq2 = ast_for_suite(c, CHILD(n, 6));
4174         if (!seq2)
4175             return NULL;
4176         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4177 
4178         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4179                      end_lineno, end_col_offset, c->c_arena);
4180     }
4181 
4182     PyErr_Format(PyExc_SystemError,
4183                  "wrong number of tokens for 'while' statement: %d",
4184                  NCH(n));
4185     return NULL;
4186 }
4187 
4188 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4189 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4190 {
4191     const node * const n = is_async ? CHILD(n0, 1) : n0;
4192     asdl_seq *_target, *seq = NULL, *suite_seq;
4193     expr_ty expression;
4194     expr_ty target, first;
4195     const node *node_target;
4196     int end_lineno, end_col_offset;
4197     int has_type_comment;
4198     string type_comment;
4199 
4200     if (is_async && c->c_feature_version < 5) {
4201         ast_error(c, n,
4202                   "Async for loops are only supported in Python 3.5 and greater");
4203         return NULL;
4204     }
4205 
4206     /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4207     REQ(n, for_stmt);
4208 
4209     has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4210 
4211     if (NCH(n) == 9 + has_type_comment) {
4212         seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4213         if (!seq)
4214             return NULL;
4215     }
4216 
4217     node_target = CHILD(n, 1);
4218     _target = ast_for_exprlist(c, node_target, Store);
4219     if (!_target)
4220         return NULL;
4221     /* Check the # of children rather than the length of _target, since
4222        for x, in ... has 1 element in _target, but still requires a Tuple. */
4223     first = (expr_ty)asdl_seq_GET(_target, 0);
4224     if (NCH(node_target) == 1)
4225         target = first;
4226     else
4227         target = Tuple(_target, Store, first->lineno, first->col_offset,
4228                        node_target->n_end_lineno, node_target->n_end_col_offset,
4229                        c->c_arena);
4230 
4231     expression = ast_for_testlist(c, CHILD(n, 3));
4232     if (!expression)
4233         return NULL;
4234     suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4235     if (!suite_seq)
4236         return NULL;
4237 
4238     if (seq != NULL) {
4239         get_last_end_pos(seq, &end_lineno, &end_col_offset);
4240     } else {
4241         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4242     }
4243 
4244     if (has_type_comment) {
4245         type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4246         if (!type_comment)
4247             return NULL;
4248     }
4249     else
4250         type_comment = NULL;
4251 
4252     if (is_async)
4253         return AsyncFor(target, expression, suite_seq, seq, type_comment,
4254                         LINENO(n0), n0->n_col_offset,
4255                         end_lineno, end_col_offset, c->c_arena);
4256     else
4257         return For(target, expression, suite_seq, seq, type_comment,
4258                    LINENO(n), n->n_col_offset,
4259                    end_lineno, end_col_offset, c->c_arena);
4260 }
4261 
4262 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4263 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4264 {
4265     /* except_clause: 'except' [test ['as' test]] */
4266     int end_lineno, end_col_offset;
4267     REQ(exc, except_clause);
4268     REQ(body, suite);
4269 
4270     if (NCH(exc) == 1) {
4271         asdl_seq *suite_seq = ast_for_suite(c, body);
4272         if (!suite_seq)
4273             return NULL;
4274         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4275 
4276         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4277                              exc->n_col_offset,
4278                              end_lineno, end_col_offset, c->c_arena);
4279     }
4280     else if (NCH(exc) == 2) {
4281         expr_ty expression;
4282         asdl_seq *suite_seq;
4283 
4284         expression = ast_for_expr(c, CHILD(exc, 1));
4285         if (!expression)
4286             return NULL;
4287         suite_seq = ast_for_suite(c, body);
4288         if (!suite_seq)
4289             return NULL;
4290         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4291 
4292         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4293                              exc->n_col_offset,
4294                              end_lineno, end_col_offset, c->c_arena);
4295     }
4296     else if (NCH(exc) == 4) {
4297         asdl_seq *suite_seq;
4298         expr_ty expression;
4299         identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4300         if (!e)
4301             return NULL;
4302         if (forbidden_name(c, e, CHILD(exc, 3), 0))
4303             return NULL;
4304         expression = ast_for_expr(c, CHILD(exc, 1));
4305         if (!expression)
4306             return NULL;
4307         suite_seq = ast_for_suite(c, body);
4308         if (!suite_seq)
4309             return NULL;
4310         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4311 
4312         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4313                              exc->n_col_offset,
4314                              end_lineno, end_col_offset, c->c_arena);
4315     }
4316 
4317     PyErr_Format(PyExc_SystemError,
4318                  "wrong number of children for 'except' clause: %d",
4319                  NCH(exc));
4320     return NULL;
4321 }
4322 
4323 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)4324 ast_for_try_stmt(struct compiling *c, const node *n)
4325 {
4326     const int nch = NCH(n);
4327     int end_lineno, end_col_offset, n_except = (nch - 3)/3;
4328     asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
4329     excepthandler_ty last_handler;
4330 
4331     REQ(n, try_stmt);
4332 
4333     body = ast_for_suite(c, CHILD(n, 2));
4334     if (body == NULL)
4335         return NULL;
4336 
4337     if (TYPE(CHILD(n, nch - 3)) == NAME) {
4338         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
4339             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
4340                 /* we can assume it's an "else",
4341                    because nch >= 9 for try-else-finally and
4342                    it would otherwise have a type of except_clause */
4343                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
4344                 if (orelse == NULL)
4345                     return NULL;
4346                 n_except--;
4347             }
4348 
4349             finally = ast_for_suite(c, CHILD(n, nch - 1));
4350             if (finally == NULL)
4351                 return NULL;
4352             n_except--;
4353         }
4354         else {
4355             /* we can assume it's an "else",
4356                otherwise it would have a type of except_clause */
4357             orelse = ast_for_suite(c, CHILD(n, nch - 1));
4358             if (orelse == NULL)
4359                 return NULL;
4360             n_except--;
4361         }
4362     }
4363     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
4364         ast_error(c, n, "malformed 'try' statement");
4365         return NULL;
4366     }
4367 
4368     if (n_except > 0) {
4369         int i;
4370         /* process except statements to create a try ... except */
4371         handlers = _Py_asdl_seq_new(n_except, c->c_arena);
4372         if (handlers == NULL)
4373             return NULL;
4374 
4375         for (i = 0; i < n_except; i++) {
4376             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
4377                                                        CHILD(n, 5 + i * 3));
4378             if (!e)
4379                 return NULL;
4380             asdl_seq_SET(handlers, i, e);
4381         }
4382     }
4383 
4384     assert(finally != NULL || asdl_seq_LEN(handlers));
4385         if (finally != NULL) {
4386         // finally is always last
4387         get_last_end_pos(finally, &end_lineno, &end_col_offset);
4388     } else if (orelse != NULL) {
4389         // otherwise else is last
4390         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4391     } else {
4392         // inline the get_last_end_pos logic due to layout mismatch
4393         last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
4394         end_lineno = last_handler->end_lineno;
4395         end_col_offset = last_handler->end_col_offset;
4396     }
4397     return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
4398                end_lineno, end_col_offset, c->c_arena);
4399 }
4400 
4401 /* with_item: test ['as' expr] */
4402 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4403 ast_for_with_item(struct compiling *c, const node *n)
4404 {
4405     expr_ty context_expr, optional_vars = NULL;
4406 
4407     REQ(n, with_item);
4408     context_expr = ast_for_expr(c, CHILD(n, 0));
4409     if (!context_expr)
4410         return NULL;
4411     if (NCH(n) == 3) {
4412         optional_vars = ast_for_expr(c, CHILD(n, 2));
4413 
4414         if (!optional_vars) {
4415             return NULL;
4416         }
4417         if (!set_context(c, optional_vars, Store, n)) {
4418             return NULL;
4419         }
4420     }
4421 
4422     return withitem(context_expr, optional_vars, c->c_arena);
4423 }
4424 
4425 /* with_stmt: 'with' with_item (',' with_item)*  ':' [TYPE_COMMENT] suite */
4426 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4427 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4428 {
4429     const node * const n = is_async ? CHILD(n0, 1) : n0;
4430     int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4431     asdl_seq *items, *body;
4432     string type_comment;
4433 
4434     if (is_async && c->c_feature_version < 5) {
4435         ast_error(c, n,
4436                   "Async with statements are only supported in Python 3.5 and greater");
4437         return NULL;
4438     }
4439 
4440     REQ(n, with_stmt);
4441 
4442     has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4443     nch_minus_type = NCH(n) - has_type_comment;
4444 
4445     n_items = (nch_minus_type - 2) / 2;
4446     items = _Py_asdl_seq_new(n_items, c->c_arena);
4447     if (!items)
4448         return NULL;
4449     for (i = 1; i < nch_minus_type - 2; i += 2) {
4450         withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4451         if (!item)
4452             return NULL;
4453         asdl_seq_SET(items, (i - 1) / 2, item);
4454     }
4455 
4456     body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4457     if (!body)
4458         return NULL;
4459     get_last_end_pos(body, &end_lineno, &end_col_offset);
4460 
4461     if (has_type_comment) {
4462         type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4463         if (!type_comment)
4464             return NULL;
4465     }
4466     else
4467         type_comment = NULL;
4468 
4469     if (is_async)
4470         return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4471                          end_lineno, end_col_offset, c->c_arena);
4472     else
4473         return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4474                     end_lineno, end_col_offset, c->c_arena);
4475 }
4476 
4477 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4478 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4479 {
4480     /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4481     PyObject *classname;
4482     asdl_seq *s;
4483     expr_ty call;
4484     int end_lineno, end_col_offset;
4485 
4486     REQ(n, classdef);
4487 
4488     if (NCH(n) == 4) { /* class NAME ':' suite */
4489         s = ast_for_suite(c, CHILD(n, 3));
4490         if (!s)
4491             return NULL;
4492         get_last_end_pos(s, &end_lineno, &end_col_offset);
4493 
4494         classname = NEW_IDENTIFIER(CHILD(n, 1));
4495         if (!classname)
4496             return NULL;
4497         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4498             return NULL;
4499         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4500                         LINENO(n), n->n_col_offset,
4501                         end_lineno, end_col_offset, c->c_arena);
4502     }
4503 
4504     if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4505         s = ast_for_suite(c, CHILD(n, 5));
4506         if (!s)
4507             return NULL;
4508         get_last_end_pos(s, &end_lineno, &end_col_offset);
4509 
4510         classname = NEW_IDENTIFIER(CHILD(n, 1));
4511         if (!classname)
4512             return NULL;
4513         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4514             return NULL;
4515         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4516                         LINENO(n), n->n_col_offset,
4517                         end_lineno, end_col_offset, c->c_arena);
4518     }
4519 
4520     /* class NAME '(' arglist ')' ':' suite */
4521     /* build up a fake Call node so we can extract its pieces */
4522     {
4523         PyObject *dummy_name;
4524         expr_ty dummy;
4525         dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4526         if (!dummy_name)
4527             return NULL;
4528         dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4529                      CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4530                      c->c_arena);
4531         call = ast_for_call(c, CHILD(n, 3), dummy,
4532                             CHILD(n, 1), NULL, CHILD(n, 4));
4533         if (!call)
4534             return NULL;
4535     }
4536     s = ast_for_suite(c, CHILD(n, 6));
4537     if (!s)
4538         return NULL;
4539     get_last_end_pos(s, &end_lineno, &end_col_offset);
4540 
4541     classname = NEW_IDENTIFIER(CHILD(n, 1));
4542     if (!classname)
4543         return NULL;
4544     if (forbidden_name(c, classname, CHILD(n, 1), 0))
4545         return NULL;
4546 
4547     return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4548                     decorator_seq, LINENO(n), n->n_col_offset,
4549                     end_lineno, end_col_offset, c->c_arena);
4550 }
4551 
4552 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4553 ast_for_stmt(struct compiling *c, const node *n)
4554 {
4555     if (TYPE(n) == stmt) {
4556         assert(NCH(n) == 1);
4557         n = CHILD(n, 0);
4558     }
4559     if (TYPE(n) == simple_stmt) {
4560         assert(num_stmts(n) == 1);
4561         n = CHILD(n, 0);
4562     }
4563     if (TYPE(n) == small_stmt) {
4564         n = CHILD(n, 0);
4565         /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4566                   | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4567         */
4568         switch (TYPE(n)) {
4569             case expr_stmt:
4570                 return ast_for_expr_stmt(c, n);
4571             case del_stmt:
4572                 return ast_for_del_stmt(c, n);
4573             case pass_stmt:
4574                 return Pass(LINENO(n), n->n_col_offset,
4575                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4576             case flow_stmt:
4577                 return ast_for_flow_stmt(c, n);
4578             case import_stmt:
4579                 return ast_for_import_stmt(c, n);
4580             case global_stmt:
4581                 return ast_for_global_stmt(c, n);
4582             case nonlocal_stmt:
4583                 return ast_for_nonlocal_stmt(c, n);
4584             case assert_stmt:
4585                 return ast_for_assert_stmt(c, n);
4586             default:
4587                 PyErr_Format(PyExc_SystemError,
4588                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
4589                              TYPE(n), NCH(n));
4590                 return NULL;
4591         }
4592     }
4593     else {
4594         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4595                         | funcdef | classdef | decorated | async_stmt
4596         */
4597         node *ch = CHILD(n, 0);
4598         REQ(n, compound_stmt);
4599         switch (TYPE(ch)) {
4600             case if_stmt:
4601                 return ast_for_if_stmt(c, ch);
4602             case while_stmt:
4603                 return ast_for_while_stmt(c, ch);
4604             case for_stmt:
4605                 return ast_for_for_stmt(c, ch, 0);
4606             case try_stmt:
4607                 return ast_for_try_stmt(c, ch);
4608             case with_stmt:
4609                 return ast_for_with_stmt(c, ch, 0);
4610             case funcdef:
4611                 return ast_for_funcdef(c, ch, NULL);
4612             case classdef:
4613                 return ast_for_classdef(c, ch, NULL);
4614             case decorated:
4615                 return ast_for_decorated(c, ch);
4616             case async_stmt:
4617                 return ast_for_async_stmt(c, ch);
4618             default:
4619                 PyErr_Format(PyExc_SystemError,
4620                              "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4621                              TYPE(n), NCH(n));
4622                 return NULL;
4623         }
4624     }
4625 }
4626 
4627 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4628 parsenumber_raw(struct compiling *c, const char *s)
4629 {
4630     const char *end;
4631     long x;
4632     double dx;
4633     Py_complex compl;
4634     int imflag;
4635 
4636     assert(s != NULL);
4637     errno = 0;
4638     end = s + strlen(s) - 1;
4639     imflag = *end == 'j' || *end == 'J';
4640     if (s[0] == '0') {
4641         x = (long) PyOS_strtoul(s, (char **)&end, 0);
4642         if (x < 0 && errno == 0) {
4643             return PyLong_FromString(s, (char **)0, 0);
4644         }
4645     }
4646     else
4647         x = PyOS_strtol(s, (char **)&end, 0);
4648     if (*end == '\0') {
4649         if (errno != 0)
4650             return PyLong_FromString(s, (char **)0, 0);
4651         return PyLong_FromLong(x);
4652     }
4653     /* XXX Huge floats may silently fail */
4654     if (imflag) {
4655         compl.real = 0.;
4656         compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4657         if (compl.imag == -1.0 && PyErr_Occurred())
4658             return NULL;
4659         return PyComplex_FromCComplex(compl);
4660     }
4661     else
4662     {
4663         dx = PyOS_string_to_double(s, NULL, NULL);
4664         if (dx == -1.0 && PyErr_Occurred())
4665             return NULL;
4666         return PyFloat_FromDouble(dx);
4667     }
4668 }
4669 
4670 static PyObject *
parsenumber(struct compiling * c,const char * s)4671 parsenumber(struct compiling *c, const char *s)
4672 {
4673     char *dup, *end;
4674     PyObject *res = NULL;
4675 
4676     assert(s != NULL);
4677 
4678     if (strchr(s, '_') == NULL) {
4679         return parsenumber_raw(c, s);
4680     }
4681     /* Create a duplicate without underscores. */
4682     dup = PyMem_Malloc(strlen(s) + 1);
4683     if (dup == NULL) {
4684         return PyErr_NoMemory();
4685     }
4686     end = dup;
4687     for (; *s; s++) {
4688         if (*s != '_') {
4689             *end++ = *s;
4690         }
4691     }
4692     *end = '\0';
4693     res = parsenumber_raw(c, dup);
4694     PyMem_Free(dup);
4695     return res;
4696 }
4697 
4698 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4699 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4700 {
4701     const char *s, *t;
4702     t = s = *sPtr;
4703     /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4704     while (s < end && (*s & 0x80)) s++;
4705     *sPtr = s;
4706     return PyUnicode_DecodeUTF8(t, s - t, NULL);
4707 }
4708 
4709 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4710 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4711                              unsigned char first_invalid_escape_char)
4712 {
4713     PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4714                                          first_invalid_escape_char);
4715     if (msg == NULL) {
4716         return -1;
4717     }
4718     if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4719                                    c->c_filename, LINENO(n),
4720                                    NULL, NULL) < 0)
4721     {
4722         if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4723             /* Replace the DeprecationWarning exception with a SyntaxError
4724                to get a more accurate error report */
4725             PyErr_Clear();
4726             ast_error(c, n, "%U", msg);
4727         }
4728         Py_DECREF(msg);
4729         return -1;
4730     }
4731     Py_DECREF(msg);
4732     return 0;
4733 }
4734 
4735 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4736 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4737                             size_t len)
4738 {
4739     PyObject *v, *u;
4740     char *buf;
4741     char *p;
4742     const char *end;
4743 
4744     /* check for integer overflow */
4745     if (len > SIZE_MAX / 6)
4746         return NULL;
4747     /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4748        "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4749     u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4750     if (u == NULL)
4751         return NULL;
4752     p = buf = PyBytes_AsString(u);
4753     end = s + len;
4754     while (s < end) {
4755         if (*s == '\\') {
4756             *p++ = *s++;
4757             if (s >= end || *s & 0x80) {
4758                 strcpy(p, "u005c");
4759                 p += 5;
4760                 if (s >= end)
4761                     break;
4762             }
4763         }
4764         if (*s & 0x80) { /* XXX inefficient */
4765             PyObject *w;
4766             int kind;
4767             void *data;
4768             Py_ssize_t len, i;
4769             w = decode_utf8(c, &s, end);
4770             if (w == NULL) {
4771                 Py_DECREF(u);
4772                 return NULL;
4773             }
4774             kind = PyUnicode_KIND(w);
4775             data = PyUnicode_DATA(w);
4776             len = PyUnicode_GET_LENGTH(w);
4777             for (i = 0; i < len; i++) {
4778                 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4779                 sprintf(p, "\\U%08x", chr);
4780                 p += 10;
4781             }
4782             /* Should be impossible to overflow */
4783             assert(p - buf <= PyBytes_GET_SIZE(u));
4784             Py_DECREF(w);
4785         } else {
4786             *p++ = *s++;
4787         }
4788     }
4789     len = p - buf;
4790     s = buf;
4791 
4792     const char *first_invalid_escape;
4793     v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4794 
4795     if (v != NULL && first_invalid_escape != NULL) {
4796         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4797             /* We have not decref u before because first_invalid_escape points
4798                inside u. */
4799             Py_XDECREF(u);
4800             Py_DECREF(v);
4801             return NULL;
4802         }
4803     }
4804     Py_XDECREF(u);
4805     return v;
4806 }
4807 
4808 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4809 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4810                           size_t len)
4811 {
4812     const char *first_invalid_escape;
4813     PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4814                                              &first_invalid_escape);
4815     if (result == NULL)
4816         return NULL;
4817 
4818     if (first_invalid_escape != NULL) {
4819         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4820             Py_DECREF(result);
4821             return NULL;
4822         }
4823     }
4824     return result;
4825 }
4826 
4827 /* Shift locations for the given node and all its children by adding `lineno`
4828    and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4829 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4830 {
4831     n->n_col_offset = n->n_col_offset + col_offset;
4832     n->n_end_col_offset = n->n_end_col_offset + col_offset;
4833     for (int i = 0; i < NCH(n); ++i) {
4834         if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4835             /* Shifting column offsets unnecessary if there's been newlines. */
4836             col_offset = 0;
4837         }
4838         fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4839     }
4840     n->n_lineno = n->n_lineno + lineno;
4841     n->n_end_lineno = n->n_end_lineno + lineno;
4842 }
4843 
4844 /* Fix locations for the given node and its children.
4845 
4846    `parent` is the enclosing node.
4847    `n` is the node which locations are going to be fixed relative to parent.
4848    `expr_str` is the child node's string representation, including braces.
4849 */
4850 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4851 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4852 {
4853     char *substr = NULL;
4854     char *start;
4855     int lines = LINENO(parent) - 1;
4856     int cols = parent->n_col_offset;
4857     /* Find the full fstring to fix location information in `n`. */
4858     while (parent && parent->n_type != STRING)
4859         parent = parent->n_child;
4860     if (parent && parent->n_str) {
4861         substr = strstr(parent->n_str, expr_str);
4862         if (substr) {
4863             start = substr;
4864             while (start > parent->n_str) {
4865                 if (start[0] == '\n')
4866                     break;
4867                 start--;
4868             }
4869             cols += (int)(substr - start);
4870             /* adjust the start based on the number of newlines encountered
4871                before the f-string expression */
4872             for (char* p = parent->n_str; p < substr; p++) {
4873                 if (*p == '\n') {
4874                     lines++;
4875                 }
4876             }
4877         }
4878     }
4879     fstring_shift_node_locations(n, lines, cols);
4880 }
4881 
4882 /* Compile this expression in to an expr_ty.  Add parens around the
4883    expression, in order to allow leading spaces in the expression. */
4884 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4885 fstring_compile_expr(const char *expr_start, const char *expr_end,
4886                      struct compiling *c, const node *n)
4887 
4888 {
4889     node *mod_n;
4890     mod_ty mod;
4891     char *str;
4892     Py_ssize_t len;
4893     const char *s;
4894 
4895     assert(expr_end >= expr_start);
4896     assert(*(expr_start-1) == '{');
4897     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4898            *expr_end == '=');
4899 
4900     /* If the substring is all whitespace, it's an error.  We need to catch this
4901        here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4902        because turning the expression '' in to '()' would go from being invalid
4903        to valid. */
4904     for (s = expr_start; s != expr_end; s++) {
4905         char c = *s;
4906         /* The Python parser ignores only the following whitespace
4907            characters (\r already is converted to \n). */
4908         if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4909             break;
4910         }
4911     }
4912     if (s == expr_end) {
4913         ast_error(c, n, "f-string: empty expression not allowed");
4914         return NULL;
4915     }
4916 
4917     len = expr_end - expr_start;
4918     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4919     str = PyMem_Malloc(len + 3);
4920     if (str == NULL) {
4921         PyErr_NoMemory();
4922         return NULL;
4923     }
4924 
4925     str[0] = '(';
4926     memcpy(str+1, expr_start, len);
4927     str[len+1] = ')';
4928     str[len+2] = 0;
4929 
4930     PyCompilerFlags cf = _PyCompilerFlags_INIT;
4931     cf.cf_flags = PyCF_ONLY_AST;
4932     mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4933                                                     Py_eval_input, 0);
4934     if (!mod_n) {
4935         PyMem_Free(str);
4936         return NULL;
4937     }
4938     /* Reuse str to find the correct column offset. */
4939     str[0] = '{';
4940     str[len+1] = '}';
4941     fstring_fix_node_location(n, mod_n, str);
4942     mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4943     PyMem_Free(str);
4944     PyNode_Free(mod_n);
4945     if (!mod)
4946         return NULL;
4947     return mod->v.Expression.body;
4948 }
4949 
4950 /* Return -1 on error.
4951 
4952    Return 0 if we reached the end of the literal.
4953 
4954    Return 1 if we haven't reached the end of the literal, but we want
4955    the caller to process the literal up to this point. Used for
4956    doubled braces.
4957 */
4958 static int
fstring_find_literal(const char ** str,const char * end,int raw,PyObject ** literal,int recurse_lvl,struct compiling * c,const node * n)4959 fstring_find_literal(const char **str, const char *end, int raw,
4960                      PyObject **literal, int recurse_lvl,
4961                      struct compiling *c, const node *n)
4962 {
4963     /* Get any literal string. It ends when we hit an un-doubled left
4964        brace (which isn't part of a unicode name escape such as
4965        "\N{EULER CONSTANT}"), or the end of the string. */
4966 
4967     const char *s = *str;
4968     const char *literal_start = s;
4969     int result = 0;
4970 
4971     assert(*literal == NULL);
4972     while (s < end) {
4973         char ch = *s++;
4974         if (!raw && ch == '\\' && s < end) {
4975             ch = *s++;
4976             if (ch == 'N') {
4977                 if (s < end && *s++ == '{') {
4978                     while (s < end && *s++ != '}') {
4979                     }
4980                     continue;
4981                 }
4982                 break;
4983             }
4984             if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4985                 return -1;
4986             }
4987         }
4988         if (ch == '{' || ch == '}') {
4989             /* Check for doubled braces, but only at the top level. If
4990                we checked at every level, then f'{0:{3}}' would fail
4991                with the two closing braces. */
4992             if (recurse_lvl == 0) {
4993                 if (s < end && *s == ch) {
4994                     /* We're going to tell the caller that the literal ends
4995                        here, but that they should continue scanning. But also
4996                        skip over the second brace when we resume scanning. */
4997                     *str = s + 1;
4998                     result = 1;
4999                     goto done;
5000                 }
5001 
5002                 /* Where a single '{' is the start of a new expression, a
5003                    single '}' is not allowed. */
5004                 if (ch == '}') {
5005                     *str = s - 1;
5006                     ast_error(c, n, "f-string: single '}' is not allowed");
5007                     return -1;
5008                 }
5009             }
5010             /* We're either at a '{', which means we're starting another
5011                expression; or a '}', which means we're at the end of this
5012                f-string (for a nested format_spec). */
5013             s--;
5014             break;
5015         }
5016     }
5017     *str = s;
5018     assert(s <= end);
5019     assert(s == end || *s == '{' || *s == '}');
5020 done:
5021     if (literal_start != s) {
5022         if (raw)
5023             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
5024                                                     s - literal_start,
5025                                                     NULL, NULL);
5026         else
5027             *literal = decode_unicode_with_escapes(c, n, literal_start,
5028                                                    s - literal_start);
5029         if (!*literal)
5030             return -1;
5031     }
5032     return result;
5033 }
5034 
5035 /* Forward declaration because parsing is recursive. */
5036 static expr_ty
5037 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5038               struct compiling *c, const node *n);
5039 
5040 /* Parse the f-string at *str, ending at end.  We know *str starts an
5041    expression (so it must be a '{'). Returns the FormattedValue node, which
5042    includes the expression, conversion character, format_spec expression, and
5043    optionally the text of the expression (if = is used).
5044 
5045    Note that I don't do a perfect job here: I don't make sure that a
5046    closing brace doesn't match an opening paren, for example. It
5047    doesn't need to error on all invalid expressions, just correctly
5048    find the end of all valid ones. Any errors inside the expression
5049    will be caught when we parse it later.
5050 
5051    *expression is set to the expression.  For an '=' "debug" expression,
5052    *expr_text is set to the debug text (the original text of the expression,
5053    including the '=' and any whitespace around it, as a string object).  If
5054    not a debug expression, *expr_text set to NULL. */
5055 static int
fstring_find_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5056 fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
5057                   PyObject **expr_text, expr_ty *expression,
5058                   struct compiling *c, const node *n)
5059 {
5060     /* Return -1 on error, else 0. */
5061 
5062     const char *expr_start;
5063     const char *expr_end;
5064     expr_ty simple_expression;
5065     expr_ty format_spec = NULL; /* Optional format specifier. */
5066     int conversion = -1; /* The conversion char.  Use default if not
5067                             specified, or !r if using = and no format
5068                             spec. */
5069 
5070     /* 0 if we're not in a string, else the quote char we're trying to
5071        match (single or double quote). */
5072     char quote_char = 0;
5073 
5074     /* If we're inside a string, 1=normal, 3=triple-quoted. */
5075     int string_type = 0;
5076 
5077     /* Keep track of nesting level for braces/parens/brackets in
5078        expressions. */
5079     Py_ssize_t nested_depth = 0;
5080     char parenstack[MAXLEVEL];
5081 
5082     *expr_text = NULL;
5083 
5084     /* Can only nest one level deep. */
5085     if (recurse_lvl >= 2) {
5086         ast_error(c, n, "f-string: expressions nested too deeply");
5087         goto error;
5088     }
5089 
5090     /* The first char must be a left brace, or we wouldn't have gotten
5091        here. Skip over it. */
5092     assert(**str == '{');
5093     *str += 1;
5094 
5095     expr_start = *str;
5096     for (; *str < end; (*str)++) {
5097         char ch;
5098 
5099         /* Loop invariants. */
5100         assert(nested_depth >= 0);
5101         assert(*str >= expr_start && *str < end);
5102         if (quote_char)
5103             assert(string_type == 1 || string_type == 3);
5104         else
5105             assert(string_type == 0);
5106 
5107         ch = **str;
5108         /* Nowhere inside an expression is a backslash allowed. */
5109         if (ch == '\\') {
5110             /* Error: can't include a backslash character, inside
5111                parens or strings or not. */
5112             ast_error(c, n,
5113                       "f-string expression part "
5114                       "cannot include a backslash");
5115             goto error;
5116         }
5117         if (quote_char) {
5118             /* We're inside a string. See if we're at the end. */
5119             /* This code needs to implement the same non-error logic
5120                as tok_get from tokenizer.c, at the letter_quote
5121                label. To actually share that code would be a
5122                nightmare. But, it's unlikely to change and is small,
5123                so duplicate it here. Note we don't need to catch all
5124                of the errors, since they'll be caught when parsing the
5125                expression. We just need to match the non-error
5126                cases. Thus we can ignore \n in single-quoted strings,
5127                for example. Or non-terminated strings. */
5128             if (ch == quote_char) {
5129                 /* Does this match the string_type (single or triple
5130                    quoted)? */
5131                 if (string_type == 3) {
5132                     if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5133                         /* We're at the end of a triple quoted string. */
5134                         *str += 2;
5135                         string_type = 0;
5136                         quote_char = 0;
5137                         continue;
5138                     }
5139                 } else {
5140                     /* We're at the end of a normal string. */
5141                     quote_char = 0;
5142                     string_type = 0;
5143                     continue;
5144                 }
5145             }
5146         } else if (ch == '\'' || ch == '"') {
5147             /* Is this a triple quoted string? */
5148             if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5149                 string_type = 3;
5150                 *str += 2;
5151             } else {
5152                 /* Start of a normal string. */
5153                 string_type = 1;
5154             }
5155             /* Start looking for the end of the string. */
5156             quote_char = ch;
5157         } else if (ch == '[' || ch == '{' || ch == '(') {
5158             if (nested_depth >= MAXLEVEL) {
5159                 ast_error(c, n, "f-string: too many nested parenthesis");
5160                 goto error;
5161             }
5162             parenstack[nested_depth] = ch;
5163             nested_depth++;
5164         } else if (ch == '#') {
5165             /* Error: can't include a comment character, inside parens
5166                or not. */
5167             ast_error(c, n, "f-string expression part cannot include '#'");
5168             goto error;
5169         } else if (nested_depth == 0 &&
5170                    (ch == '!' || ch == ':' || ch == '}' ||
5171                     ch == '=' || ch == '>' || ch == '<')) {
5172             /* See if there's a next character. */
5173             if (*str+1 < end) {
5174                 char next = *(*str+1);
5175 
5176                 /* For "!=". since '=' is not an allowed conversion character,
5177                    nothing is lost in this test. */
5178                 if ((ch == '!' && next == '=') ||   /* != */
5179                     (ch == '=' && next == '=') ||   /* == */
5180                     (ch == '<' && next == '=') ||   /* <= */
5181                     (ch == '>' && next == '=')      /* >= */
5182                     ) {
5183                     *str += 1;
5184                     continue;
5185                 }
5186                 /* Don't get out of the loop for these, if they're single
5187                    chars (not part of 2-char tokens). If by themselves, they
5188                    don't end an expression (unlike say '!'). */
5189                 if (ch == '>' || ch == '<') {
5190                     continue;
5191                 }
5192             }
5193 
5194             /* Normal way out of this loop. */
5195             break;
5196         } else if (ch == ']' || ch == '}' || ch == ')') {
5197             if (!nested_depth) {
5198                 ast_error(c, n, "f-string: unmatched '%c'", ch);
5199                 goto error;
5200             }
5201             nested_depth--;
5202             int opening = parenstack[nested_depth];
5203             if (!((opening == '(' && ch == ')') ||
5204                   (opening == '[' && ch == ']') ||
5205                   (opening == '{' && ch == '}')))
5206             {
5207                 ast_error(c, n,
5208                           "f-string: closing parenthesis '%c' "
5209                           "does not match opening parenthesis '%c'",
5210                           ch, opening);
5211                 goto error;
5212             }
5213         } else {
5214             /* Just consume this char and loop around. */
5215         }
5216     }
5217     expr_end = *str;
5218     /* If we leave this loop in a string or with mismatched parens, we
5219        don't care. We'll get a syntax error when compiling the
5220        expression. But, we can produce a better error message, so
5221        let's just do that.*/
5222     if (quote_char) {
5223         ast_error(c, n, "f-string: unterminated string");
5224         goto error;
5225     }
5226     if (nested_depth) {
5227         int opening = parenstack[nested_depth - 1];
5228         ast_error(c, n, "f-string: unmatched '%c'", opening);
5229         goto error;
5230     }
5231 
5232     if (*str >= end)
5233         goto unexpected_end_of_string;
5234 
5235     /* Compile the expression as soon as possible, so we show errors
5236        related to the expression before errors related to the
5237        conversion or format_spec. */
5238     simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
5239     if (!simple_expression)
5240         goto error;
5241 
5242     /* Check for =, which puts the text value of the expression in
5243        expr_text. */
5244     if (**str == '=') {
5245         if (c->c_feature_version < 8) {
5246             ast_error(c, n,
5247                       "f-string: self documenting expressions are "
5248                       "only supported in Python 3.8 and greater");
5249             goto error;
5250         }
5251         *str += 1;
5252 
5253         /* Skip over ASCII whitespace.  No need to test for end of string
5254            here, since we know there's at least a trailing quote somewhere
5255            ahead. */
5256         while (Py_ISSPACE(**str)) {
5257             *str += 1;
5258         }
5259 
5260         /* Set *expr_text to the text of the expression. */
5261         *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
5262         if (!*expr_text) {
5263             goto error;
5264         }
5265     }
5266 
5267     /* Check for a conversion char, if present. */
5268     if (**str == '!') {
5269         *str += 1;
5270         if (*str >= end)
5271             goto unexpected_end_of_string;
5272 
5273         conversion = **str;
5274         *str += 1;
5275 
5276         /* Validate the conversion. */
5277         if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
5278             ast_error(c, n,
5279                       "f-string: invalid conversion character: "
5280                       "expected 's', 'r', or 'a'");
5281             goto error;
5282         }
5283 
5284     }
5285 
5286     /* Check for the format spec, if present. */
5287     if (*str >= end)
5288         goto unexpected_end_of_string;
5289     if (**str == ':') {
5290         *str += 1;
5291         if (*str >= end)
5292             goto unexpected_end_of_string;
5293 
5294         /* Parse the format spec. */
5295         format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
5296         if (!format_spec)
5297             goto error;
5298     }
5299 
5300     if (*str >= end || **str != '}')
5301         goto unexpected_end_of_string;
5302 
5303     /* We're at a right brace. Consume it. */
5304     assert(*str < end);
5305     assert(**str == '}');
5306     *str += 1;
5307 
5308     /* If we're in = mode (detected by non-NULL expr_text), and have no format
5309        spec and no explict conversion, set the conversion to 'r'. */
5310     if (*expr_text && format_spec == NULL && conversion == -1) {
5311         conversion = 'r';
5312     }
5313 
5314     /* And now create the FormattedValue node that represents this
5315        entire expression with the conversion and format spec. */
5316     *expression = FormattedValue(simple_expression, conversion,
5317                                  format_spec, LINENO(n),
5318                                  n->n_col_offset, n->n_end_lineno,
5319                                  n->n_end_col_offset, c->c_arena);
5320     if (!*expression)
5321         goto error;
5322 
5323     return 0;
5324 
5325 unexpected_end_of_string:
5326     ast_error(c, n, "f-string: expecting '}'");
5327     /* Falls through to error. */
5328 
5329 error:
5330     Py_XDECREF(*expr_text);
5331     return -1;
5332 
5333 }
5334 
5335 /* Return -1 on error.
5336 
5337    Return 0 if we have a literal (possible zero length) and an
5338    expression (zero length if at the end of the string.
5339 
5340    Return 1 if we have a literal, but no expression, and we want the
5341    caller to call us again. This is used to deal with doubled
5342    braces.
5343 
5344    When called multiple times on the string 'a{{b{0}c', this function
5345    will return:
5346 
5347    1. the literal 'a{' with no expression, and a return value
5348       of 1. Despite the fact that there's no expression, the return
5349       value of 1 means we're not finished yet.
5350 
5351    2. the literal 'b' and the expression '0', with a return value of
5352       0. The fact that there's an expression means we're not finished.
5353 
5354    3. literal 'c' with no expression and a return value of 0. The
5355       combination of the return value of 0 with no expression means
5356       we're finished.
5357 */
5358 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5359 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5360                               int recurse_lvl, PyObject **literal,
5361                               PyObject **expr_text, expr_ty *expression,
5362                               struct compiling *c, const node *n)
5363 {
5364     int result;
5365 
5366     assert(*literal == NULL && *expression == NULL);
5367 
5368     /* Get any literal string. */
5369     result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5370     if (result < 0)
5371         goto error;
5372 
5373     assert(result == 0 || result == 1);
5374 
5375     if (result == 1)
5376         /* We have a literal, but don't look at the expression. */
5377         return 1;
5378 
5379     if (*str >= end || **str == '}')
5380         /* We're at the end of the string or the end of a nested
5381            f-string: no expression. The top-level error case where we
5382            expect to be at the end of the string but we're at a '}' is
5383            handled later. */
5384         return 0;
5385 
5386     /* We must now be the start of an expression, on a '{'. */
5387     assert(**str == '{');
5388 
5389     if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5390                           expression, c, n) < 0)
5391         goto error;
5392 
5393     return 0;
5394 
5395 error:
5396     Py_CLEAR(*literal);
5397     return -1;
5398 }
5399 
/* Number of expr_ty slots stored inline in an ExprList before it has to
   switch to heap allocation. */
#define EXPRLIST_N_CACHED  64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. Set to -1 by
                              ExprList_Dealloc to mark the list dead. */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* Inline storage used first. */
} ExprList;
5418 
5419 #ifdef NDEBUG
5420 #define ExprList_check_invariants(l)
5421 #else
5422 static void
ExprList_check_invariants(ExprList * l)5423 ExprList_check_invariants(ExprList *l)
5424 {
5425     /* Check our invariants. Make sure this object is "live", and
5426        hasn't been deallocated. */
5427     assert(l->size >= 0);
5428     assert(l->p != NULL);
5429     if (l->size <= EXPRLIST_N_CACHED)
5430         assert(l->data == l->p);
5431 }
5432 #endif
5433 
5434 static void
ExprList_Init(ExprList * l)5435 ExprList_Init(ExprList *l)
5436 {
5437     l->allocated = EXPRLIST_N_CACHED;
5438     l->size = 0;
5439 
5440     /* Until we start allocating dynamically, p points to data. */
5441     l->p = l->data;
5442 
5443     ExprList_check_invariants(l);
5444 }
5445 
5446 static int
ExprList_Append(ExprList * l,expr_ty exp)5447 ExprList_Append(ExprList *l, expr_ty exp)
5448 {
5449     ExprList_check_invariants(l);
5450     if (l->size >= l->allocated) {
5451         /* We need to alloc (or realloc) the memory. */
5452         Py_ssize_t new_size = l->allocated * 2;
5453 
5454         /* See if we've ever allocated anything dynamically. */
5455         if (l->p == l->data) {
5456             Py_ssize_t i;
5457             /* We're still using the cached data. Switch to
5458                alloc-ing. */
5459             l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
5460             if (!l->p)
5461                 return -1;
5462             /* Copy the cached data into the new buffer. */
5463             for (i = 0; i < l->size; i++)
5464                 l->p[i] = l->data[i];
5465         } else {
5466             /* Just realloc. */
5467             expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
5468             if (!tmp) {
5469                 PyMem_Free(l->p);
5470                 l->p = NULL;
5471                 return -1;
5472             }
5473             l->p = tmp;
5474         }
5475 
5476         l->allocated = new_size;
5477         assert(l->allocated == 2 * l->size);
5478     }
5479 
5480     l->p[l->size++] = exp;
5481 
5482     ExprList_check_invariants(l);
5483     return 0;
5484 }
5485 
5486 static void
ExprList_Dealloc(ExprList * l)5487 ExprList_Dealloc(ExprList *l)
5488 {
5489     ExprList_check_invariants(l);
5490 
5491     /* If there's been an error, or we've never dynamically allocated,
5492        do nothing. */
5493     if (!l->p || l->p == l->data) {
5494         /* Do nothing. */
5495     } else {
5496         /* We have dynamically allocated. Free the memory. */
5497         PyMem_Free(l->p);
5498     }
5499     l->p = NULL;
5500     l->size = -1;
5501 }
5502 
5503 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5504 ExprList_Finish(ExprList *l, PyArena *arena)
5505 {
5506     asdl_seq *seq;
5507 
5508     ExprList_check_invariants(l);
5509 
5510     /* Allocate the asdl_seq and copy the expressions in to it. */
5511     seq = _Py_asdl_seq_new(l->size, arena);
5512     if (seq) {
5513         Py_ssize_t i;
5514         for (i = 0; i < l->size; i++)
5515             asdl_seq_SET(seq, i, l->p[i]);
5516     }
5517     ExprList_Dealloc(l);
5518     return seq;
5519 }
5520 
/* The FstringParser is designed to add a mix of strings and
   f-strings, and concat them together as needed. Ultimately, it
   generates an expr_ty. */
typedef struct {
    PyObject *last_str;   /* Pending literal text not yet turned into a
                             Constant node, or NULL if there is none. */
    ExprList expr_list;   /* Nodes collected so far. */
    int fmode;            /* Set to 1 once an f-string has been added. */
} FstringParser;
5529 
5530 #ifdef NDEBUG
5531 #define FstringParser_check_invariants(state)
5532 #else
5533 static void
FstringParser_check_invariants(FstringParser * state)5534 FstringParser_check_invariants(FstringParser *state)
5535 {
5536     if (state->last_str)
5537         assert(PyUnicode_CheckExact(state->last_str));
5538     ExprList_check_invariants(&state->expr_list);
5539 }
5540 #endif
5541 
5542 static void
FstringParser_Init(FstringParser * state)5543 FstringParser_Init(FstringParser *state)
5544 {
5545     state->last_str = NULL;
5546     state->fmode = 0;
5547     ExprList_Init(&state->expr_list);
5548     FstringParser_check_invariants(state);
5549 }
5550 
5551 static void
FstringParser_Dealloc(FstringParser * state)5552 FstringParser_Dealloc(FstringParser *state)
5553 {
5554     FstringParser_check_invariants(state);
5555 
5556     Py_XDECREF(state->last_str);
5557     ExprList_Dealloc(&state->expr_list);
5558 }
5559 
5560 /* Constants for the following */
5561 static PyObject *u_kind;
5562 
5563 /* Compute 'kind' field for string Constant (either 'u' or None) */
5564 static PyObject *
make_kind(struct compiling * c,const node * n)5565 make_kind(struct compiling *c, const node *n)
5566 {
5567     char *s = NULL;
5568     PyObject *kind = NULL;
5569 
5570     /* Find the first string literal, if any */
5571     while (TYPE(n) != STRING) {
5572         if (NCH(n) == 0)
5573             return NULL;
5574         n = CHILD(n, 0);
5575     }
5576     REQ(n, STRING);
5577 
5578     /* If it starts with 'u', return a PyUnicode "u" string */
5579     s = STR(n);
5580     if (s && *s == 'u') {
5581         if (!u_kind) {
5582             u_kind = PyUnicode_InternFromString("u");
5583             if (!u_kind)
5584                 return NULL;
5585         }
5586         kind = u_kind;
5587         if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5588             return NULL;
5589         }
5590         Py_INCREF(kind);
5591     }
5592     return kind;
5593 }
5594 
5595 /* Make a Constant node, but decref the PyUnicode object being added. */
5596 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5597 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5598 {
5599     PyObject *s = *str;
5600     PyObject *kind = NULL;
5601     *str = NULL;
5602     assert(PyUnicode_CheckExact(s));
5603     if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5604         Py_DECREF(s);
5605         return NULL;
5606     }
5607     kind = make_kind(c, n);
5608     if (kind == NULL && PyErr_Occurred())
5609         return NULL;
5610     return Constant(s, kind, LINENO(n), n->n_col_offset,
5611                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5612 }
5613 
5614 /* Add a non-f-string (that is, a regular literal string). str is
5615    decref'd. */
5616 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5617 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5618 {
5619     FstringParser_check_invariants(state);
5620 
5621     assert(PyUnicode_CheckExact(str));
5622 
5623     if (PyUnicode_GET_LENGTH(str) == 0) {
5624         Py_DECREF(str);
5625         return 0;
5626     }
5627 
5628     if (!state->last_str) {
5629         /* We didn't have a string before, so just remember this one. */
5630         state->last_str = str;
5631     } else {
5632         /* Concatenate this with the previous string. */
5633         PyUnicode_AppendAndDel(&state->last_str, str);
5634         if (!state->last_str)
5635             return -1;
5636     }
5637     FstringParser_check_invariants(state);
5638     return 0;
5639 }
5640 
5641 /* Parse an f-string. The f-string is in *str to end, with no
5642    'f' or quotes. */
5643 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5644 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5645                             const char *end, int raw, int recurse_lvl,
5646                             struct compiling *c, const node *n)
5647 {
5648     FstringParser_check_invariants(state);
5649     state->fmode = 1;
5650 
5651     /* Parse the f-string. */
5652     while (1) {
5653         PyObject *literal = NULL;
5654         PyObject *expr_text = NULL;
5655         expr_ty expression = NULL;
5656 
5657         /* If there's a zero length literal in front of the
5658            expression, literal will be NULL. If we're at the end of
5659            the f-string, expression will be NULL (unless result == 1,
5660            see below). */
5661         int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5662                                                    &literal, &expr_text,
5663                                                    &expression, c, n);
5664         if (result < 0)
5665             return -1;
5666 
5667         /* Add the literal, if any. */
5668         if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5669             Py_XDECREF(expr_text);
5670             return -1;
5671         }
5672         /* Add the expr_text, if any. */
5673         if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5674             return -1;
5675         }
5676 
5677         /* We've dealt with the literal and expr_text, their ownership has
5678            been transferred to the state object.  Don't look at them again. */
5679 
5680         /* See if we should just loop around to get the next literal
5681            and expression, while ignoring the expression this
5682            time. This is used for un-doubling braces, as an
5683            optimization. */
5684         if (result == 1)
5685             continue;
5686 
5687         if (!expression)
5688             /* We're done with this f-string. */
5689             break;
5690 
5691         /* We know we have an expression. Convert any existing string
5692            to a Constant node. */
5693         if (!state->last_str) {
5694             /* Do nothing. No previous literal. */
5695         } else {
5696             /* Convert the existing last_str literal to a Constant node. */
5697             expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5698             if (!str || ExprList_Append(&state->expr_list, str) < 0)
5699                 return -1;
5700         }
5701 
5702         if (ExprList_Append(&state->expr_list, expression) < 0)
5703             return -1;
5704     }
5705 
5706     /* If recurse_lvl is zero, then we must be at the end of the
5707        string. Otherwise, we must be at a right brace. */
5708 
5709     if (recurse_lvl == 0 && *str < end-1) {
5710         ast_error(c, n, "f-string: unexpected end of string");
5711         return -1;
5712     }
5713     if (recurse_lvl != 0 && **str != '}') {
5714         ast_error(c, n, "f-string: expecting '}'");
5715         return -1;
5716     }
5717 
5718     FstringParser_check_invariants(state);
5719     return 0;
5720 }
5721 
5722 /* Convert the partial state reflected in last_str and expr_list to an
5723    expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5724 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5725 FstringParser_Finish(FstringParser *state, struct compiling *c,
5726                      const node *n)
5727 {
5728     asdl_seq *seq;
5729 
5730     FstringParser_check_invariants(state);
5731 
5732     /* If we're just a constant string with no expressions, return
5733        that. */
5734     if (!state->fmode) {
5735         assert(!state->expr_list.size);
5736         if (!state->last_str) {
5737             /* Create a zero length string. */
5738             state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5739             if (!state->last_str)
5740                 goto error;
5741         }
5742         return make_str_node_and_del(&state->last_str, c, n);
5743     }
5744 
5745     /* Create a Constant node out of last_str, if needed. It will be the
5746        last node in our expression list. */
5747     if (state->last_str) {
5748         expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5749         if (!str || ExprList_Append(&state->expr_list, str) < 0)
5750             goto error;
5751     }
5752     /* This has already been freed. */
5753     assert(state->last_str == NULL);
5754 
5755     seq = ExprList_Finish(&state->expr_list, c->c_arena);
5756     if (!seq)
5757         goto error;
5758 
5759     return JoinedStr(seq, LINENO(n), n->n_col_offset,
5760                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5761 
5762 error:
5763     FstringParser_Dealloc(state);
5764     return NULL;
5765 }
5766 
5767 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5768    at end, parse it into an expr_ty.  Return NULL on error.  Adjust
5769    str to point past the parsed portion. */
5770 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5771 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5772               struct compiling *c, const node *n)
5773 {
5774     FstringParser state;
5775 
5776     FstringParser_Init(&state);
5777     if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5778                                     c, n) < 0) {
5779         FstringParser_Dealloc(&state);
5780         return NULL;
5781     }
5782 
5783     return FstringParser_Finish(&state, c, n);
5784 }
5785 
5786 /* n is a Python string literal, including the bracketing quote
5787    characters, and r, b, u, &/or f prefixes (if any), and embedded
5788    escape sequences (if any). parsestr parses it, and sets *result to
5789    decoded Python string object.  If the string is an f-string, set
5790    *fstr and *fstrlen to the unparsed string object.  Return 0 if no
5791    errors occurred.
5792 */
5793 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5794 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5795          PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5796 {
5797     size_t len;
5798     const char *s = STR(n);
5799     int quote = Py_CHARMASK(*s);
5800     int fmode = 0;
5801     *bytesmode = 0;
5802     *rawmode = 0;
5803     *result = NULL;
5804     *fstr = NULL;
5805     if (Py_ISALPHA(quote)) {
5806         while (!*bytesmode || !*rawmode) {
5807             if (quote == 'b' || quote == 'B') {
5808                 quote = *++s;
5809                 *bytesmode = 1;
5810             }
5811             else if (quote == 'u' || quote == 'U') {
5812                 quote = *++s;
5813             }
5814             else if (quote == 'r' || quote == 'R') {
5815                 quote = *++s;
5816                 *rawmode = 1;
5817             }
5818             else if (quote == 'f' || quote == 'F') {
5819                 quote = *++s;
5820                 fmode = 1;
5821             }
5822             else {
5823                 break;
5824             }
5825         }
5826     }
5827 
5828     /* fstrings are only allowed in Python 3.6 and greater */
5829     if (fmode && c->c_feature_version < 6) {
5830         ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5831         return -1;
5832     }
5833 
5834     if (fmode && *bytesmode) {
5835         PyErr_BadInternalCall();
5836         return -1;
5837     }
5838     if (quote != '\'' && quote != '\"') {
5839         PyErr_BadInternalCall();
5840         return -1;
5841     }
5842     /* Skip the leading quote char. */
5843     s++;
5844     len = strlen(s);
5845     if (len > INT_MAX) {
5846         PyErr_SetString(PyExc_OverflowError,
5847                         "string to parse is too long");
5848         return -1;
5849     }
5850     if (s[--len] != quote) {
5851         /* Last quote char must match the first. */
5852         PyErr_BadInternalCall();
5853         return -1;
5854     }
5855     if (len >= 4 && s[0] == quote && s[1] == quote) {
5856         /* A triple quoted string. We've already skipped one quote at
5857            the start and one at the end of the string. Now skip the
5858            two at the start. */
5859         s += 2;
5860         len -= 2;
5861         /* And check that the last two match. */
5862         if (s[--len] != quote || s[--len] != quote) {
5863             PyErr_BadInternalCall();
5864             return -1;
5865         }
5866     }
5867 
5868     if (fmode) {
5869         /* Just return the bytes. The caller will parse the resulting
5870            string. */
5871         *fstr = s;
5872         *fstrlen = len;
5873         return 0;
5874     }
5875 
5876     /* Not an f-string. */
5877     /* Avoid invoking escape decoding routines if possible. */
5878     *rawmode = *rawmode || strchr(s, '\\') == NULL;
5879     if (*bytesmode) {
5880         /* Disallow non-ASCII characters. */
5881         const char *ch;
5882         for (ch = s; *ch; ch++) {
5883             if (Py_CHARMASK(*ch) >= 0x80) {
5884                 ast_error(c, n,
5885                           "bytes can only contain ASCII "
5886                           "literal characters.");
5887                 return -1;
5888             }
5889         }
5890         if (*rawmode)
5891             *result = PyBytes_FromStringAndSize(s, len);
5892         else
5893             *result = decode_bytes_with_escapes(c, n, s, len);
5894     } else {
5895         if (*rawmode)
5896             *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5897         else
5898             *result = decode_unicode_with_escapes(c, n, s, len);
5899     }
5900     return *result == NULL ? -1 : 0;
5901 }
5902 
5903 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5904    each STRING atom, and process it as needed. For bytes, just
5905    concatenate them together, and the result will be a Constant node. For
5906    normal strings and f-strings, concatenate them together. The result
5907    will be a Constant node if there were no f-strings; a FormattedValue
5908    node if there's just an f-string (with no leading or trailing
5909    literals), or a JoinedStr node if there are multiple f-strings or
5910    any literals involved. */
5911 static expr_ty
parsestrplus(struct compiling * c,const node * n)5912 parsestrplus(struct compiling *c, const node *n)
5913 {
5914     int bytesmode = 0;
5915     PyObject *bytes_str = NULL;
5916     int i;
5917 
5918     FstringParser state;
5919     FstringParser_Init(&state);
5920 
5921     for (i = 0; i < NCH(n); i++) {
5922         int this_bytesmode;
5923         int this_rawmode;
5924         PyObject *s;
5925         const char *fstr;
5926         Py_ssize_t fstrlen = -1;  /* Silence a compiler warning. */
5927 
5928         REQ(CHILD(n, i), STRING);
5929         if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5930                      &fstr, &fstrlen) != 0)
5931             goto error;
5932 
5933         /* Check that we're not mixing bytes with unicode. */
5934         if (i != 0 && bytesmode != this_bytesmode) {
5935             ast_error(c, n, "cannot mix bytes and nonbytes literals");
5936             /* s is NULL if the current string part is an f-string. */
5937             Py_XDECREF(s);
5938             goto error;
5939         }
5940         bytesmode = this_bytesmode;
5941 
5942         if (fstr != NULL) {
5943             int result;
5944             assert(s == NULL && !bytesmode);
5945             /* This is an f-string. Parse and concatenate it. */
5946             result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5947                                                  this_rawmode, 0, c, n);
5948             if (result < 0)
5949                 goto error;
5950         } else {
5951             /* A string or byte string. */
5952             assert(s != NULL && fstr == NULL);
5953 
5954             assert(bytesmode ? PyBytes_CheckExact(s) :
5955                    PyUnicode_CheckExact(s));
5956 
5957             if (bytesmode) {
5958                 /* For bytes, concat as we go. */
5959                 if (i == 0) {
5960                     /* First time, just remember this value. */
5961                     bytes_str = s;
5962                 } else {
5963                     PyBytes_ConcatAndDel(&bytes_str, s);
5964                     if (!bytes_str)
5965                         goto error;
5966                 }
5967             } else {
5968                 /* This is a regular string. Concatenate it. */
5969                 if (FstringParser_ConcatAndDel(&state, s) < 0)
5970                     goto error;
5971             }
5972         }
5973     }
5974     if (bytesmode) {
5975         /* Just return the bytes object and we're done. */
5976         if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5977             goto error;
5978         return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5979                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5980     }
5981 
5982     /* We're not a bytes string, bytes_str should never have been set. */
5983     assert(bytes_str == NULL);
5984 
5985     return FstringParser_Finish(&state, c, n);
5986 
5987 error:
5988     Py_XDECREF(bytes_str);
5989     FstringParser_Dealloc(&state);
5990     return NULL;
5991 }
5992 
5993 PyObject *
_PyAST_GetDocString(asdl_seq * body)5994 _PyAST_GetDocString(asdl_seq *body)
5995 {
5996     if (!asdl_seq_LEN(body)) {
5997         return NULL;
5998     }
5999     stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
6000     if (st->kind != Expr_kind) {
6001         return NULL;
6002     }
6003     expr_ty e = st->v.Expr.value;
6004     if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
6005         return e->v.Constant.value;
6006     }
6007     return NULL;
6008 }
6009