1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12
13 #include <assert.h>
14 #include <stdbool.h>
15
16 #define MAXLEVEL 200 /* Max parentheses level */
17
18 static int validate_stmts(asdl_seq *);
19 static int validate_exprs(asdl_seq *, expr_context_ty, int);
20 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
21 static int validate_stmt(stmt_ty);
22 static int validate_expr(expr_ty, expr_context_ty);
23
24 static int
validate_comprehension(asdl_seq * gens)25 validate_comprehension(asdl_seq *gens)
26 {
27 Py_ssize_t i;
28 if (!asdl_seq_LEN(gens)) {
29 PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
30 return 0;
31 }
32 for (i = 0; i < asdl_seq_LEN(gens); i++) {
33 comprehension_ty comp = asdl_seq_GET(gens, i);
34 if (!validate_expr(comp->target, Store) ||
35 !validate_expr(comp->iter, Load) ||
36 !validate_exprs(comp->ifs, Load, 0))
37 return 0;
38 }
39 return 1;
40 }
41
42 static int
validate_slice(slice_ty slice)43 validate_slice(slice_ty slice)
44 {
45 switch (slice->kind) {
46 case Slice_kind:
47 return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
48 (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
49 (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
50 case ExtSlice_kind: {
51 Py_ssize_t i;
52 if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
53 return 0;
54 for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
55 if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
56 return 0;
57 return 1;
58 }
59 case Index_kind:
60 return validate_expr(slice->v.Index.value, Load);
61 default:
62 PyErr_SetString(PyExc_SystemError, "unknown slice node");
63 return 0;
64 }
65 }
66
67 static int
validate_keywords(asdl_seq * keywords)68 validate_keywords(asdl_seq *keywords)
69 {
70 Py_ssize_t i;
71 for (i = 0; i < asdl_seq_LEN(keywords); i++)
72 if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
73 return 0;
74 return 1;
75 }
76
77 static int
validate_args(asdl_seq * args)78 validate_args(asdl_seq *args)
79 {
80 Py_ssize_t i;
81 for (i = 0; i < asdl_seq_LEN(args); i++) {
82 arg_ty arg = asdl_seq_GET(args, i);
83 if (arg->annotation && !validate_expr(arg->annotation, Load))
84 return 0;
85 }
86 return 1;
87 }
88
89 static const char *
expr_context_name(expr_context_ty ctx)90 expr_context_name(expr_context_ty ctx)
91 {
92 switch (ctx) {
93 case Load:
94 return "Load";
95 case Store:
96 return "Store";
97 case Del:
98 return "Del";
99 case AugLoad:
100 return "AugLoad";
101 case AugStore:
102 return "AugStore";
103 case Param:
104 return "Param";
105 default:
106 Py_UNREACHABLE();
107 }
108 }
109
110 static int
validate_arguments(arguments_ty args)111 validate_arguments(arguments_ty args)
112 {
113 if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
114 return 0;
115 }
116 if (args->vararg && args->vararg->annotation
117 && !validate_expr(args->vararg->annotation, Load)) {
118 return 0;
119 }
120 if (!validate_args(args->kwonlyargs))
121 return 0;
122 if (args->kwarg && args->kwarg->annotation
123 && !validate_expr(args->kwarg->annotation, Load)) {
124 return 0;
125 }
126 if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
127 PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
128 return 0;
129 }
130 if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
131 PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
132 "kw_defaults on arguments");
133 return 0;
134 }
135 return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
136 }
137
138 static int
validate_constant(PyObject * value)139 validate_constant(PyObject *value)
140 {
141 if (value == Py_None || value == Py_Ellipsis)
142 return 1;
143
144 if (PyLong_CheckExact(value)
145 || PyFloat_CheckExact(value)
146 || PyComplex_CheckExact(value)
147 || PyBool_Check(value)
148 || PyUnicode_CheckExact(value)
149 || PyBytes_CheckExact(value))
150 return 1;
151
152 if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
153 PyObject *it;
154
155 it = PyObject_GetIter(value);
156 if (it == NULL)
157 return 0;
158
159 while (1) {
160 PyObject *item = PyIter_Next(it);
161 if (item == NULL) {
162 if (PyErr_Occurred()) {
163 Py_DECREF(it);
164 return 0;
165 }
166 break;
167 }
168
169 if (!validate_constant(item)) {
170 Py_DECREF(it);
171 Py_DECREF(item);
172 return 0;
173 }
174 Py_DECREF(item);
175 }
176
177 Py_DECREF(it);
178 return 1;
179 }
180
181 return 0;
182 }
183
184 static int
validate_expr(expr_ty exp,expr_context_ty ctx)185 validate_expr(expr_ty exp, expr_context_ty ctx)
186 {
187 int check_ctx = 1;
188 expr_context_ty actual_ctx;
189
190 /* First check expression context. */
191 switch (exp->kind) {
192 case Attribute_kind:
193 actual_ctx = exp->v.Attribute.ctx;
194 break;
195 case Subscript_kind:
196 actual_ctx = exp->v.Subscript.ctx;
197 break;
198 case Starred_kind:
199 actual_ctx = exp->v.Starred.ctx;
200 break;
201 case Name_kind:
202 actual_ctx = exp->v.Name.ctx;
203 break;
204 case List_kind:
205 actual_ctx = exp->v.List.ctx;
206 break;
207 case Tuple_kind:
208 actual_ctx = exp->v.Tuple.ctx;
209 break;
210 default:
211 if (ctx != Load) {
212 PyErr_Format(PyExc_ValueError, "expression which can't be "
213 "assigned to in %s context", expr_context_name(ctx));
214 return 0;
215 }
216 check_ctx = 0;
217 /* set actual_ctx to prevent gcc warning */
218 actual_ctx = 0;
219 }
220 if (check_ctx && actual_ctx != ctx) {
221 PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
222 expr_context_name(ctx), expr_context_name(actual_ctx));
223 return 0;
224 }
225
226 /* Now validate expression. */
227 switch (exp->kind) {
228 case BoolOp_kind:
229 if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
230 PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
231 return 0;
232 }
233 return validate_exprs(exp->v.BoolOp.values, Load, 0);
234 case BinOp_kind:
235 return validate_expr(exp->v.BinOp.left, Load) &&
236 validate_expr(exp->v.BinOp.right, Load);
237 case UnaryOp_kind:
238 return validate_expr(exp->v.UnaryOp.operand, Load);
239 case Lambda_kind:
240 return validate_arguments(exp->v.Lambda.args) &&
241 validate_expr(exp->v.Lambda.body, Load);
242 case IfExp_kind:
243 return validate_expr(exp->v.IfExp.test, Load) &&
244 validate_expr(exp->v.IfExp.body, Load) &&
245 validate_expr(exp->v.IfExp.orelse, Load);
246 case Dict_kind:
247 if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
248 PyErr_SetString(PyExc_ValueError,
249 "Dict doesn't have the same number of keys as values");
250 return 0;
251 }
252 /* null_ok=1 for keys expressions to allow dict unpacking to work in
253 dict literals, i.e. ``{**{a:b}}`` */
254 return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
255 validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
256 case Set_kind:
257 return validate_exprs(exp->v.Set.elts, Load, 0);
258 #define COMP(NAME) \
259 case NAME ## _kind: \
260 return validate_comprehension(exp->v.NAME.generators) && \
261 validate_expr(exp->v.NAME.elt, Load);
262 COMP(ListComp)
263 COMP(SetComp)
264 COMP(GeneratorExp)
265 #undef COMP
266 case DictComp_kind:
267 return validate_comprehension(exp->v.DictComp.generators) &&
268 validate_expr(exp->v.DictComp.key, Load) &&
269 validate_expr(exp->v.DictComp.value, Load);
270 case Yield_kind:
271 return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
272 case YieldFrom_kind:
273 return validate_expr(exp->v.YieldFrom.value, Load);
274 case Await_kind:
275 return validate_expr(exp->v.Await.value, Load);
276 case Compare_kind:
277 if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
278 PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
279 return 0;
280 }
281 if (asdl_seq_LEN(exp->v.Compare.comparators) !=
282 asdl_seq_LEN(exp->v.Compare.ops)) {
283 PyErr_SetString(PyExc_ValueError, "Compare has a different number "
284 "of comparators and operands");
285 return 0;
286 }
287 return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
288 validate_expr(exp->v.Compare.left, Load);
289 case Call_kind:
290 return validate_expr(exp->v.Call.func, Load) &&
291 validate_exprs(exp->v.Call.args, Load, 0) &&
292 validate_keywords(exp->v.Call.keywords);
293 case Constant_kind:
294 if (!validate_constant(exp->v.Constant.value)) {
295 PyErr_Format(PyExc_TypeError,
296 "got an invalid type in Constant: %s",
297 Py_TYPE(exp->v.Constant.value)->tp_name);
298 return 0;
299 }
300 return 1;
301 case JoinedStr_kind:
302 return validate_exprs(exp->v.JoinedStr.values, Load, 0);
303 case FormattedValue_kind:
304 if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
305 return 0;
306 if (exp->v.FormattedValue.format_spec)
307 return validate_expr(exp->v.FormattedValue.format_spec, Load);
308 return 1;
309 case Attribute_kind:
310 return validate_expr(exp->v.Attribute.value, Load);
311 case Subscript_kind:
312 return validate_slice(exp->v.Subscript.slice) &&
313 validate_expr(exp->v.Subscript.value, Load);
314 case Starred_kind:
315 return validate_expr(exp->v.Starred.value, ctx);
316 case List_kind:
317 return validate_exprs(exp->v.List.elts, ctx, 0);
318 case Tuple_kind:
319 return validate_exprs(exp->v.Tuple.elts, ctx, 0);
320 case NamedExpr_kind:
321 return validate_expr(exp->v.NamedExpr.value, Load);
322 /* This last case doesn't have any checking. */
323 case Name_kind:
324 return 1;
325 }
326 PyErr_SetString(PyExc_SystemError, "unexpected expression");
327 return 0;
328 }
329
330 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)331 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
332 {
333 if (asdl_seq_LEN(seq))
334 return 1;
335 PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
336 return 0;
337 }
338
339 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)340 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
341 {
342 return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
343 validate_exprs(targets, ctx, 0);
344 }
345
346 static int
validate_body(asdl_seq * body,const char * owner)347 validate_body(asdl_seq *body, const char *owner)
348 {
349 return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
350 }
351
/* Validate a single statement node and, recursively, the expressions and
   statements it contains.

   Each case checks the structural requirements of one statement kind
   (non-empty bodies and name lists, required expression contexts, and so
   on) using the validate_* helpers.

   Returns 1 if the statement is valid, 0 with an exception set otherwise.
   An unknown statement kind raises SystemError. */
static int
validate_stmt(stmt_ty stmt)
{
    Py_ssize_t i;
    switch (stmt->kind) {
    case FunctionDef_kind:
        return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
            validate_arguments(stmt->v.FunctionDef.args) &&
            validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
            (!stmt->v.FunctionDef.returns ||
             validate_expr(stmt->v.FunctionDef.returns, Load));
    case ClassDef_kind:
        return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
            validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
            validate_keywords(stmt->v.ClassDef.keywords) &&
            validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
    case Return_kind:
        /* The return value is optional. */
        return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
    case Delete_kind:
        return validate_assignlist(stmt->v.Delete.targets, Del);
    case Assign_kind:
        return validate_assignlist(stmt->v.Assign.targets, Store) &&
            validate_expr(stmt->v.Assign.value, Load);
    case AugAssign_kind:
        return validate_expr(stmt->v.AugAssign.target, Store) &&
            validate_expr(stmt->v.AugAssign.value, Load);
    case AnnAssign_kind:
        /* The "simple" flag may only be set when the target is a Name. */
        if (stmt->v.AnnAssign.target->kind != Name_kind &&
            stmt->v.AnnAssign.simple) {
            PyErr_SetString(PyExc_TypeError,
                            "AnnAssign with simple non-Name target");
            return 0;
        }
        /* The assigned value is optional; the annotation is not. */
        return validate_expr(stmt->v.AnnAssign.target, Store) &&
               (!stmt->v.AnnAssign.value ||
                validate_expr(stmt->v.AnnAssign.value, Load)) &&
               validate_expr(stmt->v.AnnAssign.annotation, Load);
    case For_kind:
        /* The body must be non-empty; orelse may be empty. */
        return validate_expr(stmt->v.For.target, Store) &&
            validate_expr(stmt->v.For.iter, Load) &&
            validate_body(stmt->v.For.body, "For") &&
            validate_stmts(stmt->v.For.orelse);
    case AsyncFor_kind:
        return validate_expr(stmt->v.AsyncFor.target, Store) &&
            validate_expr(stmt->v.AsyncFor.iter, Load) &&
            validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
            validate_stmts(stmt->v.AsyncFor.orelse);
    case While_kind:
        return validate_expr(stmt->v.While.test, Load) &&
            validate_body(stmt->v.While.body, "While") &&
            validate_stmts(stmt->v.While.orelse);
    case If_kind:
        return validate_expr(stmt->v.If.test, Load) &&
            validate_body(stmt->v.If.body, "If") &&
            validate_stmts(stmt->v.If.orelse);
    case With_kind:
        if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
            return 0;
        /* Each item has a context expression and an optional
           "as" target, which must be in Store context. */
        for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
            withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
            if (!validate_expr(item->context_expr, Load) ||
                (item->optional_vars && !validate_expr(item->optional_vars, Store)))
                return 0;
        }
        return validate_body(stmt->v.With.body, "With");
    case AsyncWith_kind:
        if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
            return 0;
        for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
            withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
            if (!validate_expr(item->context_expr, Load) ||
                (item->optional_vars && !validate_expr(item->optional_vars, Store)))
                return 0;
        }
        return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
    case Raise_kind:
        if (stmt->v.Raise.exc) {
            return validate_expr(stmt->v.Raise.exc, Load) &&
                (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
        }
        /* No exception given: a cause on its own is invalid. */
        if (stmt->v.Raise.cause) {
            PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
            return 0;
        }
        return 1;
    case Try_kind:
        if (!validate_body(stmt->v.Try.body, "Try"))
            return 0;
        /* A try statement needs at least one handler or a finalbody. */
        if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
            !asdl_seq_LEN(stmt->v.Try.finalbody)) {
            PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
            return 0;
        }
        /* orelse is only allowed together with except handlers. */
        if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
            asdl_seq_LEN(stmt->v.Try.orelse)) {
            PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
            return 0;
        }
        /* Each handler has an optional type expression and a non-empty
           body. */
        for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
            excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
            if ((handler->v.ExceptHandler.type &&
                 !validate_expr(handler->v.ExceptHandler.type, Load)) ||
                !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
                return 0;
        }
        return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
                validate_stmts(stmt->v.Try.finalbody)) &&
            (!asdl_seq_LEN(stmt->v.Try.orelse) ||
             validate_stmts(stmt->v.Try.orelse));
    case Assert_kind:
        /* The message is optional. */
        return validate_expr(stmt->v.Assert.test, Load) &&
            (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
    case Import_kind:
        return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
    case ImportFrom_kind:
        if (stmt->v.ImportFrom.level < 0) {
            PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
            return 0;
        }
        return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
    case Global_kind:
        return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
    case Nonlocal_kind:
        return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
    case Expr_kind:
        return validate_expr(stmt->v.Expr.value, Load);
    case AsyncFunctionDef_kind:
        return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
            validate_arguments(stmt->v.AsyncFunctionDef.args) &&
            validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
            (!stmt->v.AsyncFunctionDef.returns ||
             validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
    /* These statements have nothing to validate. */
    case Pass_kind:
    case Break_kind:
    case Continue_kind:
        return 1;
    default:
        PyErr_SetString(PyExc_SystemError, "unexpected statement");
        return 0;
    }
}
493
494 static int
validate_stmts(asdl_seq * seq)495 validate_stmts(asdl_seq *seq)
496 {
497 Py_ssize_t i;
498 for (i = 0; i < asdl_seq_LEN(seq); i++) {
499 stmt_ty stmt = asdl_seq_GET(seq, i);
500 if (stmt) {
501 if (!validate_stmt(stmt))
502 return 0;
503 }
504 else {
505 PyErr_SetString(PyExc_ValueError,
506 "None disallowed in statement list");
507 return 0;
508 }
509 }
510 return 1;
511 }
512
513 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)514 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
515 {
516 Py_ssize_t i;
517 for (i = 0; i < asdl_seq_LEN(exprs); i++) {
518 expr_ty expr = asdl_seq_GET(exprs, i);
519 if (expr) {
520 if (!validate_expr(expr, ctx))
521 return 0;
522 }
523 else if (!null_ok) {
524 PyErr_SetString(PyExc_ValueError,
525 "None disallowed in expression list");
526 return 0;
527 }
528
529 }
530 return 1;
531 }
532
533 int
PyAST_Validate(mod_ty mod)534 PyAST_Validate(mod_ty mod)
535 {
536 int res = 0;
537
538 switch (mod->kind) {
539 case Module_kind:
540 res = validate_stmts(mod->v.Module.body);
541 break;
542 case Interactive_kind:
543 res = validate_stmts(mod->v.Interactive.body);
544 break;
545 case Expression_kind:
546 res = validate_expr(mod->v.Expression.body, Load);
547 break;
548 case Suite_kind:
549 PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
550 break;
551 default:
552 PyErr_SetString(PyExc_SystemError, "impossible module node");
553 res = 0;
554 break;
555 }
556 return res;
557 }
558
559 /* This is done here, so defines like "test" don't interfere with AST use above. */
560 #include "grammar.h"
561 #include "parsetok.h"
562 #include "graminit.h"
563
/* Data structure used internally: state shared by the CST-to-AST
   conversion helpers below.  PyAST_FromNodeObject() fills it in. */
struct compiling {
    PyArena *c_arena; /* Arena for allocating memory. */
    PyObject *c_filename; /* Filename, used when reporting errors;
                             PyAST_FromNodeObject() stores a borrowed
                             reference here. */
    PyObject *c_normalize; /* Normalization function from unicodedata;
                              NULL until init_normalization() loads it. */
    int c_feature_version; /* Latest minor version of Python for allowed features */
};
571
572 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
573 static expr_ty ast_for_expr(struct compiling *, const node *);
574 static stmt_ty ast_for_stmt(struct compiling *, const node *);
575 static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
576 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
577 expr_context_ty);
578 static expr_ty ast_for_testlist(struct compiling *, const node *);
579 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
580
581 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
582 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
583
584 /* Note different signature for ast_for_call */
585 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
586 const node *, const node *);
587
588 static PyObject *parsenumber(struct compiling *, const char *);
589 static expr_ty parsestrplus(struct compiling *, const node *n);
590 static void get_last_end_pos(asdl_seq *, int *, int *);
591
592 #define COMP_GENEXP 0
593 #define COMP_LISTCOMP 1
594 #define COMP_SETCOMP 2
595
596 static int
init_normalization(struct compiling * c)597 init_normalization(struct compiling *c)
598 {
599 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
600 if (!m)
601 return 0;
602 c->c_normalize = PyObject_GetAttrString(m, "normalize");
603 Py_DECREF(m);
604 if (!c->c_normalize)
605 return 0;
606 return 1;
607 }
608
609 static identifier
new_identifier(const char * n,struct compiling * c)610 new_identifier(const char *n, struct compiling *c)
611 {
612 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
613 if (!id)
614 return NULL;
615 /* PyUnicode_DecodeUTF8 should always return a ready string. */
616 assert(PyUnicode_IS_READY(id));
617 /* Check whether there are non-ASCII characters in the
618 identifier; if so, normalize to NFKC. */
619 if (!PyUnicode_IS_ASCII(id)) {
620 PyObject *id2;
621 _Py_IDENTIFIER(NFKC);
622 if (!c->c_normalize && !init_normalization(c)) {
623 Py_DECREF(id);
624 return NULL;
625 }
626 PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
627 if (form == NULL) {
628 Py_DECREF(id);
629 return NULL;
630 }
631 PyObject *args[2] = {form, id};
632 id2 = _PyObject_FastCall(c->c_normalize, args, 2);
633 Py_DECREF(id);
634 if (!id2)
635 return NULL;
636 if (!PyUnicode_Check(id2)) {
637 PyErr_Format(PyExc_TypeError,
638 "unicodedata.normalize() must return a string, not "
639 "%.200s",
640 Py_TYPE(id2)->tp_name);
641 Py_DECREF(id2);
642 return NULL;
643 }
644 id = id2;
645 }
646 PyUnicode_InternInPlace(&id);
647 if (PyArena_AddPyObject(c->c_arena, id) < 0) {
648 Py_DECREF(id);
649 return NULL;
650 }
651 return id;
652 }
653
654 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
655
656 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)657 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
658 {
659 PyObject *value, *errstr, *loc, *tmp;
660 va_list va;
661
662 va_start(va, errmsg);
663 errstr = PyUnicode_FromFormatV(errmsg, va);
664 va_end(va);
665 if (!errstr) {
666 return 0;
667 }
668 loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
669 if (!loc) {
670 Py_INCREF(Py_None);
671 loc = Py_None;
672 }
673 tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
674 if (!tmp) {
675 Py_DECREF(errstr);
676 return 0;
677 }
678 value = PyTuple_Pack(2, errstr, tmp);
679 Py_DECREF(errstr);
680 Py_DECREF(tmp);
681 if (value) {
682 PyErr_SetObject(PyExc_SyntaxError, value);
683 Py_DECREF(value);
684 }
685 return 0;
686 }
687
/* num_stmts() (defined below, after new_type_comment()) returns the number
   of statements contained in a node.

   Use this routine to determine how big a sequence is needed for
   the statements in a parse tree.  Its raison d'etre is this bit of
   grammar:

   stmt: simple_stmt | compound_stmt
   simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE

   A simple_stmt can contain multiple small_stmt elements joined
   by semicolons.  If the arg is a simple_stmt, the number of
   small_stmt elements is returned.
*/
701
702 static string
new_type_comment(const char * s,struct compiling * c)703 new_type_comment(const char *s, struct compiling *c)
704 {
705 PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
706 if (res == NULL)
707 return NULL;
708 if (PyArena_AddPyObject(c->c_arena, res) < 0) {
709 Py_DECREF(res);
710 return NULL;
711 }
712 return res;
713 }
714 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
715
716 static int
num_stmts(const node * n)717 num_stmts(const node *n)
718 {
719 int i, l;
720 node *ch;
721
722 switch (TYPE(n)) {
723 case single_input:
724 if (TYPE(CHILD(n, 0)) == NEWLINE)
725 return 0;
726 else
727 return num_stmts(CHILD(n, 0));
728 case file_input:
729 l = 0;
730 for (i = 0; i < NCH(n); i++) {
731 ch = CHILD(n, i);
732 if (TYPE(ch) == stmt)
733 l += num_stmts(ch);
734 }
735 return l;
736 case stmt:
737 return num_stmts(CHILD(n, 0));
738 case compound_stmt:
739 return 1;
740 case simple_stmt:
741 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
742 case suite:
743 case func_body_suite:
744 /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
745 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
746 if (NCH(n) == 1)
747 return num_stmts(CHILD(n, 0));
748 else {
749 i = 2;
750 l = 0;
751 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
752 i += 2;
753 for (; i < (NCH(n) - 1); i++)
754 l += num_stmts(CHILD(n, i));
755 return l;
756 }
757 default: {
758 char buf[128];
759
760 sprintf(buf, "Non-statement found: %d %d",
761 TYPE(n), NCH(n));
762 Py_FatalError(buf);
763 }
764 }
765 Py_UNREACHABLE();
766 }
767
768 /* Transform the CST rooted at node * to the appropriate AST
769 */
770
/* Convert the CST rooted at `n` into an AST module node.

   n:        root of the parse tree; an encoding_decl wrapper is skipped.
             Supported roots are file_input, eval_input, single_input and
             func_type_input; anything else raises SystemError.
   flags:    optional compiler flags; only cf_feature_version is read here.
             When NULL, PY_MINOR_VERSION is used.
   filename: filename object stored (borrowed) for error reporting.
   arena:    arena in which all AST nodes and sequences are allocated.

   Returns whatever Module(), Expression(), Interactive() or
   FunctionType() produced, or NULL if an error occurred before that
   point. */
mod_ty
PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
                     PyObject *filename, PyArena *arena)
{
    int i, j, k, num;
    asdl_seq *stmts = NULL;
    asdl_seq *type_ignores = NULL;
    stmt_ty s;
    node *ch;
    struct compiling c;
    mod_ty res = NULL;
    asdl_seq *argtypes = NULL;
    expr_ty ret, arg;

    c.c_arena = arena;
    /* borrowed reference */
    c.c_filename = filename;
    /* Loaded on demand by new_identifier(); released at "out". */
    c.c_normalize = NULL;
    c.c_feature_version = flags ? flags->cf_feature_version : PY_MINOR_VERSION;

    if (TYPE(n) == encoding_decl)
        n = CHILD(n, 0);

    /* k is the next free slot in stmts for the file_input case. */
    k = 0;
    switch (TYPE(n)) {
        case file_input:
            stmts = _Py_asdl_seq_new(num_stmts(n), arena);
            if (!stmts)
                goto out;
            /* The last child is the ENDMARKER, handled separately below. */
            for (i = 0; i < NCH(n) - 1; i++) {
                ch = CHILD(n, i);
                if (TYPE(ch) == NEWLINE)
                    continue;
                REQ(ch, stmt);
                num = num_stmts(ch);
                if (num == 1) {
                    s = ast_for_stmt(&c, ch);
                    if (!s)
                        goto out;
                    asdl_seq_SET(stmts, k++, s);
                }
                else {
                    /* Several small statements on one line: they sit at
                       the even child positions of the simple_stmt. */
                    ch = CHILD(ch, 0);
                    REQ(ch, simple_stmt);
                    for (j = 0; j < num; j++) {
                        s = ast_for_stmt(&c, CHILD(ch, j * 2));
                        if (!s)
                            goto out;
                        asdl_seq_SET(stmts, k++, s);
                    }
                }
            }

            /* Type ignores are stored under the ENDMARKER in file_input. */
            ch = CHILD(n, NCH(n) - 1);
            REQ(ch, ENDMARKER);
            num = NCH(ch);
            type_ignores = _Py_asdl_seq_new(num, arena);
            if (!type_ignores)
                goto out;

            for (i = 0; i < num; i++) {
                string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
                if (!type_comment)
                    goto out;
                type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
                if (!ti)
                   goto out;
               asdl_seq_SET(type_ignores, i, ti);
            }

            res = Module(stmts, type_ignores, arena);
            break;
        case eval_input: {
            expr_ty testlist_ast;

            /* XXX Why not comp_for here? */
            testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
            if (!testlist_ast)
                goto out;
            res = Expression(testlist_ast, arena);
            break;
        }
        case single_input:
            if (TYPE(CHILD(n, 0)) == NEWLINE) {
                /* An empty interactive line becomes a single Pass. */
                stmts = _Py_asdl_seq_new(1, arena);
                if (!stmts)
                    goto out;
                asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
                                            n->n_end_lineno, n->n_end_col_offset,
                                            arena));
                if (!asdl_seq_GET(stmts, 0))
                    goto out;
                res = Interactive(stmts, arena);
            }
            else {
                n = CHILD(n, 0);
                num = num_stmts(n);
                stmts = _Py_asdl_seq_new(num, arena);
                if (!stmts)
                    goto out;
                if (num == 1) {
                    s = ast_for_stmt(&c, n);
                    if (!s)
                        goto out;
                    asdl_seq_SET(stmts, 0, s);
                }
                else {
                    /* Only a simple_stmt can contain multiple statements. */
                    REQ(n, simple_stmt);
                    /* Statements are at even positions; stop at the
                       trailing NEWLINE. */
                    for (i = 0; i < NCH(n); i += 2) {
                        if (TYPE(CHILD(n, i)) == NEWLINE)
                            break;
                        s = ast_for_stmt(&c, CHILD(n, i));
                        if (!s)
                            goto out;
                        asdl_seq_SET(stmts, i / 2, s);
                    }
                }

                res = Interactive(stmts, arena);
            }
            break;
        case func_type_input:
            n = CHILD(n, 0);
            REQ(n, func_type);

            if (TYPE(CHILD(n, 1)) == typelist) {
                ch = CHILD(n, 1);
                /* this is overly permissive -- we don't pay any attention to
                 * stars on the args -- just parse them into an ordered list */
                /* First pass: count the test children to size argtypes. */
                num = 0;
                for (i = 0; i < NCH(ch); i++) {
                    if (TYPE(CHILD(ch, i)) == test) {
                        num++;
                    }
                }

                argtypes = _Py_asdl_seq_new(num, arena);
                if (!argtypes)
                    goto out;

                /* Second pass: convert each test child in order. */
                j = 0;
                for (i = 0; i < NCH(ch); i++) {
                    if (TYPE(CHILD(ch, i)) == test) {
                        arg = ast_for_expr(&c, CHILD(ch, i));
                        if (!arg)
                            goto out;
                        asdl_seq_SET(argtypes, j++, arg);
                    }
                }
            }
            else {
                argtypes = _Py_asdl_seq_new(0, arena);
                if (!argtypes)
                    goto out;
            }

            /* The return type is the last child of func_type. */
            ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
            if (!ret)
                goto out;
            res = FunctionType(argtypes, ret, arena);
            break;
        default:
            PyErr_Format(PyExc_SystemError,
                         "invalid node %d for PyAST_FromNode", TYPE(n));
            goto out;
    }
 out:
    /* Common exit: drop the reference to unicodedata.normalize if it was
       loaded during conversion. */
    if (c.c_normalize) {
        Py_DECREF(c.c_normalize);
    }
    return res;
}
945
946 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)947 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
948 PyArena *arena)
949 {
950 mod_ty mod;
951 PyObject *filename;
952 filename = PyUnicode_DecodeFSDefault(filename_str);
953 if (filename == NULL)
954 return NULL;
955 mod = PyAST_FromNodeObject(n, flags, filename, arena);
956 Py_DECREF(filename);
957 return mod;
958
959 }
960
961 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
962 */
963
964 static operator_ty
get_operator(struct compiling * c,const node * n)965 get_operator(struct compiling *c, const node *n)
966 {
967 switch (TYPE(n)) {
968 case VBAR:
969 return BitOr;
970 case CIRCUMFLEX:
971 return BitXor;
972 case AMPER:
973 return BitAnd;
974 case LEFTSHIFT:
975 return LShift;
976 case RIGHTSHIFT:
977 return RShift;
978 case PLUS:
979 return Add;
980 case MINUS:
981 return Sub;
982 case STAR:
983 return Mult;
984 case AT:
985 if (c->c_feature_version < 5) {
986 ast_error(c, n,
987 "The '@' operator is only supported in Python 3.5 and greater");
988 return (operator_ty)0;
989 }
990 return MatMult;
991 case SLASH:
992 return Div;
993 case DOUBLESLASH:
994 return FloorDiv;
995 case PERCENT:
996 return Mod;
997 default:
998 return (operator_ty)0;
999 }
1000 }
1001
/* NULL-terminated list of names that cannot be assigned to.

   The order matters: forbidden_name() skips the first three entries
   (None, True, False) when called without full_checks, so only the
   entries after them are checked in that mode. */
static const char * const FORBIDDEN[] = {
    "None",
    "True",
    "False",
    "__debug__",
    NULL,
};
1009
1010 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1011 forbidden_name(struct compiling *c, identifier name, const node *n,
1012 int full_checks)
1013 {
1014 assert(PyUnicode_Check(name));
1015 const char * const *p = FORBIDDEN;
1016 if (!full_checks) {
1017 /* In most cases, the parser will protect True, False, and None
1018 from being assign to. */
1019 p += 3;
1020 }
1021 for (; *p; p++) {
1022 if (_PyUnicode_EqualToASCIIString(name, *p)) {
1023 ast_error(c, n, "cannot assign to %U", name);
1024 return 1;
1025 }
1026 }
1027 return 0;
1028 }
1029
1030 static expr_ty
copy_location(expr_ty e,const node * n)1031 copy_location(expr_ty e, const node *n)
1032 {
1033 if (e) {
1034 e->lineno = LINENO(n);
1035 e->col_offset = n->n_col_offset;
1036 e->end_lineno = n->n_end_lineno;
1037 e->end_col_offset = n->n_end_col_offset;
1038 }
1039 return e;
1040 }
1041
1042 static const char *
get_expr_name(expr_ty e)1043 get_expr_name(expr_ty e)
1044 {
1045 switch (e->kind) {
1046 case Attribute_kind:
1047 return "attribute";
1048 case Subscript_kind:
1049 return "subscript";
1050 case Starred_kind:
1051 return "starred";
1052 case Name_kind:
1053 return "name";
1054 case List_kind:
1055 return "list";
1056 case Tuple_kind:
1057 return "tuple";
1058 case Lambda_kind:
1059 return "lambda";
1060 case Call_kind:
1061 return "function call";
1062 case BoolOp_kind:
1063 case BinOp_kind:
1064 case UnaryOp_kind:
1065 return "operator";
1066 case GeneratorExp_kind:
1067 return "generator expression";
1068 case Yield_kind:
1069 case YieldFrom_kind:
1070 return "yield expression";
1071 case Await_kind:
1072 return "await expression";
1073 case ListComp_kind:
1074 return "list comprehension";
1075 case SetComp_kind:
1076 return "set comprehension";
1077 case DictComp_kind:
1078 return "dict comprehension";
1079 case Dict_kind:
1080 return "dict display";
1081 case Set_kind:
1082 return "set display";
1083 case JoinedStr_kind:
1084 case FormattedValue_kind:
1085 return "f-string expression";
1086 case Constant_kind: {
1087 PyObject *value = e->v.Constant.value;
1088 if (value == Py_None) {
1089 return "None";
1090 }
1091 if (value == Py_False) {
1092 return "False";
1093 }
1094 if (value == Py_True) {
1095 return "True";
1096 }
1097 if (value == Py_Ellipsis) {
1098 return "Ellipsis";
1099 }
1100 return "literal";
1101 }
1102 case Compare_kind:
1103 return "comparison";
1104 case IfExp_kind:
1105 return "conditional expression";
1106 case NamedExpr_kind:
1107 return "named expression";
1108 default:
1109 PyErr_Format(PyExc_SystemError,
1110 "unexpected expression in assignment %d (line %d)",
1111 e->kind, e->lineno);
1112 return NULL;
1113 }
1114 }
1115
1116 /* Set the context ctx for expr_ty e, recursively traversing e.
1117
1118 Only sets context for expr kinds that "can appear in assignment context"
1119 (according to ../Parser/Python.asdl). For other expr kinds, it sets
1120 an appropriate syntax error and returns false.
1121 */
1122
1123 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1124 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1125 {
1126 asdl_seq *s = NULL;
1127
1128 /* The ast defines augmented store and load contexts, but the
1129 implementation here doesn't actually use them. The code may be
1130 a little more complex than necessary as a result. It also means
1131 that expressions in an augmented assignment have a Store context.
1132 Consider restructuring so that augmented assignment uses
1133 set_context(), too.
1134 */
1135 assert(ctx != AugStore && ctx != AugLoad);
1136
1137 switch (e->kind) {
1138 case Attribute_kind:
1139 e->v.Attribute.ctx = ctx;
1140 if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1141 return 0;
1142 break;
1143 case Subscript_kind:
1144 e->v.Subscript.ctx = ctx;
1145 break;
1146 case Starred_kind:
1147 e->v.Starred.ctx = ctx;
1148 if (!set_context(c, e->v.Starred.value, ctx, n))
1149 return 0;
1150 break;
1151 case Name_kind:
1152 if (ctx == Store) {
1153 if (forbidden_name(c, e->v.Name.id, n, 0))
1154 return 0; /* forbidden_name() calls ast_error() */
1155 }
1156 e->v.Name.ctx = ctx;
1157 break;
1158 case List_kind:
1159 e->v.List.ctx = ctx;
1160 s = e->v.List.elts;
1161 break;
1162 case Tuple_kind:
1163 e->v.Tuple.ctx = ctx;
1164 s = e->v.Tuple.elts;
1165 break;
1166 default: {
1167 const char *expr_name = get_expr_name(e);
1168 if (expr_name != NULL) {
1169 ast_error(c, n, "cannot %s %s",
1170 ctx == Store ? "assign to" : "delete",
1171 expr_name);
1172 }
1173 return 0;
1174 }
1175 }
1176
1177 /* If the LHS is a list or tuple, we need to set the assignment
1178 context for all the contained elements.
1179 */
1180 if (s) {
1181 Py_ssize_t i;
1182
1183 for (i = 0; i < asdl_seq_LEN(s); i++) {
1184 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1185 return 0;
1186 }
1187 }
1188 return 1;
1189 }
1190
1191 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1192 ast_for_augassign(struct compiling *c, const node *n)
1193 {
1194 REQ(n, augassign);
1195 n = CHILD(n, 0);
1196 switch (STR(n)[0]) {
1197 case '+':
1198 return Add;
1199 case '-':
1200 return Sub;
1201 case '/':
1202 if (STR(n)[1] == '/')
1203 return FloorDiv;
1204 else
1205 return Div;
1206 case '%':
1207 return Mod;
1208 case '<':
1209 return LShift;
1210 case '>':
1211 return RShift;
1212 case '&':
1213 return BitAnd;
1214 case '^':
1215 return BitXor;
1216 case '|':
1217 return BitOr;
1218 case '*':
1219 if (STR(n)[1] == '*')
1220 return Pow;
1221 else
1222 return Mult;
1223 case '@':
1224 if (c->c_feature_version < 5) {
1225 ast_error(c, n,
1226 "The '@' operator is only supported in Python 3.5 and greater");
1227 return (operator_ty)0;
1228 }
1229 return MatMult;
1230 default:
1231 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1232 return (operator_ty)0;
1233 }
1234 }
1235
1236 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1237 ast_for_comp_op(struct compiling *c, const node *n)
1238 {
1239 /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1240 |'is' 'not'
1241 */
1242 REQ(n, comp_op);
1243 if (NCH(n) == 1) {
1244 n = CHILD(n, 0);
1245 switch (TYPE(n)) {
1246 case LESS:
1247 return Lt;
1248 case GREATER:
1249 return Gt;
1250 case EQEQUAL: /* == */
1251 return Eq;
1252 case LESSEQUAL:
1253 return LtE;
1254 case GREATEREQUAL:
1255 return GtE;
1256 case NOTEQUAL:
1257 return NotEq;
1258 case NAME:
1259 if (strcmp(STR(n), "in") == 0)
1260 return In;
1261 if (strcmp(STR(n), "is") == 0)
1262 return Is;
1263 /* fall through */
1264 default:
1265 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1266 STR(n));
1267 return (cmpop_ty)0;
1268 }
1269 }
1270 else if (NCH(n) == 2) {
1271 /* handle "not in" and "is not" */
1272 switch (TYPE(CHILD(n, 0))) {
1273 case NAME:
1274 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1275 return NotIn;
1276 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1277 return IsNot;
1278 /* fall through */
1279 default:
1280 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1281 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1282 return (cmpop_ty)0;
1283 }
1284 }
1285 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1286 NCH(n));
1287 return (cmpop_ty)0;
1288 }
1289
1290 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1291 seq_for_testlist(struct compiling *c, const node *n)
1292 {
1293 /* testlist: test (',' test)* [',']
1294 testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1295 */
1296 asdl_seq *seq;
1297 expr_ty expression;
1298 int i;
1299 assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1300
1301 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1302 if (!seq)
1303 return NULL;
1304
1305 for (i = 0; i < NCH(n); i += 2) {
1306 const node *ch = CHILD(n, i);
1307 assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1308
1309 expression = ast_for_expr(c, ch);
1310 if (!expression)
1311 return NULL;
1312
1313 assert(i / 2 < seq->size);
1314 asdl_seq_SET(seq, i / 2, expression);
1315 }
1316 return seq;
1317 }
1318
1319 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1320 ast_for_arg(struct compiling *c, const node *n)
1321 {
1322 identifier name;
1323 expr_ty annotation = NULL;
1324 node *ch;
1325 arg_ty ret;
1326
1327 assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1328 ch = CHILD(n, 0);
1329 name = NEW_IDENTIFIER(ch);
1330 if (!name)
1331 return NULL;
1332 if (forbidden_name(c, name, ch, 0))
1333 return NULL;
1334
1335 if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1336 annotation = ast_for_expr(c, CHILD(n, 2));
1337 if (!annotation)
1338 return NULL;
1339 }
1340
1341 ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1342 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1343 if (!ret)
1344 return NULL;
1345 return ret;
1346 }
1347
1348 /* returns -1 if failed to handle keyword only arguments
1349 returns new position to keep processing if successful
1350 (',' tfpdef ['=' test])*
1351 ^^^
1352 start pointing here
1353 */
1354 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1355 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1356 asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1357 {
1358 PyObject *argname;
1359 node *ch;
1360 expr_ty expression, annotation;
1361 arg_ty arg = NULL;
1362 int i = start;
1363 int j = 0; /* index for kwdefaults and kwonlyargs */
1364
1365 if (kwonlyargs == NULL) {
1366 ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1367 return -1;
1368 }
1369 assert(kwdefaults != NULL);
1370 while (i < NCH(n)) {
1371 ch = CHILD(n, i);
1372 switch (TYPE(ch)) {
1373 case vfpdef:
1374 case tfpdef:
1375 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1376 expression = ast_for_expr(c, CHILD(n, i + 2));
1377 if (!expression)
1378 goto error;
1379 asdl_seq_SET(kwdefaults, j, expression);
1380 i += 2; /* '=' and test */
1381 }
1382 else { /* setting NULL if no default value exists */
1383 asdl_seq_SET(kwdefaults, j, NULL);
1384 }
1385 if (NCH(ch) == 3) {
1386 /* ch is NAME ':' test */
1387 annotation = ast_for_expr(c, CHILD(ch, 2));
1388 if (!annotation)
1389 goto error;
1390 }
1391 else {
1392 annotation = NULL;
1393 }
1394 ch = CHILD(ch, 0);
1395 argname = NEW_IDENTIFIER(ch);
1396 if (!argname)
1397 goto error;
1398 if (forbidden_name(c, argname, ch, 0))
1399 goto error;
1400 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1401 ch->n_end_lineno, ch->n_end_col_offset,
1402 c->c_arena);
1403 if (!arg)
1404 goto error;
1405 asdl_seq_SET(kwonlyargs, j++, arg);
1406 i += 1; /* the name */
1407 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1408 i += 1; /* the comma, if present */
1409 break;
1410 case TYPE_COMMENT:
1411 /* arg will be equal to the last argument processed */
1412 arg->type_comment = NEW_TYPE_COMMENT(ch);
1413 if (!arg->type_comment)
1414 goto error;
1415 i += 1;
1416 break;
1417 case DOUBLESTAR:
1418 return i;
1419 default:
1420 ast_error(c, ch, "unexpected node");
1421 goto error;
1422 }
1423 }
1424 return i;
1425 error:
1426 return -1;
1427 }
1428
/* Create AST for argument list.

   n is a parameters node, a typedargslist (function definition) or a
   varargslist (lambda definition).  The children are scanned twice: a
   counting pass sizes the positional-only, positional, keyword-only and
   default sequences, then a building pass fills them in.

   Returns the new arguments_ty, or NULL on failure.  The failure paths in
   this function either report through ast_error()/PyErr_Format() or pass on
   a NULL received from a helper. */

static arguments_ty
ast_for_arguments(struct compiling *c, const node *n)
{
    /* This function handles both typedargslist (function definition)
       and varargslist (lambda definition).

       parameters: '(' [typedargslist] ')'

       The following definition for typedarglist is equivalent to this set of rules:

         arguments = argument (',' [TYPE_COMMENT] argument)*
         argument = tfpdef ['=' test]
         kwargs = '**' tfpdef [','] [TYPE_COMMENT]
         args = '*' [tfpdef]
         kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
                         [TYPE_COMMENT] [kwargs]])
         args_kwonly_kwargs = args kwonly_kwargs | kwargs
         poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
                                         [TYPE_COMMENT] [args_kwonly_kwargs]])
         typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
         typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
                        typedargslist_no_posonly]])|(typedargslist_no_posonly)

       typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
           [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
           [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
           [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
           [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
           '**' tfpdef [','] [TYPE_COMMENT]]] ) | (tfpdef ['=' test] (','
           [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
           [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
           [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
           [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
           '**' tfpdef [','] [TYPE_COMMENT]))

       tfpdef: NAME [':' test]

       The following definition for varargslist is equivalent to this set of rules:

         arguments = argument (',' argument )*
         argument = vfpdef ['=' test]
         kwargs = '**' vfpdef [',']
         args = '*' [vfpdef]
         kwonly_kwargs = (',' argument )* [',' [kwargs]]
         args_kwonly_kwargs = args kwonly_kwargs | kwargs
         poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
         vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
         varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
                       (vararglist_no_posonly)

       varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
           test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
           ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
           [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
           ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
           | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
           [',']]] | '**' vfpdef [','])

       vfpdef: NAME

    */
    int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
    int nposdefaults = 0, found_default = 0;
    asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
    arg_ty vararg = NULL, kwarg = NULL;
    /* last plain positional argument built; target of a TYPE_COMMENT */
    arg_ty arg = NULL;
    node *ch;

    if (TYPE(n) == parameters) {
        if (NCH(n) == 2) /* () as argument list */
            return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
        /* otherwise work on the list between the parentheses */
        n = CHILD(n, 1);
    }
    assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);

    /* First count the number of positional args & defaults.  The
       variable i is the loop index for this for loop and the next.
       The next loop picks up where the first leaves off.
    */
    for (i = 0; i < NCH(n); i++) {
        ch = CHILD(n, i);
        if (TYPE(ch) == STAR) {
            /* skip star */
            i++;
            if (i < NCH(n) && /* skip argument following star */
                (TYPE(CHILD(n, i)) == tfpdef ||
                 TYPE(CHILD(n, i)) == vfpdef)) {
                i++;
            }
            break;
        }
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
        if (TYPE(ch) == EQUAL) nposdefaults++;
        if (TYPE(ch) == SLASH ) {
            /* everything counted so far was positional-only; start the
               count of ordinary positional arguments over */
            nposonlyargs = nposargs;
            nposargs = 0;
        }
    }
    /* count the number of keyword only args &
       defaults for keyword only args */
    for ( ; i < NCH(n); ++i) {
        ch = CHILD(n, i);
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
    }
    /* Allocate each sequence only when its count is non-zero; a zero count
       leaves the pointer NULL. */
    posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
    if (!posonlyargs && nposonlyargs) {
        return NULL;
    }
    posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
    if (!posargs && nposargs)
        return NULL;
    kwonlyargs = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwonlyargs && nkwonlyargs)
        return NULL;
    posdefaults = (nposdefaults ?
                    _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
    if (!posdefaults && nposdefaults)
        return NULL;
    /* The length of kwonlyargs and kwdefaults are same
       since we set NULL as default for keyword only argument w/o default
       - we have sequence data structure, but no dictionary */
    kwdefaults = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwdefaults && nkwonlyargs)
        return NULL;

    /* tfpdef: NAME [':' test]
       vfpdef: NAME
    */
    i = 0;
    j = 0;  /* index for defaults */
    k = 0;  /* index for args */
    l = 0;  /* index for posonlyargs */
    /* Second pass: build the nodes.  Every case advances i itself. */
    while (i < NCH(n)) {
        ch = CHILD(n, i);
        switch (TYPE(ch)) {
            case tfpdef:
            case vfpdef:
                /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
                   anything other than EQUAL or a comma? */
                /* XXX Should NCH(n) check be made a separate check? */
                if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
                    expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
                    if (!expression)
                        return NULL;
                    assert(posdefaults != NULL);
                    asdl_seq_SET(posdefaults, j++, expression);
                    i += 2;
                    found_default = 1;
                }
                else if (found_default) {
                    /* once one positional parameter has a default, every
                       later one must have a default too */
                    ast_error(c, n,
                              "non-default argument follows default argument");
                    return NULL;
                }
                arg = ast_for_arg(c, ch);
                if (!arg)
                    return NULL;
                /* the first nposonlyargs parameters are positional-only,
                   the remainder are ordinary positional parameters */
                if (l < nposonlyargs) {
                    asdl_seq_SET(posonlyargs, l++, arg);
                } else {
                    asdl_seq_SET(posargs, k++, arg);
                }
                i += 1; /* the name */
                if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                    i += 1; /* the comma, if present */
                break;
            case SLASH:
                /* Advance the slash and the comma. If there are more names
                 * after the slash there will be a comma so we are advancing
                 * the correct number of nodes. If the slash is the last item,
                 * we will be advancing an extra token but then i > NCH(n)
                 * and the enclosing while will finish correctly. */
                i += 2;
                break;
            case STAR:
                /* A '*' that is the last child, or that is followed only by
                   a single trailing comma or type comment, has no named
                   arguments after it. */
                if (i+1 >= NCH(n) ||
                    (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
                                       || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
                    ast_error(c, CHILD(n, i),
                              "named arguments must follow bare *");
                    return NULL;
                }
                ch = CHILD(n, i+1);  /* tfpdef or COMMA */
                if (TYPE(ch) == COMMA) {
                    /* bare '*': only keyword-only arguments follow */
                    int res = 0;
                    i += 2; /* now follows keyword only arguments */

                    if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
                        ast_error(c, CHILD(n, i),
                                  "bare * has associated type comment");
                        return NULL;
                    }

                    res = handle_keywordonly_args(c, n, i,
                                                  kwonlyargs, kwdefaults);
                    if (res == -1) return NULL;
                    i = res; /* res has new position to process */
                }
                else {
                    /* '*name': the variadic positional parameter */
                    vararg = ast_for_arg(c, ch);
                    if (!vararg)
                        return NULL;

                    i += 2; /* the star and the name */
                    if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                        i += 1; /* the comma, if present */

                    /* a type comment directly after '*name' belongs to it */
                    if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
                        vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
                        if (!vararg->type_comment)
                            return NULL;
                        i += 1;
                    }

                    if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
                                    || TYPE(CHILD(n, i)) == vfpdef)) {
                        int res = 0;
                        res = handle_keywordonly_args(c, n, i,
                                                      kwonlyargs, kwdefaults);
                        if (res == -1) return NULL;
                        i = res; /* res has new position to process */
                    }
                }
                break;
            case DOUBLESTAR:
                ch = CHILD(n, i+1);  /* tfpdef */
                assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
                kwarg = ast_for_arg(c, ch);
                if (!kwarg)
                    return NULL;
                i += 2; /* the double star and the name */
                if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                    i += 1; /* the comma, if present */
                break;
            case TYPE_COMMENT:
                assert(i);

                /* once '**name' has been seen, the comment is attached to
                   it rather than to the last positional argument */
                if (kwarg)
                    arg = kwarg;

                /* arg will be equal to the last argument processed */
                arg->type_comment = NEW_TYPE_COMMENT(ch);
                if (!arg->type_comment)
                    return NULL;
                i += 1;
                break;
            default:
                PyErr_Format(PyExc_SystemError,
                             "unexpected node in varargslist: %d @ %d",
                             TYPE(ch), i);
                return NULL;
        }
    }
    return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
}
1693
1694 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1695 ast_for_dotted_name(struct compiling *c, const node *n)
1696 {
1697 expr_ty e;
1698 identifier id;
1699 int lineno, col_offset;
1700 int i;
1701 node *ch;
1702
1703 REQ(n, dotted_name);
1704
1705 lineno = LINENO(n);
1706 col_offset = n->n_col_offset;
1707
1708 ch = CHILD(n, 0);
1709 id = NEW_IDENTIFIER(ch);
1710 if (!id)
1711 return NULL;
1712 e = Name(id, Load, lineno, col_offset,
1713 ch->n_end_lineno, ch->n_end_col_offset, c->c_arena);
1714 if (!e)
1715 return NULL;
1716
1717 for (i = 2; i < NCH(n); i+=2) {
1718 id = NEW_IDENTIFIER(CHILD(n, i));
1719 if (!id)
1720 return NULL;
1721 e = Attribute(e, id, Load, lineno, col_offset,
1722 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1723 if (!e)
1724 return NULL;
1725 }
1726
1727 return e;
1728 }
1729
1730 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1731 ast_for_decorator(struct compiling *c, const node *n)
1732 {
1733 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1734 expr_ty d = NULL;
1735 expr_ty name_expr;
1736
1737 REQ(n, decorator);
1738 REQ(CHILD(n, 0), AT);
1739 REQ(RCHILD(n, -1), NEWLINE);
1740
1741 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1742 if (!name_expr)
1743 return NULL;
1744
1745 if (NCH(n) == 3) { /* No arguments */
1746 d = name_expr;
1747 name_expr = NULL;
1748 }
1749 else if (NCH(n) == 5) { /* Call with no arguments */
1750 d = Call(name_expr, NULL, NULL,
1751 name_expr->lineno, name_expr->col_offset,
1752 CHILD(n, 3)->n_end_lineno, CHILD(n, 3)->n_end_col_offset,
1753 c->c_arena);
1754 if (!d)
1755 return NULL;
1756 name_expr = NULL;
1757 }
1758 else {
1759 d = ast_for_call(c, CHILD(n, 3), name_expr, CHILD(n, 2), CHILD(n, 4));
1760 if (!d)
1761 return NULL;
1762 name_expr = NULL;
1763 }
1764
1765 return d;
1766 }
1767
1768 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1769 ast_for_decorators(struct compiling *c, const node *n)
1770 {
1771 asdl_seq* decorator_seq;
1772 expr_ty d;
1773 int i;
1774
1775 REQ(n, decorators);
1776 decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1777 if (!decorator_seq)
1778 return NULL;
1779
1780 for (i = 0; i < NCH(n); i++) {
1781 d = ast_for_decorator(c, CHILD(n, i));
1782 if (!d)
1783 return NULL;
1784 asdl_seq_SET(decorator_seq, i, d);
1785 }
1786 return decorator_seq;
1787 }
1788
1789 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1790 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1791 asdl_seq *decorator_seq, bool is_async)
1792 {
1793 /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1794 const node * const n = is_async ? CHILD(n0, 1) : n0;
1795 identifier name;
1796 arguments_ty args;
1797 asdl_seq *body;
1798 expr_ty returns = NULL;
1799 int name_i = 1;
1800 int end_lineno, end_col_offset;
1801 node *tc;
1802 string type_comment = NULL;
1803
1804 if (is_async && c->c_feature_version < 5) {
1805 ast_error(c, n,
1806 "Async functions are only supported in Python 3.5 and greater");
1807 return NULL;
1808 }
1809
1810 REQ(n, funcdef);
1811
1812 name = NEW_IDENTIFIER(CHILD(n, name_i));
1813 if (!name)
1814 return NULL;
1815 if (forbidden_name(c, name, CHILD(n, name_i), 0))
1816 return NULL;
1817 args = ast_for_arguments(c, CHILD(n, name_i + 1));
1818 if (!args)
1819 return NULL;
1820 if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1821 returns = ast_for_expr(c, CHILD(n, name_i + 3));
1822 if (!returns)
1823 return NULL;
1824 name_i += 2;
1825 }
1826 if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1827 type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1828 if (!type_comment)
1829 return NULL;
1830 name_i += 1;
1831 }
1832 body = ast_for_suite(c, CHILD(n, name_i + 3));
1833 if (!body)
1834 return NULL;
1835 get_last_end_pos(body, &end_lineno, &end_col_offset);
1836
1837 if (NCH(CHILD(n, name_i + 3)) > 1) {
1838 /* Check if the suite has a type comment in it. */
1839 tc = CHILD(CHILD(n, name_i + 3), 1);
1840
1841 if (TYPE(tc) == TYPE_COMMENT) {
1842 if (type_comment != NULL) {
1843 ast_error(c, n, "Cannot have two type comments on def");
1844 return NULL;
1845 }
1846 type_comment = NEW_TYPE_COMMENT(tc);
1847 if (!type_comment)
1848 return NULL;
1849 }
1850 }
1851
1852 if (is_async)
1853 return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1854 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1855 else
1856 return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1857 LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1858 }
1859
1860 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1861 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1862 {
1863 /* async_funcdef: ASYNC funcdef */
1864 REQ(n, async_funcdef);
1865 REQ(CHILD(n, 0), ASYNC);
1866 REQ(CHILD(n, 1), funcdef);
1867
1868 return ast_for_funcdef_impl(c, n, decorator_seq,
1869 true /* is_async */);
1870 }
1871
1872 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1873 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1874 {
1875 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1876 return ast_for_funcdef_impl(c, n, decorator_seq,
1877 false /* is_async */);
1878 }
1879
1880
1881 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1882 ast_for_async_stmt(struct compiling *c, const node *n)
1883 {
1884 /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1885 REQ(n, async_stmt);
1886 REQ(CHILD(n, 0), ASYNC);
1887
1888 switch (TYPE(CHILD(n, 1))) {
1889 case funcdef:
1890 return ast_for_funcdef_impl(c, n, NULL,
1891 true /* is_async */);
1892 case with_stmt:
1893 return ast_for_with_stmt(c, n,
1894 true /* is_async */);
1895
1896 case for_stmt:
1897 return ast_for_for_stmt(c, n,
1898 true /* is_async */);
1899
1900 default:
1901 PyErr_Format(PyExc_SystemError,
1902 "invalid async stament: %s",
1903 STR(CHILD(n, 1)));
1904 return NULL;
1905 }
1906 }
1907
1908 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1909 ast_for_decorated(struct compiling *c, const node *n)
1910 {
1911 /* decorated: decorators (classdef | funcdef | async_funcdef) */
1912 stmt_ty thing = NULL;
1913 asdl_seq *decorator_seq = NULL;
1914
1915 REQ(n, decorated);
1916
1917 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1918 if (!decorator_seq)
1919 return NULL;
1920
1921 assert(TYPE(CHILD(n, 1)) == funcdef ||
1922 TYPE(CHILD(n, 1)) == async_funcdef ||
1923 TYPE(CHILD(n, 1)) == classdef);
1924
1925 if (TYPE(CHILD(n, 1)) == funcdef) {
1926 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1927 } else if (TYPE(CHILD(n, 1)) == classdef) {
1928 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1929 } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1930 thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1931 }
1932 return thing;
1933 }
1934
1935 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1936 ast_for_namedexpr(struct compiling *c, const node *n)
1937 {
1938 /* namedexpr_test: test [':=' test]
1939 argument: ( test [comp_for] |
1940 test ':=' test |
1941 test '=' test |
1942 '**' test |
1943 '*' test )
1944 */
1945 expr_ty target, value;
1946
1947 target = ast_for_expr(c, CHILD(n, 0));
1948 if (!target)
1949 return NULL;
1950
1951 value = ast_for_expr(c, CHILD(n, 2));
1952 if (!value)
1953 return NULL;
1954
1955 if (target->kind != Name_kind) {
1956 const char *expr_name = get_expr_name(target);
1957 if (expr_name != NULL) {
1958 ast_error(c, n, "cannot use named assignment with %s", expr_name);
1959 }
1960 return NULL;
1961 }
1962
1963 if (!set_context(c, target, Store, n))
1964 return NULL;
1965
1966 return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1967 n->n_end_col_offset, c->c_arena);
1968 }
1969
1970 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1971 ast_for_lambdef(struct compiling *c, const node *n)
1972 {
1973 /* lambdef: 'lambda' [varargslist] ':' test
1974 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1975 arguments_ty args;
1976 expr_ty expression;
1977
1978 if (NCH(n) == 3) {
1979 args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1980 if (!args)
1981 return NULL;
1982 expression = ast_for_expr(c, CHILD(n, 2));
1983 if (!expression)
1984 return NULL;
1985 }
1986 else {
1987 args = ast_for_arguments(c, CHILD(n, 1));
1988 if (!args)
1989 return NULL;
1990 expression = ast_for_expr(c, CHILD(n, 3));
1991 if (!expression)
1992 return NULL;
1993 }
1994
1995 return Lambda(args, expression, LINENO(n), n->n_col_offset,
1996 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1997 }
1998
1999 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)2000 ast_for_ifexpr(struct compiling *c, const node *n)
2001 {
2002 /* test: or_test 'if' or_test 'else' test */
2003 expr_ty expression, body, orelse;
2004
2005 assert(NCH(n) == 5);
2006 body = ast_for_expr(c, CHILD(n, 0));
2007 if (!body)
2008 return NULL;
2009 expression = ast_for_expr(c, CHILD(n, 2));
2010 if (!expression)
2011 return NULL;
2012 orelse = ast_for_expr(c, CHILD(n, 4));
2013 if (!orelse)
2014 return NULL;
2015 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
2016 n->n_end_lineno, n->n_end_col_offset,
2017 c->c_arena);
2018 }
2019
2020 /*
2021 Count the number of 'for' loops in a comprehension.
2022
2023 Helper for ast_for_comprehension().
2024 */
2025
2026 static int
count_comp_fors(struct compiling * c,const node * n)2027 count_comp_fors(struct compiling *c, const node *n)
2028 {
2029 int n_fors = 0;
2030
2031 count_comp_for:
2032 n_fors++;
2033 REQ(n, comp_for);
2034 if (NCH(n) == 2) {
2035 REQ(CHILD(n, 0), ASYNC);
2036 n = CHILD(n, 1);
2037 }
2038 else if (NCH(n) == 1) {
2039 n = CHILD(n, 0);
2040 }
2041 else {
2042 goto error;
2043 }
2044 if (NCH(n) == (5)) {
2045 n = CHILD(n, 4);
2046 }
2047 else {
2048 return n_fors;
2049 }
2050 count_comp_iter:
2051 REQ(n, comp_iter);
2052 n = CHILD(n, 0);
2053 if (TYPE(n) == comp_for)
2054 goto count_comp_for;
2055 else if (TYPE(n) == comp_if) {
2056 if (NCH(n) == 3) {
2057 n = CHILD(n, 2);
2058 goto count_comp_iter;
2059 }
2060 else
2061 return n_fors;
2062 }
2063
2064 error:
2065 /* Should never be reached */
2066 PyErr_SetString(PyExc_SystemError,
2067 "logic error in count_comp_fors");
2068 return -1;
2069 }
2070
2071 /* Count the number of 'if' statements in a comprehension.
2072
2073 Helper for ast_for_comprehension().
2074 */
2075
2076 static int
count_comp_ifs(struct compiling * c,const node * n)2077 count_comp_ifs(struct compiling *c, const node *n)
2078 {
2079 int n_ifs = 0;
2080
2081 while (1) {
2082 REQ(n, comp_iter);
2083 if (TYPE(CHILD(n, 0)) == comp_for)
2084 return n_ifs;
2085 n = CHILD(n, 0);
2086 REQ(n, comp_if);
2087 n_ifs++;
2088 if (NCH(n) == 2)
2089 return n_ifs;
2090 n = CHILD(n, 2);
2091 }
2092 }
2093
2094 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)2095 ast_for_comprehension(struct compiling *c, const node *n)
2096 {
2097 int i, n_fors;
2098 asdl_seq *comps;
2099
2100 n_fors = count_comp_fors(c, n);
2101 if (n_fors == -1)
2102 return NULL;
2103
2104 comps = _Py_asdl_seq_new(n_fors, c->c_arena);
2105 if (!comps)
2106 return NULL;
2107
2108 for (i = 0; i < n_fors; i++) {
2109 comprehension_ty comp;
2110 asdl_seq *t;
2111 expr_ty expression, first;
2112 node *for_ch;
2113 node *sync_n;
2114 int is_async = 0;
2115
2116 REQ(n, comp_for);
2117
2118 if (NCH(n) == 2) {
2119 is_async = 1;
2120 REQ(CHILD(n, 0), ASYNC);
2121 sync_n = CHILD(n, 1);
2122 }
2123 else {
2124 sync_n = CHILD(n, 0);
2125 }
2126 REQ(sync_n, sync_comp_for);
2127
2128 /* Async comprehensions only allowed in Python 3.6 and greater */
2129 if (is_async && c->c_feature_version < 6) {
2130 ast_error(c, n,
2131 "Async comprehensions are only supported in Python 3.6 and greater");
2132 return NULL;
2133 }
2134
2135 for_ch = CHILD(sync_n, 1);
2136 t = ast_for_exprlist(c, for_ch, Store);
2137 if (!t)
2138 return NULL;
2139 expression = ast_for_expr(c, CHILD(sync_n, 3));
2140 if (!expression)
2141 return NULL;
2142
2143 /* Check the # of children rather than the length of t, since
2144 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
2145 first = (expr_ty)asdl_seq_GET(t, 0);
2146 if (NCH(for_ch) == 1)
2147 comp = comprehension(first, expression, NULL,
2148 is_async, c->c_arena);
2149 else
2150 comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
2151 for_ch->n_end_lineno, for_ch->n_end_col_offset,
2152 c->c_arena),
2153 expression, NULL, is_async, c->c_arena);
2154 if (!comp)
2155 return NULL;
2156
2157 if (NCH(sync_n) == 5) {
2158 int j, n_ifs;
2159 asdl_seq *ifs;
2160
2161 n = CHILD(sync_n, 4);
2162 n_ifs = count_comp_ifs(c, n);
2163 if (n_ifs == -1)
2164 return NULL;
2165
2166 ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
2167 if (!ifs)
2168 return NULL;
2169
2170 for (j = 0; j < n_ifs; j++) {
2171 REQ(n, comp_iter);
2172 n = CHILD(n, 0);
2173 REQ(n, comp_if);
2174
2175 expression = ast_for_expr(c, CHILD(n, 1));
2176 if (!expression)
2177 return NULL;
2178 asdl_seq_SET(ifs, j, expression);
2179 if (NCH(n) == 3)
2180 n = CHILD(n, 2);
2181 }
2182 /* on exit, must guarantee that n is a comp_for */
2183 if (TYPE(n) == comp_iter)
2184 n = CHILD(n, 0);
2185 comp->ifs = ifs;
2186 }
2187 asdl_seq_SET(comps, i, comp);
2188 }
2189 return comps;
2190 }
2191
2192 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2193 ast_for_itercomp(struct compiling *c, const node *n, int type)
2194 {
2195 /* testlist_comp: (test|star_expr)
2196 * ( comp_for | (',' (test|star_expr))* [','] ) */
2197 expr_ty elt;
2198 asdl_seq *comps;
2199 node *ch;
2200
2201 assert(NCH(n) > 1);
2202
2203 ch = CHILD(n, 0);
2204 elt = ast_for_expr(c, ch);
2205 if (!elt)
2206 return NULL;
2207 if (elt->kind == Starred_kind) {
2208 ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2209 return NULL;
2210 }
2211
2212 comps = ast_for_comprehension(c, CHILD(n, 1));
2213 if (!comps)
2214 return NULL;
2215
2216 if (type == COMP_GENEXP)
2217 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2218 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2219 else if (type == COMP_LISTCOMP)
2220 return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2221 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2222 else if (type == COMP_SETCOMP)
2223 return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2224 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2225 else
2226 /* Should never happen */
2227 return NULL;
2228 }
2229
2230 /* Fills in the key, value pair corresponding to the dict element. In case
2231 * of an unpacking, key is NULL. *i is advanced by the number of ast
2232 * elements. Iff successful, nonzero is returned.
2233 */
2234 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2235 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2236 expr_ty *key, expr_ty *value)
2237 {
2238 expr_ty expression;
2239 if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2240 assert(NCH(n) - *i >= 2);
2241
2242 expression = ast_for_expr(c, CHILD(n, *i + 1));
2243 if (!expression)
2244 return 0;
2245 *key = NULL;
2246 *value = expression;
2247
2248 *i += 2;
2249 }
2250 else {
2251 assert(NCH(n) - *i >= 3);
2252
2253 expression = ast_for_expr(c, CHILD(n, *i));
2254 if (!expression)
2255 return 0;
2256 *key = expression;
2257
2258 REQ(CHILD(n, *i + 1), COLON);
2259
2260 expression = ast_for_expr(c, CHILD(n, *i + 2));
2261 if (!expression)
2262 return 0;
2263 *value = expression;
2264
2265 *i += 3;
2266 }
2267 return 1;
2268 }
2269
2270 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2271 ast_for_dictcomp(struct compiling *c, const node *n)
2272 {
2273 expr_ty key, value;
2274 asdl_seq *comps;
2275 int i = 0;
2276
2277 if (!ast_for_dictelement(c, n, &i, &key, &value))
2278 return NULL;
2279 assert(key);
2280 assert(NCH(n) - i >= 1);
2281
2282 comps = ast_for_comprehension(c, CHILD(n, i));
2283 if (!comps)
2284 return NULL;
2285
2286 return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2287 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2288 }
2289
2290 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2291 ast_for_dictdisplay(struct compiling *c, const node *n)
2292 {
2293 int i;
2294 int j;
2295 int size;
2296 asdl_seq *keys, *values;
2297
2298 size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2299 keys = _Py_asdl_seq_new(size, c->c_arena);
2300 if (!keys)
2301 return NULL;
2302
2303 values = _Py_asdl_seq_new(size, c->c_arena);
2304 if (!values)
2305 return NULL;
2306
2307 j = 0;
2308 for (i = 0; i < NCH(n); i++) {
2309 expr_ty key, value;
2310
2311 if (!ast_for_dictelement(c, n, &i, &key, &value))
2312 return NULL;
2313 asdl_seq_SET(keys, j, key);
2314 asdl_seq_SET(values, j, value);
2315
2316 j++;
2317 }
2318 keys->size = j;
2319 values->size = j;
2320 return Dict(keys, values, LINENO(n), n->n_col_offset,
2321 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2322 }
2323
2324 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2325 ast_for_genexp(struct compiling *c, const node *n)
2326 {
2327 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2328 return ast_for_itercomp(c, n, COMP_GENEXP);
2329 }
2330
2331 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2332 ast_for_listcomp(struct compiling *c, const node *n)
2333 {
2334 assert(TYPE(n) == (testlist_comp));
2335 return ast_for_itercomp(c, n, COMP_LISTCOMP);
2336 }
2337
2338 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2339 ast_for_setcomp(struct compiling *c, const node *n)
2340 {
2341 assert(TYPE(n) == (dictorsetmaker));
2342 return ast_for_itercomp(c, n, COMP_SETCOMP);
2343 }
2344
2345 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2346 ast_for_setdisplay(struct compiling *c, const node *n)
2347 {
2348 int i;
2349 int size;
2350 asdl_seq *elts;
2351
2352 assert(TYPE(n) == (dictorsetmaker));
2353 size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2354 elts = _Py_asdl_seq_new(size, c->c_arena);
2355 if (!elts)
2356 return NULL;
2357 for (i = 0; i < NCH(n); i += 2) {
2358 expr_ty expression;
2359 expression = ast_for_expr(c, CHILD(n, i));
2360 if (!expression)
2361 return NULL;
2362 asdl_seq_SET(elts, i / 2, expression);
2363 }
2364 return Set(elts, LINENO(n), n->n_col_offset,
2365 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2366 }
2367
2368 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2369 ast_for_atom(struct compiling *c, const node *n)
2370 {
2371 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2372 | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2373 | '...' | 'None' | 'True' | 'False'
2374 */
2375 node *ch = CHILD(n, 0);
2376
2377 switch (TYPE(ch)) {
2378 case NAME: {
2379 PyObject *name;
2380 const char *s = STR(ch);
2381 size_t len = strlen(s);
2382 if (len >= 4 && len <= 5) {
2383 if (!strcmp(s, "None"))
2384 return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
2385 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2386 if (!strcmp(s, "True"))
2387 return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
2388 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2389 if (!strcmp(s, "False"))
2390 return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
2391 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2392 }
2393 name = new_identifier(s, c);
2394 if (!name)
2395 return NULL;
2396 /* All names start in Load context, but may later be changed. */
2397 return Name(name, Load, LINENO(n), n->n_col_offset,
2398 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2399 }
2400 case STRING: {
2401 expr_ty str = parsestrplus(c, n);
2402 if (!str) {
2403 const char *errtype = NULL;
2404 if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2405 errtype = "unicode error";
2406 else if (PyErr_ExceptionMatches(PyExc_ValueError))
2407 errtype = "value error";
2408 if (errtype) {
2409 PyObject *type, *value, *tback, *errstr;
2410 PyErr_Fetch(&type, &value, &tback);
2411 errstr = PyObject_Str(value);
2412 if (errstr) {
2413 ast_error(c, n, "(%s) %U", errtype, errstr);
2414 Py_DECREF(errstr);
2415 }
2416 else {
2417 PyErr_Clear();
2418 ast_error(c, n, "(%s) unknown error", errtype);
2419 }
2420 Py_DECREF(type);
2421 Py_XDECREF(value);
2422 Py_XDECREF(tback);
2423 }
2424 return NULL;
2425 }
2426 return str;
2427 }
2428 case NUMBER: {
2429 PyObject *pynum;
2430 /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
2431 /* Check for underscores here rather than in parse_number so we can report a line number on error */
2432 if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
2433 ast_error(c, ch,
2434 "Underscores in numeric literals are only supported in Python 3.6 and greater");
2435 return NULL;
2436 }
2437 pynum = parsenumber(c, STR(ch));
2438 if (!pynum)
2439 return NULL;
2440
2441 if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2442 Py_DECREF(pynum);
2443 return NULL;
2444 }
2445 return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
2446 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2447 }
2448 case ELLIPSIS: /* Ellipsis */
2449 return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
2450 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2451 case LPAR: /* some parenthesized expressions */
2452 ch = CHILD(n, 1);
2453
2454 if (TYPE(ch) == RPAR)
2455 return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
2456 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2457
2458 if (TYPE(ch) == yield_expr)
2459 return ast_for_expr(c, ch);
2460
2461 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2462 if (NCH(ch) == 1) {
2463 return ast_for_testlist(c, ch);
2464 }
2465
2466 if (TYPE(CHILD(ch, 1)) == comp_for) {
2467 return copy_location(ast_for_genexp(c, ch), n);
2468 }
2469 else {
2470 return copy_location(ast_for_testlist(c, ch), n);
2471 }
2472 case LSQB: /* list (or list comprehension) */
2473 ch = CHILD(n, 1);
2474
2475 if (TYPE(ch) == RSQB)
2476 return List(NULL, Load, LINENO(n), n->n_col_offset,
2477 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2478
2479 REQ(ch, testlist_comp);
2480 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2481 asdl_seq *elts = seq_for_testlist(c, ch);
2482 if (!elts)
2483 return NULL;
2484
2485 return List(elts, Load, LINENO(n), n->n_col_offset,
2486 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2487 }
2488 else {
2489 return copy_location(ast_for_listcomp(c, ch), n);
2490 }
2491 case LBRACE: {
2492 /* dictorsetmaker: ( ((test ':' test | '**' test)
2493 * (comp_for | (',' (test ':' test | '**' test))* [','])) |
2494 * ((test | '*' test)
2495 * (comp_for | (',' (test | '*' test))* [','])) ) */
2496 expr_ty res;
2497 ch = CHILD(n, 1);
2498 if (TYPE(ch) == RBRACE) {
2499 /* It's an empty dict. */
2500 return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
2501 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2502 }
2503 else {
2504 int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2505 if (NCH(ch) == 1 ||
2506 (NCH(ch) > 1 &&
2507 TYPE(CHILD(ch, 1)) == COMMA)) {
2508 /* It's a set display. */
2509 res = ast_for_setdisplay(c, ch);
2510 }
2511 else if (NCH(ch) > 1 &&
2512 TYPE(CHILD(ch, 1)) == comp_for) {
2513 /* It's a set comprehension. */
2514 res = ast_for_setcomp(c, ch);
2515 }
2516 else if (NCH(ch) > 3 - is_dict &&
2517 TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2518 /* It's a dictionary comprehension. */
2519 if (is_dict) {
2520 ast_error(c, n,
2521 "dict unpacking cannot be used in dict comprehension");
2522 return NULL;
2523 }
2524 res = ast_for_dictcomp(c, ch);
2525 }
2526 else {
2527 /* It's a dictionary display. */
2528 res = ast_for_dictdisplay(c, ch);
2529 }
2530 return copy_location(res, n);
2531 }
2532 }
2533 default:
2534 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2535 return NULL;
2536 }
2537 }
2538
2539 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2540 ast_for_slice(struct compiling *c, const node *n)
2541 {
2542 node *ch;
2543 expr_ty lower = NULL, upper = NULL, step = NULL;
2544
2545 REQ(n, subscript);
2546
2547 /*
2548 subscript: test | [test] ':' [test] [sliceop]
2549 sliceop: ':' [test]
2550 */
2551 ch = CHILD(n, 0);
2552 if (NCH(n) == 1 && TYPE(ch) == test) {
2553 /* 'step' variable hold no significance in terms of being used over
2554 other vars */
2555 step = ast_for_expr(c, ch);
2556 if (!step)
2557 return NULL;
2558
2559 return Index(step, c->c_arena);
2560 }
2561
2562 if (TYPE(ch) == test) {
2563 lower = ast_for_expr(c, ch);
2564 if (!lower)
2565 return NULL;
2566 }
2567
2568 /* If there's an upper bound it's in the second or third position. */
2569 if (TYPE(ch) == COLON) {
2570 if (NCH(n) > 1) {
2571 node *n2 = CHILD(n, 1);
2572
2573 if (TYPE(n2) == test) {
2574 upper = ast_for_expr(c, n2);
2575 if (!upper)
2576 return NULL;
2577 }
2578 }
2579 } else if (NCH(n) > 2) {
2580 node *n2 = CHILD(n, 2);
2581
2582 if (TYPE(n2) == test) {
2583 upper = ast_for_expr(c, n2);
2584 if (!upper)
2585 return NULL;
2586 }
2587 }
2588
2589 ch = CHILD(n, NCH(n) - 1);
2590 if (TYPE(ch) == sliceop) {
2591 if (NCH(ch) != 1) {
2592 ch = CHILD(ch, 1);
2593 if (TYPE(ch) == test) {
2594 step = ast_for_expr(c, ch);
2595 if (!step)
2596 return NULL;
2597 }
2598 }
2599 }
2600
2601 return Slice(lower, upper, step, c->c_arena);
2602 }
2603
2604 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2605 ast_for_binop(struct compiling *c, const node *n)
2606 {
2607 /* Must account for a sequence of expressions.
2608 How should A op B op C by represented?
2609 BinOp(BinOp(A, op, B), op, C).
2610 */
2611
2612 int i, nops;
2613 expr_ty expr1, expr2, result;
2614 operator_ty newoperator;
2615
2616 expr1 = ast_for_expr(c, CHILD(n, 0));
2617 if (!expr1)
2618 return NULL;
2619
2620 expr2 = ast_for_expr(c, CHILD(n, 2));
2621 if (!expr2)
2622 return NULL;
2623
2624 newoperator = get_operator(c, CHILD(n, 1));
2625 if (!newoperator)
2626 return NULL;
2627
2628 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2629 CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2630 c->c_arena);
2631 if (!result)
2632 return NULL;
2633
2634 nops = (NCH(n) - 1) / 2;
2635 for (i = 1; i < nops; i++) {
2636 expr_ty tmp_result, tmp;
2637 const node* next_oper = CHILD(n, i * 2 + 1);
2638
2639 newoperator = get_operator(c, next_oper);
2640 if (!newoperator)
2641 return NULL;
2642
2643 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2644 if (!tmp)
2645 return NULL;
2646
2647 tmp_result = BinOp(result, newoperator, tmp,
2648 LINENO(n), n->n_col_offset,
2649 CHILD(n, i * 2 + 2)->n_end_lineno,
2650 CHILD(n, i * 2 + 2)->n_end_col_offset,
2651 c->c_arena);
2652 if (!tmp_result)
2653 return NULL;
2654 result = tmp_result;
2655 }
2656 return result;
2657 }
2658
2659 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)2660 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
2661 {
2662 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2663 subscriptlist: subscript (',' subscript)* [',']
2664 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2665 */
2666 const node *n_copy = n;
2667 REQ(n, trailer);
2668 if (TYPE(CHILD(n, 0)) == LPAR) {
2669 if (NCH(n) == 2)
2670 return Call(left_expr, NULL, NULL, LINENO(n), n->n_col_offset,
2671 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2672 else
2673 return ast_for_call(c, CHILD(n, 1), left_expr, CHILD(n, 0), CHILD(n, 2));
2674 }
2675 else if (TYPE(CHILD(n, 0)) == DOT) {
2676 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2677 if (!attr_id)
2678 return NULL;
2679 return Attribute(left_expr, attr_id, Load,
2680 LINENO(n), n->n_col_offset,
2681 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2682 }
2683 else {
2684 REQ(CHILD(n, 0), LSQB);
2685 REQ(CHILD(n, 2), RSQB);
2686 n = CHILD(n, 1);
2687 if (NCH(n) == 1) {
2688 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2689 if (!slc)
2690 return NULL;
2691 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
2692 n_copy->n_end_lineno, n_copy->n_end_col_offset,
2693 c->c_arena);
2694 }
2695 else {
2696 /* The grammar is ambiguous here. The ambiguity is resolved
2697 by treating the sequence as a tuple literal if there are
2698 no slice features.
2699 */
2700 Py_ssize_t j;
2701 slice_ty slc;
2702 expr_ty e;
2703 int simple = 1;
2704 asdl_seq *slices, *elts;
2705 slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2706 if (!slices)
2707 return NULL;
2708 for (j = 0; j < NCH(n); j += 2) {
2709 slc = ast_for_slice(c, CHILD(n, j));
2710 if (!slc)
2711 return NULL;
2712 if (slc->kind != Index_kind)
2713 simple = 0;
2714 asdl_seq_SET(slices, j / 2, slc);
2715 }
2716 if (!simple) {
2717 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2718 Load, LINENO(n), n->n_col_offset,
2719 n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2720 }
2721 /* extract Index values and put them in a Tuple */
2722 elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2723 if (!elts)
2724 return NULL;
2725 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2726 slc = (slice_ty)asdl_seq_GET(slices, j);
2727 assert(slc->kind == Index_kind && slc->v.Index.value);
2728 asdl_seq_SET(elts, j, slc->v.Index.value);
2729 }
2730 e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2731 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2732 if (!e)
2733 return NULL;
2734 return Subscript(left_expr, Index(e, c->c_arena),
2735 Load, LINENO(n), n->n_col_offset,
2736 n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2737 }
2738 }
2739 }
2740
2741 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2742 ast_for_factor(struct compiling *c, const node *n)
2743 {
2744 expr_ty expression;
2745
2746 expression = ast_for_expr(c, CHILD(n, 1));
2747 if (!expression)
2748 return NULL;
2749
2750 switch (TYPE(CHILD(n, 0))) {
2751 case PLUS:
2752 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2753 n->n_end_lineno, n->n_end_col_offset,
2754 c->c_arena);
2755 case MINUS:
2756 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2757 n->n_end_lineno, n->n_end_col_offset,
2758 c->c_arena);
2759 case TILDE:
2760 return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2761 n->n_end_lineno, n->n_end_col_offset,
2762 c->c_arena);
2763 }
2764 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2765 TYPE(CHILD(n, 0)));
2766 return NULL;
2767 }
2768
2769 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2770 ast_for_atom_expr(struct compiling *c, const node *n)
2771 {
2772 int i, nch, start = 0;
2773 expr_ty e, tmp;
2774
2775 REQ(n, atom_expr);
2776 nch = NCH(n);
2777
2778 if (TYPE(CHILD(n, 0)) == AWAIT) {
2779 if (c->c_feature_version < 5) {
2780 ast_error(c, n,
2781 "Await expressions are only supported in Python 3.5 and greater");
2782 return NULL;
2783 }
2784 start = 1;
2785 assert(nch > 1);
2786 }
2787
2788 e = ast_for_atom(c, CHILD(n, start));
2789 if (!e)
2790 return NULL;
2791 if (nch == 1)
2792 return e;
2793 if (start && nch == 2) {
2794 return Await(e, LINENO(n), n->n_col_offset,
2795 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2796 }
2797
2798 for (i = start + 1; i < nch; i++) {
2799 node *ch = CHILD(n, i);
2800 if (TYPE(ch) != trailer)
2801 break;
2802 tmp = ast_for_trailer(c, ch, e);
2803 if (!tmp)
2804 return NULL;
2805 tmp->lineno = e->lineno;
2806 tmp->col_offset = e->col_offset;
2807 e = tmp;
2808 }
2809
2810 if (start) {
2811 /* there was an 'await' */
2812 return Await(e, LINENO(n), n->n_col_offset,
2813 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2814 }
2815 else {
2816 return e;
2817 }
2818 }
2819
2820 static expr_ty
ast_for_power(struct compiling * c,const node * n)2821 ast_for_power(struct compiling *c, const node *n)
2822 {
2823 /* power: atom trailer* ('**' factor)*
2824 */
2825 expr_ty e;
2826 REQ(n, power);
2827 e = ast_for_atom_expr(c, CHILD(n, 0));
2828 if (!e)
2829 return NULL;
2830 if (NCH(n) == 1)
2831 return e;
2832 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2833 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2834 if (!f)
2835 return NULL;
2836 e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2837 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2838 }
2839 return e;
2840 }
2841
2842 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2843 ast_for_starred(struct compiling *c, const node *n)
2844 {
2845 expr_ty tmp;
2846 REQ(n, star_expr);
2847
2848 tmp = ast_for_expr(c, CHILD(n, 1));
2849 if (!tmp)
2850 return NULL;
2851
2852 /* The Load context is changed later. */
2853 return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2854 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2855 }
2856
2857
2858 /* Do not name a variable 'expr'! Will cause a compile error.
2859 */
2860
2861 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2862 ast_for_expr(struct compiling *c, const node *n)
2863 {
2864 /* handle the full range of simple expressions
2865 namedexpr_test: test [':=' test]
2866 test: or_test ['if' or_test 'else' test] | lambdef
2867 test_nocond: or_test | lambdef_nocond
2868 or_test: and_test ('or' and_test)*
2869 and_test: not_test ('and' not_test)*
2870 not_test: 'not' not_test | comparison
2871 comparison: expr (comp_op expr)*
2872 expr: xor_expr ('|' xor_expr)*
2873 xor_expr: and_expr ('^' and_expr)*
2874 and_expr: shift_expr ('&' shift_expr)*
2875 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2876 arith_expr: term (('+'|'-') term)*
2877 term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2878 factor: ('+'|'-'|'~') factor | power
2879 power: atom_expr ['**' factor]
2880 atom_expr: [AWAIT] atom trailer*
2881 yield_expr: 'yield' [yield_arg]
2882 */
2883
2884 asdl_seq *seq;
2885 int i;
2886
2887 loop:
2888 switch (TYPE(n)) {
2889 case namedexpr_test:
2890 if (NCH(n) == 3)
2891 return ast_for_namedexpr(c, n);
2892 /* Fallthrough */
2893 case test:
2894 case test_nocond:
2895 if (TYPE(CHILD(n, 0)) == lambdef ||
2896 TYPE(CHILD(n, 0)) == lambdef_nocond)
2897 return ast_for_lambdef(c, CHILD(n, 0));
2898 else if (NCH(n) > 1)
2899 return ast_for_ifexpr(c, n);
2900 /* Fallthrough */
2901 case or_test:
2902 case and_test:
2903 if (NCH(n) == 1) {
2904 n = CHILD(n, 0);
2905 goto loop;
2906 }
2907 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2908 if (!seq)
2909 return NULL;
2910 for (i = 0; i < NCH(n); i += 2) {
2911 expr_ty e = ast_for_expr(c, CHILD(n, i));
2912 if (!e)
2913 return NULL;
2914 asdl_seq_SET(seq, i / 2, e);
2915 }
2916 if (!strcmp(STR(CHILD(n, 1)), "and"))
2917 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2918 n->n_end_lineno, n->n_end_col_offset,
2919 c->c_arena);
2920 assert(!strcmp(STR(CHILD(n, 1)), "or"));
2921 return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
2922 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2923 case not_test:
2924 if (NCH(n) == 1) {
2925 n = CHILD(n, 0);
2926 goto loop;
2927 }
2928 else {
2929 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2930 if (!expression)
2931 return NULL;
2932
2933 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2934 n->n_end_lineno, n->n_end_col_offset,
2935 c->c_arena);
2936 }
2937 case comparison:
2938 if (NCH(n) == 1) {
2939 n = CHILD(n, 0);
2940 goto loop;
2941 }
2942 else {
2943 expr_ty expression;
2944 asdl_int_seq *ops;
2945 asdl_seq *cmps;
2946 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2947 if (!ops)
2948 return NULL;
2949 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2950 if (!cmps) {
2951 return NULL;
2952 }
2953 for (i = 1; i < NCH(n); i += 2) {
2954 cmpop_ty newoperator;
2955
2956 newoperator = ast_for_comp_op(c, CHILD(n, i));
2957 if (!newoperator) {
2958 return NULL;
2959 }
2960
2961 expression = ast_for_expr(c, CHILD(n, i + 1));
2962 if (!expression) {
2963 return NULL;
2964 }
2965
2966 asdl_seq_SET(ops, i / 2, newoperator);
2967 asdl_seq_SET(cmps, i / 2, expression);
2968 }
2969 expression = ast_for_expr(c, CHILD(n, 0));
2970 if (!expression) {
2971 return NULL;
2972 }
2973
2974 return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
2975 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2976 }
2977
2978 case star_expr:
2979 return ast_for_starred(c, n);
2980 /* The next five cases all handle BinOps. The main body of code
2981 is the same in each case, but the switch turned inside out to
2982 reuse the code for each type of operator.
2983 */
2984 case expr:
2985 case xor_expr:
2986 case and_expr:
2987 case shift_expr:
2988 case arith_expr:
2989 case term:
2990 if (NCH(n) == 1) {
2991 n = CHILD(n, 0);
2992 goto loop;
2993 }
2994 return ast_for_binop(c, n);
2995 case yield_expr: {
2996 node *an = NULL;
2997 node *en = NULL;
2998 int is_from = 0;
2999 expr_ty exp = NULL;
3000 if (NCH(n) > 1)
3001 an = CHILD(n, 1); /* yield_arg */
3002 if (an) {
3003 en = CHILD(an, NCH(an) - 1);
3004 if (NCH(an) == 2) {
3005 is_from = 1;
3006 exp = ast_for_expr(c, en);
3007 }
3008 else
3009 exp = ast_for_testlist(c, en);
3010 if (!exp)
3011 return NULL;
3012 }
3013 if (is_from)
3014 return YieldFrom(exp, LINENO(n), n->n_col_offset,
3015 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3016 return Yield(exp, LINENO(n), n->n_col_offset,
3017 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3018 }
3019 case factor:
3020 if (NCH(n) == 1) {
3021 n = CHILD(n, 0);
3022 goto loop;
3023 }
3024 return ast_for_factor(c, n);
3025 case power:
3026 return ast_for_power(c, n);
3027 default:
3028 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
3029 return NULL;
3030 }
3031 /* should never get here unless if error is set */
3032 return NULL;
3033 }
3034
3035 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * maybegenbeg,const node * closepar)3036 ast_for_call(struct compiling *c, const node *n, expr_ty func,
3037 const node *maybegenbeg, const node *closepar)
3038 {
3039 /*
3040 arglist: argument (',' argument)* [',']
3041 argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
3042 */
3043
3044 int i, nargs, nkeywords;
3045 int ndoublestars;
3046 asdl_seq *args;
3047 asdl_seq *keywords;
3048
3049 REQ(n, arglist);
3050
3051 nargs = 0;
3052 nkeywords = 0;
3053 for (i = 0; i < NCH(n); i++) {
3054 node *ch = CHILD(n, i);
3055 if (TYPE(ch) == argument) {
3056 if (NCH(ch) == 1)
3057 nargs++;
3058 else if (TYPE(CHILD(ch, 1)) == comp_for) {
3059 nargs++;
3060 if (!maybegenbeg) {
3061 ast_error(c, ch, "invalid syntax");
3062 return NULL;
3063 }
3064 if (NCH(n) > 1) {
3065 ast_error(c, ch, "Generator expression must be parenthesized");
3066 return NULL;
3067 }
3068 }
3069 else if (TYPE(CHILD(ch, 0)) == STAR)
3070 nargs++;
3071 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3072 nargs++;
3073 }
3074 else
3075 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
3076 nkeywords++;
3077 }
3078 }
3079
3080 args = _Py_asdl_seq_new(nargs, c->c_arena);
3081 if (!args)
3082 return NULL;
3083 keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
3084 if (!keywords)
3085 return NULL;
3086
3087 nargs = 0; /* positional arguments + iterable argument unpackings */
3088 nkeywords = 0; /* keyword arguments + keyword argument unpackings */
3089 ndoublestars = 0; /* just keyword argument unpackings */
3090 for (i = 0; i < NCH(n); i++) {
3091 node *ch = CHILD(n, i);
3092 if (TYPE(ch) == argument) {
3093 expr_ty e;
3094 node *chch = CHILD(ch, 0);
3095 if (NCH(ch) == 1) {
3096 /* a positional argument */
3097 if (nkeywords) {
3098 if (ndoublestars) {
3099 ast_error(c, chch,
3100 "positional argument follows "
3101 "keyword argument unpacking");
3102 }
3103 else {
3104 ast_error(c, chch,
3105 "positional argument follows "
3106 "keyword argument");
3107 }
3108 return NULL;
3109 }
3110 e = ast_for_expr(c, chch);
3111 if (!e)
3112 return NULL;
3113 asdl_seq_SET(args, nargs++, e);
3114 }
3115 else if (TYPE(chch) == STAR) {
3116 /* an iterable argument unpacking */
3117 expr_ty starred;
3118 if (ndoublestars) {
3119 ast_error(c, chch,
3120 "iterable argument unpacking follows "
3121 "keyword argument unpacking");
3122 return NULL;
3123 }
3124 e = ast_for_expr(c, CHILD(ch, 1));
3125 if (!e)
3126 return NULL;
3127 starred = Starred(e, Load, LINENO(chch),
3128 chch->n_col_offset,
3129 e->end_lineno, e->end_col_offset,
3130 c->c_arena);
3131 if (!starred)
3132 return NULL;
3133 asdl_seq_SET(args, nargs++, starred);
3134
3135 }
3136 else if (TYPE(chch) == DOUBLESTAR) {
3137 /* a keyword argument unpacking */
3138 keyword_ty kw;
3139 i++;
3140 e = ast_for_expr(c, CHILD(ch, 1));
3141 if (!e)
3142 return NULL;
3143 kw = keyword(NULL, e, c->c_arena);
3144 asdl_seq_SET(keywords, nkeywords++, kw);
3145 ndoublestars++;
3146 }
3147 else if (TYPE(CHILD(ch, 1)) == comp_for) {
3148 /* the lone generator expression */
3149 e = copy_location(ast_for_genexp(c, ch), maybegenbeg);
3150 if (!e)
3151 return NULL;
3152 asdl_seq_SET(args, nargs++, e);
3153 }
3154 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3155 /* treat colon equal as positional argument */
3156 if (nkeywords) {
3157 if (ndoublestars) {
3158 ast_error(c, chch,
3159 "positional argument follows "
3160 "keyword argument unpacking");
3161 }
3162 else {
3163 ast_error(c, chch,
3164 "positional argument follows "
3165 "keyword argument");
3166 }
3167 return NULL;
3168 }
3169 e = ast_for_namedexpr(c, ch);
3170 if (!e)
3171 return NULL;
3172 asdl_seq_SET(args, nargs++, e);
3173 }
3174 else {
3175 /* a keyword argument */
3176 keyword_ty kw;
3177 identifier key, tmp;
3178 int k;
3179
3180 // To remain LL(1), the grammar accepts any test (basically, any
3181 // expression) in the keyword slot of a call site. So, we need
3182 // to manually enforce that the keyword is a NAME here.
3183 static const int name_tree[] = {
3184 test,
3185 or_test,
3186 and_test,
3187 not_test,
3188 comparison,
3189 expr,
3190 xor_expr,
3191 and_expr,
3192 shift_expr,
3193 arith_expr,
3194 term,
3195 factor,
3196 power,
3197 atom_expr,
3198 atom,
3199 0,
3200 };
3201 node *expr_node = chch;
3202 for (int i = 0; name_tree[i]; i++) {
3203 if (TYPE(expr_node) != name_tree[i])
3204 break;
3205 if (NCH(expr_node) != 1)
3206 break;
3207 expr_node = CHILD(expr_node, 0);
3208 }
3209 if (TYPE(expr_node) != NAME) {
3210 ast_error(c, chch,
3211 "expression cannot contain assignment, "
3212 "perhaps you meant \"==\"?");
3213 return NULL;
3214 }
3215 key = new_identifier(STR(expr_node), c);
3216 if (key == NULL) {
3217 return NULL;
3218 }
3219 if (forbidden_name(c, key, chch, 1)) {
3220 return NULL;
3221 }
3222 for (k = 0; k < nkeywords; k++) {
3223 tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
3224 if (tmp && !PyUnicode_Compare(tmp, key)) {
3225 ast_error(c, chch,
3226 "keyword argument repeated");
3227 return NULL;
3228 }
3229 }
3230 e = ast_for_expr(c, CHILD(ch, 2));
3231 if (!e)
3232 return NULL;
3233 kw = keyword(key, e, c->c_arena);
3234 if (!kw)
3235 return NULL;
3236 asdl_seq_SET(keywords, nkeywords++, kw);
3237 }
3238 }
3239 }
3240
3241 return Call(func, args, keywords, func->lineno, func->col_offset,
3242 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3243 }
3244
3245 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3246 ast_for_testlist(struct compiling *c, const node* n)
3247 {
3248 /* testlist_comp: test (comp_for | (',' test)* [',']) */
3249 /* testlist: test (',' test)* [','] */
3250 assert(NCH(n) > 0);
3251 if (TYPE(n) == testlist_comp) {
3252 if (NCH(n) > 1)
3253 assert(TYPE(CHILD(n, 1)) != comp_for);
3254 }
3255 else {
3256 assert(TYPE(n) == testlist ||
3257 TYPE(n) == testlist_star_expr);
3258 }
3259 if (NCH(n) == 1)
3260 return ast_for_expr(c, CHILD(n, 0));
3261 else {
3262 asdl_seq *tmp = seq_for_testlist(c, n);
3263 if (!tmp)
3264 return NULL;
3265 return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3266 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3267 }
3268 }
3269
/* Convert an expr_stmt node into a statement node.
 *
 * Depending on the shape of the node this produces:
 *   - Expr       when the node has a single child (a bare expression),
 *   - AugAssign  when the second child is an augassign,
 *   - AnnAssign  when the second child is an annassign,
 *   - Assign     otherwise (one or more '=' separated targets).
 *
 * Returns NULL when any nested conversion or validation step fails.
 */
static stmt_ty
ast_for_expr_stmt(struct compiling *c, const node *n)
{
    REQ(n, expr_stmt);
    /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
                     [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
       annassign: ':' test ['=' (yield_expr|testlist)]
       testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
       augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
                   '<<=' | '>>=' | '**=' | '//=')
       test: ... here starts the operator precedence dance
     */
    int num = NCH(n);

    if (num == 1) {
        /* A lone expression used as a statement. */
        expr_ty e = ast_for_testlist(c, CHILD(n, 0));
        if (!e)
            return NULL;

        return Expr(e, LINENO(n), n->n_col_offset,
                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
    else if (TYPE(CHILD(n, 1)) == augassign) {
        /* target <op>= value: child 0 is the target, child 1 the operator,
           child 2 the value. */
        expr_ty expr1, expr2;
        operator_ty newoperator;
        node *ch = CHILD(n, 0);

        expr1 = ast_for_testlist(c, ch);
        if (!expr1)
            return NULL;
        if(!set_context(c, expr1, Store, ch))
            return NULL;
        /* set_context checks that most expressions are not the left side.
           Augmented assignments can only have a name, a subscript, or an
           attribute on the left, though, so we have to explicitly check for
           those. */
        switch (expr1->kind) {
            case Name_kind:
            case Attribute_kind:
            case Subscript_kind:
                break;
            default:
                ast_error(c, ch, "illegal expression for augmented assignment");
                return NULL;
        }

        /* The right-hand side is converted as a testlist when it is one,
           otherwise as a plain expression. */
        ch = CHILD(n, 2);
        if (TYPE(ch) == testlist)
            expr2 = ast_for_testlist(c, ch);
        else
            expr2 = ast_for_expr(c, ch);
        if (!expr2)
            return NULL;

        /* The operator is converted last, after both operands succeeded. */
        newoperator = ast_for_augassign(c, CHILD(n, 1));
        if (!newoperator)
            return NULL;

        return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
    else if (TYPE(CHILD(n, 1)) == annassign) {
        /* target: annotation [= value].  expr1 is the target, expr2 the
           annotation and expr3 the optional value. */
        expr_ty expr1, expr2, expr3;
        node *ch = CHILD(n, 0);
        node *deep, *ann = CHILD(n, 1);
        /* 'simple' stays 1 only for an unparenthesized plain name target. */
        int simple = 1;

        /* AnnAssigns are only allowed in Python 3.6 or greater */
        if (c->c_feature_version < 6) {
            ast_error(c, ch,
                      "Variable annotation syntax is only supported in Python 3.6 and greater");
            return NULL;
        }

        /* we keep track of parens to qualify (x) as expression not name */
        /* Descend through single-child nodes; if the first node with a
           different child count starts with '(', the target was written in
           parentheses. */
        deep = ch;
        while (NCH(deep) == 1) {
            deep = CHILD(deep, 0);
        }
        if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
            simple = 0;
        }
        expr1 = ast_for_testlist(c, ch);
        if (!expr1) {
            return NULL;
        }
        /* Only names, attributes and subscripts may be annotated.  Their
           context is switched to Store directly here rather than through
           set_context. */
        switch (expr1->kind) {
            case Name_kind:
                if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
                    return NULL;
                }
                expr1->v.Name.ctx = Store;
                break;
            case Attribute_kind:
                if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
                    return NULL;
                }
                expr1->v.Attribute.ctx = Store;
                break;
            case Subscript_kind:
                expr1->v.Subscript.ctx = Store;
                break;
            case List_kind:
                ast_error(c, ch,
                          "only single target (not list) can be annotated");
                return NULL;
            case Tuple_kind:
                ast_error(c, ch,
                          "only single target (not tuple) can be annotated");
                return NULL;
            default:
                ast_error(c, ch,
                          "illegal target for annotation");
                return NULL;
        }

        if (expr1->kind != Name_kind) {
            simple = 0;
        }
        /* Child 1 of annassign is the annotation expression. */
        ch = CHILD(ann, 1);
        expr2 = ast_for_expr(c, ch);
        if (!expr2) {
            return NULL;
        }
        if (NCH(ann) == 2) {
            /* ':' annotation only, no assigned value. */
            return AnnAssign(expr1, expr2, NULL, simple,
                             LINENO(n), n->n_col_offset,
                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        else {
            /* ':' annotation '=' value: the value is child 3.
               NOTE(review): the grammar comment above says the value is
               (yield_expr|testlist) while this test is for
               testlist_star_expr -- confirm against the grammar file. */
            ch = CHILD(ann, 3);
            if (TYPE(ch) == testlist_star_expr) {
                expr3 = ast_for_testlist(c, ch);
            }
            else {
                expr3 = ast_for_expr(c, ch);
            }
            if (!expr3) {
                return NULL;
            }
            return AnnAssign(expr1, expr2, expr3, simple,
                             LINENO(n), n->n_col_offset,
                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
    }
    else {
        int i, nch_minus_type, has_type_comment;
        asdl_seq *targets;
        node *value;
        expr_ty expression;
        string type_comment;

        /* a normal assignment */
        REQ(CHILD(n, 1), EQUAL);

        /* An optional TYPE_COMMENT is the last child; exclude it from the
           child count used for the target/value arithmetic below. */
        has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
        nch_minus_type = num - has_type_comment;

        /* Children alternate operand, '=', operand, ...; every operand but
           the last is a target. */
        targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
        if (!targets)
            return NULL;
        for (i = 0; i < nch_minus_type - 2; i += 2) {
            expr_ty e;
            node *ch = CHILD(n, i);
            if (TYPE(ch) == yield_expr) {
                ast_error(c, ch, "assignment to yield expression not possible");
                return NULL;
            }
            e = ast_for_testlist(c, ch);
            if (!e)
                return NULL;

            /* set context to assign */
            if (!set_context(c, e, Store, CHILD(n, i)))
                return NULL;

            asdl_seq_SET(targets, i / 2, e);
        }
        /* The last operand is the assigned value. */
        value = CHILD(n, nch_minus_type - 1);
        if (TYPE(value) == testlist_star_expr)
            expression = ast_for_testlist(c, value);
        else
            expression = ast_for_expr(c, value);
        if (!expression)
            return NULL;
        if (has_type_comment) {
            /* Index nch_minus_type equals num - 1 here, i.e. the
               TYPE_COMMENT child. */
            type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
            if (!type_comment)
                return NULL;
        }
        else
            type_comment = NULL;
        return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
}
3466
3467
3468 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3469 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3470 {
3471 asdl_seq *seq;
3472 int i;
3473 expr_ty e;
3474
3475 REQ(n, exprlist);
3476
3477 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3478 if (!seq)
3479 return NULL;
3480 for (i = 0; i < NCH(n); i += 2) {
3481 e = ast_for_expr(c, CHILD(n, i));
3482 if (!e)
3483 return NULL;
3484 asdl_seq_SET(seq, i / 2, e);
3485 if (context && !set_context(c, e, context, CHILD(n, i)))
3486 return NULL;
3487 }
3488 return seq;
3489 }
3490
3491 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3492 ast_for_del_stmt(struct compiling *c, const node *n)
3493 {
3494 asdl_seq *expr_list;
3495
3496 /* del_stmt: 'del' exprlist */
3497 REQ(n, del_stmt);
3498
3499 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3500 if (!expr_list)
3501 return NULL;
3502 return Delete(expr_list, LINENO(n), n->n_col_offset,
3503 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3504 }
3505
3506 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3507 ast_for_flow_stmt(struct compiling *c, const node *n)
3508 {
3509 /*
3510 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3511 | yield_stmt
3512 break_stmt: 'break'
3513 continue_stmt: 'continue'
3514 return_stmt: 'return' [testlist]
3515 yield_stmt: yield_expr
3516 yield_expr: 'yield' testlist | 'yield' 'from' test
3517 raise_stmt: 'raise' [test [',' test [',' test]]]
3518 */
3519 node *ch;
3520
3521 REQ(n, flow_stmt);
3522 ch = CHILD(n, 0);
3523 switch (TYPE(ch)) {
3524 case break_stmt:
3525 return Break(LINENO(n), n->n_col_offset,
3526 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3527 case continue_stmt:
3528 return Continue(LINENO(n), n->n_col_offset,
3529 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3530 case yield_stmt: { /* will reduce to yield_expr */
3531 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3532 if (!exp)
3533 return NULL;
3534 return Expr(exp, LINENO(n), n->n_col_offset,
3535 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3536 }
3537 case return_stmt:
3538 if (NCH(ch) == 1)
3539 return Return(NULL, LINENO(n), n->n_col_offset,
3540 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3541 else {
3542 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3543 if (!expression)
3544 return NULL;
3545 return Return(expression, LINENO(n), n->n_col_offset,
3546 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3547 }
3548 case raise_stmt:
3549 if (NCH(ch) == 1)
3550 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3551 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3552 else if (NCH(ch) >= 2) {
3553 expr_ty cause = NULL;
3554 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3555 if (!expression)
3556 return NULL;
3557 if (NCH(ch) == 4) {
3558 cause = ast_for_expr(c, CHILD(ch, 3));
3559 if (!cause)
3560 return NULL;
3561 }
3562 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3563 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3564 }
3565 /* fall through */
3566 default:
3567 PyErr_Format(PyExc_SystemError,
3568 "unexpected flow_stmt: %d", TYPE(ch));
3569 return NULL;
3570 }
3571 }
3572
/* Build an alias node for one name appearing in an import statement.
 *
 * `n` may be an import_as_name, dotted_as_name, dotted_name or STAR node.
 * `store` gates some of the forbidden_name() checks below (see the
 * individual branches).  Returns NULL on failure; an unexpected node type
 * sets SystemError.
 */
static alias_ty
alias_for_import_name(struct compiling *c, const node *n, int store)
{
    /*
      import_as_name: NAME ['as' NAME]
      dotted_as_name: dotted_name ['as' NAME]
      dotted_name: NAME ('.' NAME)*
    */
    identifier str, name;

  loop:
    switch (TYPE(n)) {
        case import_as_name: {
            node *name_node = CHILD(n, 0);
            str = NULL;
            name = NEW_IDENTIFIER(name_node);
            if (!name)
                return NULL;
            if (NCH(n) == 3) {
                /* NAME 'as' NAME: the 'as' target is validated only when
                   `store` is set. */
                node *str_node = CHILD(n, 2);
                str = NEW_IDENTIFIER(str_node);
                if (!str)
                    return NULL;
                if (store && forbidden_name(c, str, str_node, 0))
                    return NULL;
            }
            else {
                /* No 'as' part: the imported name itself is validated,
                   regardless of `store`. */
                if (forbidden_name(c, name, name_node, 0))
                    return NULL;
            }
            return alias(name, str, c->c_arena);
        }
        case dotted_as_name:
            if (NCH(n) == 1) {
                /* Just a dotted_name: re-dispatch on the child. */
                n = CHILD(n, 0);
                goto loop;
            }
            else {
                /* dotted_name 'as' NAME: build the alias for the dotted
                   name (with store == 0), then attach the 'as' name. */
                node *asname_node = CHILD(n, 2);
                alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
                if (!a)
                    return NULL;
                assert(!a->asname);
                a->asname = NEW_IDENTIFIER(asname_node);
                if (!a->asname)
                    return NULL;
                if (forbidden_name(c, a->asname, asname_node, 0))
                    return NULL;
                return a;
            }
        case dotted_name:
            if (NCH(n) == 1) {
                /* A single NAME; validated only when `store` is set. */
                node *name_node = CHILD(n, 0);
                name = NEW_IDENTIFIER(name_node);
                if (!name)
                    return NULL;
                if (store && forbidden_name(c, name, name_node, 0))
                    return NULL;
                return alias(name, NULL, c->c_arena);
            }
            else {
                /* Create a string of the form "a.b.c" */
                int i;
                size_t len;
                char *s;
                PyObject *uni;

                /* First pass: total length of all name parts (even child
                   indices) plus the separating dots. */
                len = 0;
                for (i = 0; i < NCH(n); i += 2)
                    /* length of string plus one for the dot */
                    len += strlen(STR(CHILD(n, i))) + 1;
                len--; /* the last name doesn't have a dot */
                str = PyBytes_FromStringAndSize(NULL, len);
                if (!str)
                    return NULL;
                s = PyBytes_AS_STRING(str);
                /* NOTE(review): this early return does not release `str`
                   -- confirm whether s can actually be NULL here. */
                if (!s)
                    return NULL;
                /* Second pass: copy each part followed by a dot. */
                for (i = 0; i < NCH(n); i += 2) {
                    char *sch = STR(CHILD(n, i));
                    strcpy(s, STR(CHILD(n, i)));
                    s += strlen(sch);
                    *s++ = '.';
                }
                /* Replace the trailing dot with the terminator. */
                --s;
                *s = '\0';
                /* Decode the assembled bytes as UTF-8; the temporary bytes
                   object is released whether or not decoding succeeded. */
                uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
                                           PyBytes_GET_SIZE(str),
                                           NULL);
                Py_DECREF(str);
                if (!uni)
                    return NULL;
                str = uni;
                PyUnicode_InternInPlace(&str);
                /* Register the string with the arena; on failure drop our
                   reference and bail out. */
                if (PyArena_AddPyObject(c->c_arena, str) < 0) {
                    Py_DECREF(str);
                    return NULL;
                }
                return alias(str, NULL, c->c_arena);
            }
        case STAR:
            /* "from ... import *": the alias name is the string "*". */
            str = PyUnicode_InternFromString("*");
            if (!str)
                return NULL;
            if (PyArena_AddPyObject(c->c_arena, str) < 0) {
                Py_DECREF(str);
                return NULL;
            }
            return alias(str, NULL, c->c_arena);
        default:
            PyErr_Format(PyExc_SystemError,
                         "unexpected import name: %d", TYPE(n));
            return NULL;
    }

    /* Every case above returns or jumps back to `loop`; this is a
       defensive fallback. */
    PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
    return NULL;
}
3691
/* Convert an import_stmt node into an Import or ImportFrom statement.
 *
 * The start position is taken from the import_stmt node before `n` is
 * rebound to its children.  Returns NULL on failure; a child that is
 * neither import_name nor import_from sets SystemError.
 */
static stmt_ty
ast_for_import_stmt(struct compiling *c, const node *n)
{
    /*
      import_stmt: import_name | import_from
      import_name: 'import' dotted_as_names
      import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
                   'import' ('*' | '(' import_as_names ')' | import_as_names)
    */
    int lineno;
    int col_offset;
    int i;
    asdl_seq *aliases;

    REQ(n, import_stmt);
    /* Remember where the statement starts; `n` is rebound below. */
    lineno = LINENO(n);
    col_offset = n->n_col_offset;
    n = CHILD(n, 0);
    if (TYPE(n) == import_name) {
        /* Step down to the dotted_as_names list; its entries sit at the
           even child indices. */
        n = CHILD(n, 1);
        REQ(n, dotted_as_names);
        aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
        if (!aliases)
            return NULL;
        for (i = 0; i < NCH(n); i += 2) {
            alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
            if (!import_alias)
                return NULL;
            asdl_seq_SET(aliases, i / 2, import_alias);
        }
        // Even though n is modified above, the end position is not changed
        return Import(aliases, lineno, col_offset,
                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
    else if (TYPE(n) == import_from) {
        int n_children;
        int idx, ndots = 0;
        /* Keep the import_from node for the end position; `n` is rebound
           to the imported-names node below. */
        const node *n_copy = n;
        alias_ty mod = NULL;
        identifier modname = NULL;

        /* Count the number of dots (for relative imports) and check for the
           optional module name */
        for (idx = 1; idx < NCH(n); idx++) {
            if (TYPE(CHILD(n, idx)) == dotted_name) {
                mod = alias_for_import_name(c, CHILD(n, idx), 0);
                if (!mod)
                    return NULL;
                idx++;
                break;
            } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
                /* three consecutive dots are tokenized as one ELLIPSIS */
                ndots += 3;
                continue;
            } else if (TYPE(CHILD(n, idx)) != DOT) {
                break;
            }
            ndots++;
        }
        idx++; /* skip over the 'import' keyword */
        /* Rebind `n` to the node holding the imported names and record how
           many children will be walked. */
        switch (TYPE(CHILD(n, idx))) {
        case STAR:
            /* from ... import * */
            n = CHILD(n, idx);
            n_children = 1;
            break;
        case LPAR:
            /* from ... import (x, y, z) */
            n = CHILD(n, idx + 1);
            n_children = NCH(n);
            break;
        case import_as_names:
            /* from ... import x, y, z */
            n = CHILD(n, idx);
            n_children = NCH(n);
            /* An even child count means the list ends with a comma. */
            if (n_children % 2 == 0) {
                ast_error(c, n,
                          "trailing comma not allowed without"
                          " surrounding parentheses");
                return NULL;
            }
            break;
        default:
            ast_error(c, n, "Unexpected node-type in from-import");
            return NULL;
        }

        aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
        if (!aliases)
            return NULL;

        /* handle "from ... import *" special b/c there's no children */
        if (TYPE(n) == STAR) {
            alias_ty import_alias = alias_for_import_name(c, n, 1);
            if (!import_alias)
                return NULL;
            asdl_seq_SET(aliases, 0, import_alias);
        }
        else {
            for (i = 0; i < NCH(n); i += 2) {
                alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
                if (!import_alias)
                    return NULL;
                asdl_seq_SET(aliases, i / 2, import_alias);
            }
        }
        /* modname stays NULL when no dotted_name was found above. */
        if (mod != NULL)
            modname = mod->name;
        return ImportFrom(modname, aliases, ndots, lineno, col_offset,
                          n_copy->n_end_lineno, n_copy->n_end_col_offset,
                          c->c_arena);
    }
    PyErr_Format(PyExc_SystemError,
                 "unknown import statement: starts with command '%s'",
                 STR(CHILD(n, 0)));
    return NULL;
}
3809
3810 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3811 ast_for_global_stmt(struct compiling *c, const node *n)
3812 {
3813 /* global_stmt: 'global' NAME (',' NAME)* */
3814 identifier name;
3815 asdl_seq *s;
3816 int i;
3817
3818 REQ(n, global_stmt);
3819 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3820 if (!s)
3821 return NULL;
3822 for (i = 1; i < NCH(n); i += 2) {
3823 name = NEW_IDENTIFIER(CHILD(n, i));
3824 if (!name)
3825 return NULL;
3826 asdl_seq_SET(s, i / 2, name);
3827 }
3828 return Global(s, LINENO(n), n->n_col_offset,
3829 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3830 }
3831
3832 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3833 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3834 {
3835 /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3836 identifier name;
3837 asdl_seq *s;
3838 int i;
3839
3840 REQ(n, nonlocal_stmt);
3841 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3842 if (!s)
3843 return NULL;
3844 for (i = 1; i < NCH(n); i += 2) {
3845 name = NEW_IDENTIFIER(CHILD(n, i));
3846 if (!name)
3847 return NULL;
3848 asdl_seq_SET(s, i / 2, name);
3849 }
3850 return Nonlocal(s, LINENO(n), n->n_col_offset,
3851 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3852 }
3853
3854 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3855 ast_for_assert_stmt(struct compiling *c, const node *n)
3856 {
3857 /* assert_stmt: 'assert' test [',' test] */
3858 REQ(n, assert_stmt);
3859 if (NCH(n) == 2) {
3860 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3861 if (!expression)
3862 return NULL;
3863 return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3864 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3865 }
3866 else if (NCH(n) == 4) {
3867 expr_ty expr1, expr2;
3868
3869 expr1 = ast_for_expr(c, CHILD(n, 1));
3870 if (!expr1)
3871 return NULL;
3872 expr2 = ast_for_expr(c, CHILD(n, 3));
3873 if (!expr2)
3874 return NULL;
3875
3876 return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3877 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3878 }
3879 PyErr_Format(PyExc_SystemError,
3880 "improper number of parts to 'assert' statement: %d",
3881 NCH(n));
3882 return NULL;
3883 }
3884
/* Convert a suite (or func_body_suite) node into a sequence of statements.
 *
 * The sequence is sized from num_stmts(n) and filled in source order;
 * a final assertion checks that exactly that many slots were filled.
 * Returns NULL as soon as one statement fails to convert.
 */
static asdl_seq *
ast_for_suite(struct compiling *c, const node *n)
{
    /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
    asdl_seq *seq;
    stmt_ty s;
    /* pos is the next free slot in seq. */
    int i, total, num, end, pos = 0;
    node *ch;

    if (TYPE(n) != func_body_suite) {
        REQ(n, suite);
    }

    /* presumably the number of statements under n -- num_stmts is defined
       outside this block. */
    total = num_stmts(n);
    seq = _Py_asdl_seq_new(total, c->c_arena);
    if (!seq)
        return NULL;
    if (TYPE(CHILD(n, 0)) == simple_stmt) {
        /* One-line suite: the statements are the children of simple_stmt. */
        n = CHILD(n, 0);
        /* simple_stmt always ends with a NEWLINE,
           and may have a trailing SEMI
        */
        end = NCH(n) - 1;
        if (TYPE(CHILD(n, end - 1)) == SEMI)
            end--;
        /* loop by 2 to skip semi-colons */
        for (i = 0; i < end; i += 2) {
            ch = CHILD(n, i);
            s = ast_for_stmt(c, ch);
            if (!s)
                return NULL;
            asdl_seq_SET(seq, pos++, s);
        }
    }
    else {
        /* Indented suite: skip NEWLINE and INDENT (children 0 and 1), or
           NEWLINE TYPE_COMMENT NEWLINE INDENT (children 0..3) when a type
           comment is present. */
        i = 2;
        if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
            i += 2;
            REQ(CHILD(n, 2), NEWLINE);
        }

        /* Stop before the last child (the DEDENT). */
        for (; i < (NCH(n) - 1); i++) {
            ch = CHILD(n, i);
            REQ(ch, stmt);
            num = num_stmts(ch);
            if (num == 1) {
                /* small_stmt or compound_stmt with only one child */
                s = ast_for_stmt(c, ch);
                if (!s)
                    return NULL;
                asdl_seq_SET(seq, pos++, s);
            }
            else {
                /* Several statements on one line: expand the simple_stmt's
                   children individually. */
                int j;
                ch = CHILD(ch, 0);
                REQ(ch, simple_stmt);
                for (j = 0; j < NCH(ch); j += 2) {
                    /* statement terminates with a semi-colon ';' */
                    /* A child with no children of its own is expected to
                       be the final one; stop there. */
                    if (NCH(CHILD(ch, j)) == 0) {
                        assert((j + 1) == NCH(ch));
                        break;
                    }
                    s = ast_for_stmt(c, CHILD(ch, j));
                    if (!s)
                        return NULL;
                    asdl_seq_SET(seq, pos++, s);
                }
            }
        }
    }
    /* Every slot allocated above must have been filled. */
    assert(pos == seq->size);
    return seq;
}
3958
3959 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)3960 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
3961 {
3962 Py_ssize_t tot = asdl_seq_LEN(s);
3963 // There must be no empty suites.
3964 assert(tot > 0);
3965 stmt_ty last = asdl_seq_GET(s, tot - 1);
3966 *end_lineno = last->end_lineno;
3967 *end_col_offset = last->end_col_offset;
3968 }
3969
3970 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3971 ast_for_if_stmt(struct compiling *c, const node *n)
3972 {
3973 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3974 ['else' ':' suite]
3975 */
3976 char *s;
3977 int end_lineno, end_col_offset;
3978
3979 REQ(n, if_stmt);
3980
3981 if (NCH(n) == 4) {
3982 expr_ty expression;
3983 asdl_seq *suite_seq;
3984
3985 expression = ast_for_expr(c, CHILD(n, 1));
3986 if (!expression)
3987 return NULL;
3988 suite_seq = ast_for_suite(c, CHILD(n, 3));
3989 if (!suite_seq)
3990 return NULL;
3991 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
3992
3993 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3994 end_lineno, end_col_offset, c->c_arena);
3995 }
3996
3997 s = STR(CHILD(n, 4));
3998 /* s[2], the third character in the string, will be
3999 's' for el_s_e, or
4000 'i' for el_i_f
4001 */
4002 if (s[2] == 's') {
4003 expr_ty expression;
4004 asdl_seq *seq1, *seq2;
4005
4006 expression = ast_for_expr(c, CHILD(n, 1));
4007 if (!expression)
4008 return NULL;
4009 seq1 = ast_for_suite(c, CHILD(n, 3));
4010 if (!seq1)
4011 return NULL;
4012 seq2 = ast_for_suite(c, CHILD(n, 6));
4013 if (!seq2)
4014 return NULL;
4015 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4016
4017 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4018 end_lineno, end_col_offset, c->c_arena);
4019 }
4020 else if (s[2] == 'i') {
4021 int i, n_elif, has_else = 0;
4022 expr_ty expression;
4023 asdl_seq *suite_seq;
4024 asdl_seq *orelse = NULL;
4025 n_elif = NCH(n) - 4;
4026 /* must reference the child n_elif+1 since 'else' token is third,
4027 not fourth, child from the end. */
4028 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
4029 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
4030 has_else = 1;
4031 n_elif -= 3;
4032 }
4033 n_elif /= 4;
4034
4035 if (has_else) {
4036 asdl_seq *suite_seq2;
4037
4038 orelse = _Py_asdl_seq_new(1, c->c_arena);
4039 if (!orelse)
4040 return NULL;
4041 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
4042 if (!expression)
4043 return NULL;
4044 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
4045 if (!suite_seq)
4046 return NULL;
4047 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4048 if (!suite_seq2)
4049 return NULL;
4050 get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
4051
4052 asdl_seq_SET(orelse, 0,
4053 If(expression, suite_seq, suite_seq2,
4054 LINENO(CHILD(n, NCH(n) - 7)),
4055 CHILD(n, NCH(n) - 7)->n_col_offset,
4056 end_lineno, end_col_offset, c->c_arena));
4057 /* the just-created orelse handled the last elif */
4058 n_elif--;
4059 }
4060
4061 for (i = 0; i < n_elif; i++) {
4062 int off = 5 + (n_elif - i - 1) * 4;
4063 asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
4064 if (!newobj)
4065 return NULL;
4066 expression = ast_for_expr(c, CHILD(n, off));
4067 if (!expression)
4068 return NULL;
4069 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
4070 if (!suite_seq)
4071 return NULL;
4072
4073 if (orelse != NULL) {
4074 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4075 } else {
4076 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4077 }
4078 asdl_seq_SET(newobj, 0,
4079 If(expression, suite_seq, orelse,
4080 LINENO(CHILD(n, off - 1)),
4081 CHILD(n, off - 1)->n_col_offset,
4082 end_lineno, end_col_offset, c->c_arena));
4083 orelse = newobj;
4084 }
4085 expression = ast_for_expr(c, CHILD(n, 1));
4086 if (!expression)
4087 return NULL;
4088 suite_seq = ast_for_suite(c, CHILD(n, 3));
4089 if (!suite_seq)
4090 return NULL;
4091 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4092 return If(expression, suite_seq, orelse,
4093 LINENO(n), n->n_col_offset,
4094 end_lineno, end_col_offset, c->c_arena);
4095 }
4096
4097 PyErr_Format(PyExc_SystemError,
4098 "unexpected token in 'if' statement: %s", s);
4099 return NULL;
4100 }
4101
4102 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)4103 ast_for_while_stmt(struct compiling *c, const node *n)
4104 {
4105 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
4106 REQ(n, while_stmt);
4107 int end_lineno, end_col_offset;
4108
4109 if (NCH(n) == 4) {
4110 expr_ty expression;
4111 asdl_seq *suite_seq;
4112
4113 expression = ast_for_expr(c, CHILD(n, 1));
4114 if (!expression)
4115 return NULL;
4116 suite_seq = ast_for_suite(c, CHILD(n, 3));
4117 if (!suite_seq)
4118 return NULL;
4119 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4120 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4121 end_lineno, end_col_offset, c->c_arena);
4122 }
4123 else if (NCH(n) == 7) {
4124 expr_ty expression;
4125 asdl_seq *seq1, *seq2;
4126
4127 expression = ast_for_expr(c, CHILD(n, 1));
4128 if (!expression)
4129 return NULL;
4130 seq1 = ast_for_suite(c, CHILD(n, 3));
4131 if (!seq1)
4132 return NULL;
4133 seq2 = ast_for_suite(c, CHILD(n, 6));
4134 if (!seq2)
4135 return NULL;
4136 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4137
4138 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4139 end_lineno, end_col_offset, c->c_arena);
4140 }
4141
4142 PyErr_Format(PyExc_SystemError,
4143 "wrong number of tokens for 'while' statement: %d",
4144 NCH(n));
4145 return NULL;
4146 }
4147
4148 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4149 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4150 {
4151 const node * const n = is_async ? CHILD(n0, 1) : n0;
4152 asdl_seq *_target, *seq = NULL, *suite_seq;
4153 expr_ty expression;
4154 expr_ty target, first;
4155 const node *node_target;
4156 int end_lineno, end_col_offset;
4157 int has_type_comment;
4158 string type_comment;
4159
4160 if (is_async && c->c_feature_version < 5) {
4161 ast_error(c, n,
4162 "Async for loops are only supported in Python 3.5 and greater");
4163 return NULL;
4164 }
4165
4166 /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4167 REQ(n, for_stmt);
4168
4169 has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4170
4171 if (NCH(n) == 9 + has_type_comment) {
4172 seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4173 if (!seq)
4174 return NULL;
4175 }
4176
4177 node_target = CHILD(n, 1);
4178 _target = ast_for_exprlist(c, node_target, Store);
4179 if (!_target)
4180 return NULL;
4181 /* Check the # of children rather than the length of _target, since
4182 for x, in ... has 1 element in _target, but still requires a Tuple. */
4183 first = (expr_ty)asdl_seq_GET(_target, 0);
4184 if (NCH(node_target) == 1)
4185 target = first;
4186 else
4187 target = Tuple(_target, Store, first->lineno, first->col_offset,
4188 node_target->n_end_lineno, node_target->n_end_col_offset,
4189 c->c_arena);
4190
4191 expression = ast_for_testlist(c, CHILD(n, 3));
4192 if (!expression)
4193 return NULL;
4194 suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4195 if (!suite_seq)
4196 return NULL;
4197
4198 if (seq != NULL) {
4199 get_last_end_pos(seq, &end_lineno, &end_col_offset);
4200 } else {
4201 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4202 }
4203
4204 if (has_type_comment) {
4205 type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4206 if (!type_comment)
4207 return NULL;
4208 }
4209 else
4210 type_comment = NULL;
4211
4212 if (is_async)
4213 return AsyncFor(target, expression, suite_seq, seq, type_comment,
4214 LINENO(n0), n0->n_col_offset,
4215 end_lineno, end_col_offset, c->c_arena);
4216 else
4217 return For(target, expression, suite_seq, seq, type_comment,
4218 LINENO(n), n->n_col_offset,
4219 end_lineno, end_col_offset, c->c_arena);
4220 }
4221
4222 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4223 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4224 {
4225 /* except_clause: 'except' [test ['as' test]] */
4226 int end_lineno, end_col_offset;
4227 REQ(exc, except_clause);
4228 REQ(body, suite);
4229
4230 if (NCH(exc) == 1) {
4231 asdl_seq *suite_seq = ast_for_suite(c, body);
4232 if (!suite_seq)
4233 return NULL;
4234 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4235
4236 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4237 exc->n_col_offset,
4238 end_lineno, end_col_offset, c->c_arena);
4239 }
4240 else if (NCH(exc) == 2) {
4241 expr_ty expression;
4242 asdl_seq *suite_seq;
4243
4244 expression = ast_for_expr(c, CHILD(exc, 1));
4245 if (!expression)
4246 return NULL;
4247 suite_seq = ast_for_suite(c, body);
4248 if (!suite_seq)
4249 return NULL;
4250 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4251
4252 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4253 exc->n_col_offset,
4254 end_lineno, end_col_offset, c->c_arena);
4255 }
4256 else if (NCH(exc) == 4) {
4257 asdl_seq *suite_seq;
4258 expr_ty expression;
4259 identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4260 if (!e)
4261 return NULL;
4262 if (forbidden_name(c, e, CHILD(exc, 3), 0))
4263 return NULL;
4264 expression = ast_for_expr(c, CHILD(exc, 1));
4265 if (!expression)
4266 return NULL;
4267 suite_seq = ast_for_suite(c, body);
4268 if (!suite_seq)
4269 return NULL;
4270 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4271
4272 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4273 exc->n_col_offset,
4274 end_lineno, end_col_offset, c->c_arena);
4275 }
4276
4277 PyErr_Format(PyExc_SystemError,
4278 "wrong number of children for 'except' clause: %d",
4279 NCH(exc));
4280 return NULL;
4281 }
4282
/* Convert a try_stmt node into a Try statement.
 *
 * As read by this function, the node's children are: the 'try' keyword,
 * ':', the body suite (child 2), then groups of three children -- a
 * clause head (an except_clause node, or a NAME token for else/finally),
 * ':', and a suite.
 *
 * The end position is taken from the finally suite if present, else the
 * else suite, else the last except handler.  Returns NULL on failure.
 */
static stmt_ty
ast_for_try_stmt(struct compiling *c, const node *n)
{
    const int nch = NCH(n);
    /* n_except starts as the number of three-child groups after the body
       and is decremented for each group that turns out to be else or
       finally. */
    int end_lineno, end_col_offset, n_except = (nch - 3)/3;
    asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
    excepthandler_ty last_handler;

    REQ(n, try_stmt);

    body = ast_for_suite(c, CHILD(n, 2));
    if (body == NULL)
        return NULL;

    /* Inspect the head of the last group: a NAME means else or finally. */
    if (TYPE(CHILD(n, nch - 3)) == NAME) {
        if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
            if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
                /* we can assume it's an "else",
                   because nch >= 9 for try-else-finally and
                   it would otherwise have a type of except_clause */
                orelse = ast_for_suite(c, CHILD(n, nch - 4));
                if (orelse == NULL)
                    return NULL;
                n_except--;
            }

            finally = ast_for_suite(c, CHILD(n, nch - 1));
            if (finally == NULL)
                return NULL;
            n_except--;
        }
        else {
            /* we can assume it's an "else",
               otherwise it would have a type of except_clause */
            orelse = ast_for_suite(c, CHILD(n, nch - 1));
            if (orelse == NULL)
                return NULL;
            n_except--;
        }
    }
    else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
        ast_error(c, n, "malformed 'try' statement");
        return NULL;
    }

    if (n_except > 0) {
        int i;
        /* process except statements to create a try ... except */
        handlers = _Py_asdl_seq_new(n_except, c->c_arena);
        if (handlers == NULL)
            return NULL;

        /* The i-th clause head is child 3 + 3*i, its suite child 5 + 3*i. */
        for (i = 0; i < n_except; i++) {
            excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
                                                       CHILD(n, 5 + i * 3));
            if (!e)
                return NULL;
            asdl_seq_SET(handlers, i, e);
        }
    }

    /* There must be a finally suite or at least one handler. */
    assert(finally != NULL || asdl_seq_LEN(handlers));
        if (finally != NULL) {
        // finally is always last
        get_last_end_pos(finally, &end_lineno, &end_col_offset);
    } else if (orelse != NULL) {
        // otherwise else is last
        get_last_end_pos(orelse, &end_lineno, &end_col_offset);
    } else {
        // inline the get_last_end_pos logic due to layout mismatch
        // (handlers holds excepthandler_ty elements rather than stmt_ty)
        last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
        end_lineno = last_handler->end_lineno;
        end_col_offset = last_handler->end_col_offset;
    }
    return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
               end_lineno, end_col_offset, c->c_arena);
}
4360
4361 /* with_item: test ['as' expr] */
4362 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4363 ast_for_with_item(struct compiling *c, const node *n)
4364 {
4365 expr_ty context_expr, optional_vars = NULL;
4366
4367 REQ(n, with_item);
4368 context_expr = ast_for_expr(c, CHILD(n, 0));
4369 if (!context_expr)
4370 return NULL;
4371 if (NCH(n) == 3) {
4372 optional_vars = ast_for_expr(c, CHILD(n, 2));
4373
4374 if (!optional_vars) {
4375 return NULL;
4376 }
4377 if (!set_context(c, optional_vars, Store, n)) {
4378 return NULL;
4379 }
4380 }
4381
4382 return withitem(context_expr, optional_vars, c->c_arena);
4383 }
4384
4385 /* with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite */
4386 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4387 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4388 {
4389 const node * const n = is_async ? CHILD(n0, 1) : n0;
4390 int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4391 asdl_seq *items, *body;
4392 string type_comment;
4393
4394 if (is_async && c->c_feature_version < 5) {
4395 ast_error(c, n,
4396 "Async with statements are only supported in Python 3.5 and greater");
4397 return NULL;
4398 }
4399
4400 REQ(n, with_stmt);
4401
4402 has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4403 nch_minus_type = NCH(n) - has_type_comment;
4404
4405 n_items = (nch_minus_type - 2) / 2;
4406 items = _Py_asdl_seq_new(n_items, c->c_arena);
4407 if (!items)
4408 return NULL;
4409 for (i = 1; i < nch_minus_type - 2; i += 2) {
4410 withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4411 if (!item)
4412 return NULL;
4413 asdl_seq_SET(items, (i - 1) / 2, item);
4414 }
4415
4416 body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4417 if (!body)
4418 return NULL;
4419 get_last_end_pos(body, &end_lineno, &end_col_offset);
4420
4421 if (has_type_comment) {
4422 type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4423 if (!type_comment)
4424 return NULL;
4425 }
4426 else
4427 type_comment = NULL;
4428
4429 if (is_async)
4430 return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4431 end_lineno, end_col_offset, c->c_arena);
4432 else
4433 return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4434 end_lineno, end_col_offset, c->c_arena);
4435 }
4436
4437 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4438 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4439 {
4440 /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4441 PyObject *classname;
4442 asdl_seq *s;
4443 expr_ty call;
4444 int end_lineno, end_col_offset;
4445
4446 REQ(n, classdef);
4447
4448 if (NCH(n) == 4) { /* class NAME ':' suite */
4449 s = ast_for_suite(c, CHILD(n, 3));
4450 if (!s)
4451 return NULL;
4452 get_last_end_pos(s, &end_lineno, &end_col_offset);
4453
4454 classname = NEW_IDENTIFIER(CHILD(n, 1));
4455 if (!classname)
4456 return NULL;
4457 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4458 return NULL;
4459 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4460 LINENO(n), n->n_col_offset,
4461 end_lineno, end_col_offset, c->c_arena);
4462 }
4463
4464 if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4465 s = ast_for_suite(c, CHILD(n, 5));
4466 if (!s)
4467 return NULL;
4468 get_last_end_pos(s, &end_lineno, &end_col_offset);
4469
4470 classname = NEW_IDENTIFIER(CHILD(n, 1));
4471 if (!classname)
4472 return NULL;
4473 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4474 return NULL;
4475 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4476 LINENO(n), n->n_col_offset,
4477 end_lineno, end_col_offset, c->c_arena);
4478 }
4479
4480 /* class NAME '(' arglist ')' ':' suite */
4481 /* build up a fake Call node so we can extract its pieces */
4482 {
4483 PyObject *dummy_name;
4484 expr_ty dummy;
4485 dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4486 if (!dummy_name)
4487 return NULL;
4488 dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4489 CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4490 c->c_arena);
4491 call = ast_for_call(c, CHILD(n, 3), dummy, NULL, CHILD(n, 4));
4492 if (!call)
4493 return NULL;
4494 }
4495 s = ast_for_suite(c, CHILD(n, 6));
4496 if (!s)
4497 return NULL;
4498 get_last_end_pos(s, &end_lineno, &end_col_offset);
4499
4500 classname = NEW_IDENTIFIER(CHILD(n, 1));
4501 if (!classname)
4502 return NULL;
4503 if (forbidden_name(c, classname, CHILD(n, 1), 0))
4504 return NULL;
4505
4506 return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4507 decorator_seq, LINENO(n), n->n_col_offset,
4508 end_lineno, end_col_offset, c->c_arena);
4509 }
4510
4511 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4512 ast_for_stmt(struct compiling *c, const node *n)
4513 {
4514 if (TYPE(n) == stmt) {
4515 assert(NCH(n) == 1);
4516 n = CHILD(n, 0);
4517 }
4518 if (TYPE(n) == simple_stmt) {
4519 assert(num_stmts(n) == 1);
4520 n = CHILD(n, 0);
4521 }
4522 if (TYPE(n) == small_stmt) {
4523 n = CHILD(n, 0);
4524 /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4525 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4526 */
4527 switch (TYPE(n)) {
4528 case expr_stmt:
4529 return ast_for_expr_stmt(c, n);
4530 case del_stmt:
4531 return ast_for_del_stmt(c, n);
4532 case pass_stmt:
4533 return Pass(LINENO(n), n->n_col_offset,
4534 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4535 case flow_stmt:
4536 return ast_for_flow_stmt(c, n);
4537 case import_stmt:
4538 return ast_for_import_stmt(c, n);
4539 case global_stmt:
4540 return ast_for_global_stmt(c, n);
4541 case nonlocal_stmt:
4542 return ast_for_nonlocal_stmt(c, n);
4543 case assert_stmt:
4544 return ast_for_assert_stmt(c, n);
4545 default:
4546 PyErr_Format(PyExc_SystemError,
4547 "unhandled small_stmt: TYPE=%d NCH=%d\n",
4548 TYPE(n), NCH(n));
4549 return NULL;
4550 }
4551 }
4552 else {
4553 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4554 | funcdef | classdef | decorated | async_stmt
4555 */
4556 node *ch = CHILD(n, 0);
4557 REQ(n, compound_stmt);
4558 switch (TYPE(ch)) {
4559 case if_stmt:
4560 return ast_for_if_stmt(c, ch);
4561 case while_stmt:
4562 return ast_for_while_stmt(c, ch);
4563 case for_stmt:
4564 return ast_for_for_stmt(c, ch, 0);
4565 case try_stmt:
4566 return ast_for_try_stmt(c, ch);
4567 case with_stmt:
4568 return ast_for_with_stmt(c, ch, 0);
4569 case funcdef:
4570 return ast_for_funcdef(c, ch, NULL);
4571 case classdef:
4572 return ast_for_classdef(c, ch, NULL);
4573 case decorated:
4574 return ast_for_decorated(c, ch);
4575 case async_stmt:
4576 return ast_for_async_stmt(c, ch);
4577 default:
4578 PyErr_Format(PyExc_SystemError,
4579 "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4580 TYPE(n), NCH(n));
4581 return NULL;
4582 }
4583 }
4584 }
4585
4586 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4587 parsenumber_raw(struct compiling *c, const char *s)
4588 {
4589 const char *end;
4590 long x;
4591 double dx;
4592 Py_complex compl;
4593 int imflag;
4594
4595 assert(s != NULL);
4596 errno = 0;
4597 end = s + strlen(s) - 1;
4598 imflag = *end == 'j' || *end == 'J';
4599 if (s[0] == '0') {
4600 x = (long) PyOS_strtoul(s, (char **)&end, 0);
4601 if (x < 0 && errno == 0) {
4602 return PyLong_FromString(s, (char **)0, 0);
4603 }
4604 }
4605 else
4606 x = PyOS_strtol(s, (char **)&end, 0);
4607 if (*end == '\0') {
4608 if (errno != 0)
4609 return PyLong_FromString(s, (char **)0, 0);
4610 return PyLong_FromLong(x);
4611 }
4612 /* XXX Huge floats may silently fail */
4613 if (imflag) {
4614 compl.real = 0.;
4615 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4616 if (compl.imag == -1.0 && PyErr_Occurred())
4617 return NULL;
4618 return PyComplex_FromCComplex(compl);
4619 }
4620 else
4621 {
4622 dx = PyOS_string_to_double(s, NULL, NULL);
4623 if (dx == -1.0 && PyErr_Occurred())
4624 return NULL;
4625 return PyFloat_FromDouble(dx);
4626 }
4627 }
4628
4629 static PyObject *
parsenumber(struct compiling * c,const char * s)4630 parsenumber(struct compiling *c, const char *s)
4631 {
4632 char *dup, *end;
4633 PyObject *res = NULL;
4634
4635 assert(s != NULL);
4636
4637 if (strchr(s, '_') == NULL) {
4638 return parsenumber_raw(c, s);
4639 }
4640 /* Create a duplicate without underscores. */
4641 dup = PyMem_Malloc(strlen(s) + 1);
4642 if (dup == NULL) {
4643 return PyErr_NoMemory();
4644 }
4645 end = dup;
4646 for (; *s; s++) {
4647 if (*s != '_') {
4648 *end++ = *s;
4649 }
4650 }
4651 *end = '\0';
4652 res = parsenumber_raw(c, dup);
4653 PyMem_Free(dup);
4654 return res;
4655 }
4656
4657 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4658 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4659 {
4660 const char *s, *t;
4661 t = s = *sPtr;
4662 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4663 while (s < end && (*s & 0x80)) s++;
4664 *sPtr = s;
4665 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4666 }
4667
4668 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4669 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4670 unsigned char first_invalid_escape_char)
4671 {
4672 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4673 first_invalid_escape_char);
4674 if (msg == NULL) {
4675 return -1;
4676 }
4677 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4678 c->c_filename, LINENO(n),
4679 NULL, NULL) < 0)
4680 {
4681 if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4682 /* Replace the DeprecationWarning exception with a SyntaxError
4683 to get a more accurate error report */
4684 PyErr_Clear();
4685 ast_error(c, n, "%U", msg);
4686 }
4687 Py_DECREF(msg);
4688 return -1;
4689 }
4690 Py_DECREF(msg);
4691 return 0;
4692 }
4693
4694 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4695 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4696 size_t len)
4697 {
4698 PyObject *v, *u;
4699 char *buf;
4700 char *p;
4701 const char *end;
4702
4703 /* check for integer overflow */
4704 if (len > SIZE_MAX / 6)
4705 return NULL;
4706 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4707 "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4708 u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4709 if (u == NULL)
4710 return NULL;
4711 p = buf = PyBytes_AsString(u);
4712 end = s + len;
4713 while (s < end) {
4714 if (*s == '\\') {
4715 *p++ = *s++;
4716 if (s >= end || *s & 0x80) {
4717 strcpy(p, "u005c");
4718 p += 5;
4719 if (s >= end)
4720 break;
4721 }
4722 }
4723 if (*s & 0x80) { /* XXX inefficient */
4724 PyObject *w;
4725 int kind;
4726 void *data;
4727 Py_ssize_t len, i;
4728 w = decode_utf8(c, &s, end);
4729 if (w == NULL) {
4730 Py_DECREF(u);
4731 return NULL;
4732 }
4733 kind = PyUnicode_KIND(w);
4734 data = PyUnicode_DATA(w);
4735 len = PyUnicode_GET_LENGTH(w);
4736 for (i = 0; i < len; i++) {
4737 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4738 sprintf(p, "\\U%08x", chr);
4739 p += 10;
4740 }
4741 /* Should be impossible to overflow */
4742 assert(p - buf <= PyBytes_GET_SIZE(u));
4743 Py_DECREF(w);
4744 } else {
4745 *p++ = *s++;
4746 }
4747 }
4748 len = p - buf;
4749 s = buf;
4750
4751 const char *first_invalid_escape;
4752 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4753
4754 if (v != NULL && first_invalid_escape != NULL) {
4755 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4756 /* We have not decref u before because first_invalid_escape points
4757 inside u. */
4758 Py_XDECREF(u);
4759 Py_DECREF(v);
4760 return NULL;
4761 }
4762 }
4763 Py_XDECREF(u);
4764 return v;
4765 }
4766
4767 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4768 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4769 size_t len)
4770 {
4771 const char *first_invalid_escape;
4772 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4773 &first_invalid_escape);
4774 if (result == NULL)
4775 return NULL;
4776
4777 if (first_invalid_escape != NULL) {
4778 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4779 Py_DECREF(result);
4780 return NULL;
4781 }
4782 }
4783 return result;
4784 }
4785
4786 /* Shift locations for the given node and all its children by adding `lineno`
4787 and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4788 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4789 {
4790 n->n_col_offset = n->n_col_offset + col_offset;
4791 n->n_end_col_offset = n->n_end_col_offset + col_offset;
4792 for (int i = 0; i < NCH(n); ++i) {
4793 if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4794 /* Shifting column offsets unnecessary if there's been newlines. */
4795 col_offset = 0;
4796 }
4797 fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4798 }
4799 n->n_lineno = n->n_lineno + lineno;
4800 n->n_end_lineno = n->n_end_lineno + lineno;
4801 }
4802
4803 /* Fix locations for the given node and its children.
4804
4805 `parent` is the enclosing node.
4806 `n` is the node which locations are going to be fixed relative to parent.
4807 `expr_str` is the child node's string representation, including braces.
4808 */
4809 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4810 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4811 {
4812 char *substr = NULL;
4813 char *start;
4814 int lines = LINENO(parent) - 1;
4815 int cols = parent->n_col_offset;
4816 /* Find the full fstring to fix location information in `n`. */
4817 while (parent && parent->n_type != STRING)
4818 parent = parent->n_child;
4819 if (parent && parent->n_str) {
4820 substr = strstr(parent->n_str, expr_str);
4821 if (substr) {
4822 start = substr;
4823 while (start > parent->n_str) {
4824 if (start[0] == '\n')
4825 break;
4826 start--;
4827 }
4828 cols += (int)(substr - start);
4829 /* adjust the start based on the number of newlines encountered
4830 before the f-string expression */
4831 for (char* p = parent->n_str; p < substr; p++) {
4832 if (*p == '\n') {
4833 lines++;
4834 }
4835 }
4836 }
4837 }
4838 fstring_shift_node_locations(n, lines, cols);
4839 }
4840
4841 /* Compile this expression in to an expr_ty. Add parens around the
4842 expression, in order to allow leading spaces in the expression. */
4843 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4844 fstring_compile_expr(const char *expr_start, const char *expr_end,
4845 struct compiling *c, const node *n)
4846
4847 {
4848 node *mod_n;
4849 mod_ty mod;
4850 char *str;
4851 Py_ssize_t len;
4852 const char *s;
4853
4854 assert(expr_end >= expr_start);
4855 assert(*(expr_start-1) == '{');
4856 assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4857 *expr_end == '=');
4858
4859 /* If the substring is all whitespace, it's an error. We need to catch this
4860 here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4861 because turning the expression '' in to '()' would go from being invalid
4862 to valid. */
4863 for (s = expr_start; s != expr_end; s++) {
4864 char c = *s;
4865 /* The Python parser ignores only the following whitespace
4866 characters (\r already is converted to \n). */
4867 if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4868 break;
4869 }
4870 }
4871 if (s == expr_end) {
4872 ast_error(c, n, "f-string: empty expression not allowed");
4873 return NULL;
4874 }
4875
4876 len = expr_end - expr_start;
4877 /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4878 str = PyMem_RawMalloc(len + 3);
4879 if (str == NULL) {
4880 PyErr_NoMemory();
4881 return NULL;
4882 }
4883
4884 str[0] = '(';
4885 memcpy(str+1, expr_start, len);
4886 str[len+1] = ')';
4887 str[len+2] = 0;
4888
4889 PyCompilerFlags cf = _PyCompilerFlags_INIT;
4890 cf.cf_flags = PyCF_ONLY_AST;
4891 mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4892 Py_eval_input, 0);
4893 if (!mod_n) {
4894 PyMem_RawFree(str);
4895 return NULL;
4896 }
4897 /* Reuse str to find the correct column offset. */
4898 str[0] = '{';
4899 str[len+1] = '}';
4900 fstring_fix_node_location(n, mod_n, str);
4901 mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4902 PyMem_RawFree(str);
4903 PyNode_Free(mod_n);
4904 if (!mod)
4905 return NULL;
4906 return mod->v.Expression.body;
4907 }
4908
4909 /* Return -1 on error.
4910
4911 Return 0 if we reached the end of the literal.
4912
4913 Return 1 if we haven't reached the end of the literal, but we want
4914 the caller to process the literal up to this point. Used for
4915 doubled braces.
4916 */
4917 static int
fstring_find_literal(const char ** str,const char * end,int raw,PyObject ** literal,int recurse_lvl,struct compiling * c,const node * n)4918 fstring_find_literal(const char **str, const char *end, int raw,
4919 PyObject **literal, int recurse_lvl,
4920 struct compiling *c, const node *n)
4921 {
4922 /* Get any literal string. It ends when we hit an un-doubled left
4923 brace (which isn't part of a unicode name escape such as
4924 "\N{EULER CONSTANT}"), or the end of the string. */
4925
4926 const char *s = *str;
4927 const char *literal_start = s;
4928 int result = 0;
4929
4930 assert(*literal == NULL);
4931 while (s < end) {
4932 char ch = *s++;
4933 if (!raw && ch == '\\' && s < end) {
4934 ch = *s++;
4935 if (ch == 'N') {
4936 if (s < end && *s++ == '{') {
4937 while (s < end && *s++ != '}') {
4938 }
4939 continue;
4940 }
4941 break;
4942 }
4943 if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4944 return -1;
4945 }
4946 }
4947 if (ch == '{' || ch == '}') {
4948 /* Check for doubled braces, but only at the top level. If
4949 we checked at every level, then f'{0:{3}}' would fail
4950 with the two closing braces. */
4951 if (recurse_lvl == 0) {
4952 if (s < end && *s == ch) {
4953 /* We're going to tell the caller that the literal ends
4954 here, but that they should continue scanning. But also
4955 skip over the second brace when we resume scanning. */
4956 *str = s + 1;
4957 result = 1;
4958 goto done;
4959 }
4960
4961 /* Where a single '{' is the start of a new expression, a
4962 single '}' is not allowed. */
4963 if (ch == '}') {
4964 *str = s - 1;
4965 ast_error(c, n, "f-string: single '}' is not allowed");
4966 return -1;
4967 }
4968 }
4969 /* We're either at a '{', which means we're starting another
4970 expression; or a '}', which means we're at the end of this
4971 f-string (for a nested format_spec). */
4972 s--;
4973 break;
4974 }
4975 }
4976 *str = s;
4977 assert(s <= end);
4978 assert(s == end || *s == '{' || *s == '}');
4979 done:
4980 if (literal_start != s) {
4981 if (raw)
4982 *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4983 s - literal_start,
4984 NULL, NULL);
4985 else
4986 *literal = decode_unicode_with_escapes(c, n, literal_start,
4987 s - literal_start);
4988 if (!*literal)
4989 return -1;
4990 }
4991 return result;
4992 }
4993
/* Forward declaration because parsing is recursive: fstring_find_expr()
   below calls fstring_parse() to parse a nested format spec. */
static expr_ty
fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
              struct compiling *c, const node *n);
4998
/* Parse the f-string at *str, ending at end.  We know *str starts an
   expression (so it must be a '{').  On success, *expression is set to a
   FormattedValue node, which includes the expression, conversion character
   and format_spec expression, and *str is left just past the closing '}'.

   Note that I don't do a perfect job here: I don't make sure that a
   closing brace doesn't match an opening paren, for example. It
   doesn't need to error on all invalid expressions, just correctly
   find the end of all valid ones. Any errors inside the expression
   will be caught when we parse it later.

   For an '=' "debug" expression, *expr_text is set to the debug text (the
   original text of the expression, including the '=' and any whitespace
   around it, as a string object). If not a debug expression, *expr_text is
   set to NULL.

   Returns 0 on success, and -1 with an exception set on error. */
static int
fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
                  PyObject **expr_text, expr_ty *expression,
                  struct compiling *c, const node *n)
{
    /* Return -1 on error, else 0. */

    const char *expr_start;
    const char *expr_end;
    expr_ty simple_expression;
    expr_ty format_spec = NULL; /* Optional format specifier. */
    int conversion = -1; /* The conversion char.  Use default if not
                            specified, or !r if using = and no format
                            spec. */

    /* 0 if we're not in a string, else the quote char we're trying to
       match (single or double quote). */
    char quote_char = 0;

    /* If we're inside a string, 1=normal, 3=triple-quoted. */
    int string_type = 0;

    /* Keep track of nesting level for braces/parens/brackets in
       expressions.  parenstack holds the opening character for each
       level, so a closing character can be matched against it. */
    Py_ssize_t nested_depth = 0;
    char parenstack[MAXLEVEL];

    *expr_text = NULL;

    /* Can only nest one level deep. */
    if (recurse_lvl >= 2) {
        ast_error(c, n, "f-string: expressions nested too deeply");
        goto error;
    }

    /* The first char must be a left brace, or we wouldn't have gotten
       here. Skip over it. */
    assert(**str == '{');
    *str += 1;

    expr_start = *str;
    for (; *str < end; (*str)++) {
        char ch;

        /* Loop invariants. */
        assert(nested_depth >= 0);
        assert(*str >= expr_start && *str < end);
        if (quote_char)
            assert(string_type == 1 || string_type == 3);
        else
            assert(string_type == 0);

        ch = **str;
        /* Nowhere inside an expression is a backslash allowed. */
        if (ch == '\\') {
            /* Error: can't include a backslash character, inside
               parens or strings or not. */
            ast_error(c, n,
                      "f-string expression part "
                      "cannot include a backslash");
            goto error;
        }
        if (quote_char) {
            /* We're inside a string. See if we're at the end. */
            /* This code needs to implement the same non-error logic
               as tok_get from tokenizer.c, at the letter_quote
               label. To actually share that code would be a
               nightmare. But, it's unlikely to change and is small,
               so duplicate it here. Note we don't need to catch all
               of the errors, since they'll be caught when parsing the
               expression. We just need to match the non-error
               cases. Thus we can ignore \n in single-quoted strings,
               for example. Or non-terminated strings. */
            if (ch == quote_char) {
                /* Does this match the string_type (single or triple
                   quoted)? */
                if (string_type == 3) {
                    if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                        /* We're at the end of a triple quoted string. */
                        *str += 2;
                        string_type = 0;
                        quote_char = 0;
                        continue;
                    }
                } else {
                    /* We're at the end of a normal string. */
                    quote_char = 0;
                    string_type = 0;
                    continue;
                }
            }
        } else if (ch == '\'' || ch == '"') {
            /* Is this a triple quoted string? */
            if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                string_type = 3;
                *str += 2;
            } else {
                /* Start of a normal string. */
                string_type = 1;
            }
            /* Start looking for the end of the string. */
            quote_char = ch;
        } else if (ch == '[' || ch == '{' || ch == '(') {
            if (nested_depth >= MAXLEVEL) {
                ast_error(c, n, "f-string: too many nested parenthesis");
                goto error;
            }
            parenstack[nested_depth] = ch;
            nested_depth++;
        } else if (ch == '#') {
            /* Error: can't include a comment character, inside parens
               or not. */
            ast_error(c, n, "f-string expression part cannot include '#'");
            goto error;
        } else if (nested_depth == 0 &&
                   (ch == '!' || ch == ':' || ch == '}' ||
                    ch == '=' || ch == '>' || ch == '<')) {
            /* See if there's a next character. */
            if (*str+1 < end) {
                char next = *(*str+1);

                /* For "!=". since '=' is not an allowed conversion character,
                   nothing is lost in this test. */
                if ((ch == '!' && next == '=') ||   /* != */
                    (ch == '=' && next == '=') ||   /* == */
                    (ch == '<' && next == '=') ||   /* <= */
                    (ch == '>' && next == '=')      /* >= */
                    ) {
                    /* Skip the second character of the two-char operator
                       as well. */
                    *str += 1;
                    continue;
                }
                /* Don't get out of the loop for these, if they're single
                   chars (not part of 2-char tokens). If by themselves, they
                   don't end an expression (unlike say '!'). */
                if (ch == '>' || ch == '<') {
                    continue;
                }
            }

            /* Normal way out of this loop. */
            break;
        } else if (ch == ']' || ch == '}' || ch == ')') {
            if (!nested_depth) {
                ast_error(c, n, "f-string: unmatched '%c'", ch);
                goto error;
            }
            nested_depth--;
            int opening = parenstack[nested_depth];
            if (!((opening == '(' && ch == ')') ||
                  (opening == '[' && ch == ']') ||
                  (opening == '{' && ch == '}')))
            {
                ast_error(c, n,
                          "f-string: closing parenthesis '%c' "
                          "does not match opening parenthesis '%c'",
                          ch, opening);
                goto error;
            }
        } else {
            /* Just consume this char and loop around. */
        }
    }
    expr_end = *str;
    /* If we leave this loop in a string or with mismatched parens, we
       don't care. We'll get a syntax error when compiling the
       expression. But, we can produce a better error message, so
       let's just do that. */
    if (quote_char) {
        ast_error(c, n, "f-string: unterminated string");
        goto error;
    }
    if (nested_depth) {
        int opening = parenstack[nested_depth - 1];
        ast_error(c, n, "f-string: unmatched '%c'", opening);
        goto error;
    }

    if (*str >= end)
        goto unexpected_end_of_string;

    /* Compile the expression as soon as possible, so we show errors
       related to the expression before errors related to the
       conversion or format_spec. */
    simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
    if (!simple_expression)
        goto error;

    /* Check for =, which puts the text value of the expression in
       expr_text. */
    if (**str == '=') {
        *str += 1;

        /* Skip over ASCII whitespace.  No need to test for end of string
           here, since we know there's at least a trailing quote somewhere
           ahead. */
        while (Py_ISSPACE(**str)) {
            *str += 1;
        }

        /* Set *expr_text to the text of the expression. */
        *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
        if (!*expr_text) {
            goto error;
        }
    }

    /* Check for a conversion char, if present. */
    if (**str == '!') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        conversion = **str;
        *str += 1;

        /* Validate the conversion. */
        if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
            ast_error(c, n,
                      "f-string: invalid conversion character: "
                      "expected 's', 'r', or 'a'");
            goto error;
        }

    }

    /* Check for the format spec, if present. */
    if (*str >= end)
        goto unexpected_end_of_string;
    if (**str == ':') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        /* Parse the format spec, one recursion level deeper. */
        format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
        if (!format_spec)
            goto error;
    }

    if (*str >= end || **str != '}')
        goto unexpected_end_of_string;

    /* We're at a right brace. Consume it. */
    assert(*str < end);
    assert(**str == '}');
    *str += 1;

    /* If we're in = mode (detected by non-NULL expr_text), and have no format
       spec and no explicit conversion, set the conversion to 'r'. */
    if (*expr_text && format_spec == NULL && conversion == -1) {
        conversion = 'r';
    }

    /* And now create the FormattedValue node that represents this
       entire expression with the conversion and format spec. */
    *expression = FormattedValue(simple_expression, conversion,
                                 format_spec, LINENO(n),
                                 n->n_col_offset, n->n_end_lineno,
                                 n->n_end_col_offset, c->c_arena);
    if (!*expression)
        goto error;

    return 0;

unexpected_end_of_string:
    ast_error(c, n, "f-string: expecting '}'");
    /* Falls through to error. */

error:
    /* Drop the debug text, if one was created before the failure. */
    Py_XDECREF(*expr_text);
    return -1;

}
5287
5288 /* Return -1 on error.
5289
5290 Return 0 if we have a literal (possible zero length) and an
5291 expression (zero length if at the end of the string.
5292
5293 Return 1 if we have a literal, but no expression, and we want the
5294 caller to call us again. This is used to deal with doubled
5295 braces.
5296
5297 When called multiple times on the string 'a{{b{0}c', this function
5298 will return:
5299
5300 1. the literal 'a{' with no expression, and a return value
5301 of 1. Despite the fact that there's no expression, the return
5302 value of 1 means we're not finished yet.
5303
5304 2. the literal 'b' and the expression '0', with a return value of
5305 0. The fact that there's an expression means we're not finished.
5306
5307 3. literal 'c' with no expression and a return value of 0. The
5308 combination of the return value of 0 with no expression means
5309 we're finished.
5310 */
5311 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5312 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5313 int recurse_lvl, PyObject **literal,
5314 PyObject **expr_text, expr_ty *expression,
5315 struct compiling *c, const node *n)
5316 {
5317 int result;
5318
5319 assert(*literal == NULL && *expression == NULL);
5320
5321 /* Get any literal string. */
5322 result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5323 if (result < 0)
5324 goto error;
5325
5326 assert(result == 0 || result == 1);
5327
5328 if (result == 1)
5329 /* We have a literal, but don't look at the expression. */
5330 return 1;
5331
5332 if (*str >= end || **str == '}')
5333 /* We're at the end of the string or the end of a nested
5334 f-string: no expression. The top-level error case where we
5335 expect to be at the end of the string but we're at a '}' is
5336 handled later. */
5337 return 0;
5338
5339 /* We must now be the start of an expression, on a '{'. */
5340 assert(**str == '{');
5341
5342 if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5343 expression, c, n) < 0)
5344 goto error;
5345
5346 return 0;
5347
5348 error:
5349 Py_CLEAR(*literal);
5350 return -1;
5351 }
5352
/* Number of expr_ty slots stored inline in an ExprList before it switches
   to dynamically allocated storage. */
#define EXPRLIST_N_CACHED  64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* Inline storage used first. */
} ExprList;
5371
5372 #ifdef NDEBUG
5373 #define ExprList_check_invariants(l)
5374 #else
5375 static void
ExprList_check_invariants(ExprList * l)5376 ExprList_check_invariants(ExprList *l)
5377 {
5378 /* Check our invariants. Make sure this object is "live", and
5379 hasn't been deallocated. */
5380 assert(l->size >= 0);
5381 assert(l->p != NULL);
5382 if (l->size <= EXPRLIST_N_CACHED)
5383 assert(l->data == l->p);
5384 }
5385 #endif
5386
5387 static void
ExprList_Init(ExprList * l)5388 ExprList_Init(ExprList *l)
5389 {
5390 l->allocated = EXPRLIST_N_CACHED;
5391 l->size = 0;
5392
5393 /* Until we start allocating dynamically, p points to data. */
5394 l->p = l->data;
5395
5396 ExprList_check_invariants(l);
5397 }
5398
5399 static int
ExprList_Append(ExprList * l,expr_ty exp)5400 ExprList_Append(ExprList *l, expr_ty exp)
5401 {
5402 ExprList_check_invariants(l);
5403 if (l->size >= l->allocated) {
5404 /* We need to alloc (or realloc) the memory. */
5405 Py_ssize_t new_size = l->allocated * 2;
5406
5407 /* See if we've ever allocated anything dynamically. */
5408 if (l->p == l->data) {
5409 Py_ssize_t i;
5410 /* We're still using the cached data. Switch to
5411 alloc-ing. */
5412 l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
5413 if (!l->p)
5414 return -1;
5415 /* Copy the cached data into the new buffer. */
5416 for (i = 0; i < l->size; i++)
5417 l->p[i] = l->data[i];
5418 } else {
5419 /* Just realloc. */
5420 expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size);
5421 if (!tmp) {
5422 PyMem_RawFree(l->p);
5423 l->p = NULL;
5424 return -1;
5425 }
5426 l->p = tmp;
5427 }
5428
5429 l->allocated = new_size;
5430 assert(l->allocated == 2 * l->size);
5431 }
5432
5433 l->p[l->size++] = exp;
5434
5435 ExprList_check_invariants(l);
5436 return 0;
5437 }
5438
5439 static void
ExprList_Dealloc(ExprList * l)5440 ExprList_Dealloc(ExprList *l)
5441 {
5442 ExprList_check_invariants(l);
5443
5444 /* If there's been an error, or we've never dynamically allocated,
5445 do nothing. */
5446 if (!l->p || l->p == l->data) {
5447 /* Do nothing. */
5448 } else {
5449 /* We have dynamically allocated. Free the memory. */
5450 PyMem_RawFree(l->p);
5451 }
5452 l->p = NULL;
5453 l->size = -1;
5454 }
5455
5456 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5457 ExprList_Finish(ExprList *l, PyArena *arena)
5458 {
5459 asdl_seq *seq;
5460
5461 ExprList_check_invariants(l);
5462
5463 /* Allocate the asdl_seq and copy the expressions in to it. */
5464 seq = _Py_asdl_seq_new(l->size, arena);
5465 if (seq) {
5466 Py_ssize_t i;
5467 for (i = 0; i < l->size; i++)
5468 asdl_seq_SET(seq, i, l->p[i]);
5469 }
5470 ExprList_Dealloc(l);
5471 return seq;
5472 }
5473
/* The FstringParser is designed to add a mix of strings and
   f-strings, and concat them together as needed. Ultimately, it
   generates an expr_ty. */
typedef struct {
    /* Literal text accumulated so far that has not yet been turned into
       a Constant node; NULL when there is none. */
    PyObject *last_str;
    /* The expr_ty nodes collected so far. */
    ExprList expr_list;
    /* Set to 1 by FstringParser_ConcatFstring(); when still 0 in
       FstringParser_Finish() the result is a plain Constant. */
    int fmode;
} FstringParser;
5482
5483 #ifdef NDEBUG
5484 #define FstringParser_check_invariants(state)
5485 #else
5486 static void
FstringParser_check_invariants(FstringParser * state)5487 FstringParser_check_invariants(FstringParser *state)
5488 {
5489 if (state->last_str)
5490 assert(PyUnicode_CheckExact(state->last_str));
5491 ExprList_check_invariants(&state->expr_list);
5492 }
5493 #endif
5494
5495 static void
FstringParser_Init(FstringParser * state)5496 FstringParser_Init(FstringParser *state)
5497 {
5498 state->last_str = NULL;
5499 state->fmode = 0;
5500 ExprList_Init(&state->expr_list);
5501 FstringParser_check_invariants(state);
5502 }
5503
5504 static void
FstringParser_Dealloc(FstringParser * state)5505 FstringParser_Dealloc(FstringParser *state)
5506 {
5507 FstringParser_check_invariants(state);
5508
5509 Py_XDECREF(state->last_str);
5510 ExprList_Dealloc(&state->expr_list);
5511 }
5512
5513 /* Constants for the following */
5514 static PyObject *u_kind;
5515
5516 /* Compute 'kind' field for string Constant (either 'u' or None) */
5517 static PyObject *
make_kind(struct compiling * c,const node * n)5518 make_kind(struct compiling *c, const node *n)
5519 {
5520 char *s = NULL;
5521 PyObject *kind = NULL;
5522
5523 /* Find the first string literal, if any */
5524 while (TYPE(n) != STRING) {
5525 if (NCH(n) == 0)
5526 return NULL;
5527 n = CHILD(n, 0);
5528 }
5529 REQ(n, STRING);
5530
5531 /* If it starts with 'u', return a PyUnicode "u" string */
5532 s = STR(n);
5533 if (s && *s == 'u') {
5534 if (!u_kind) {
5535 u_kind = PyUnicode_InternFromString("u");
5536 if (!u_kind)
5537 return NULL;
5538 }
5539 kind = u_kind;
5540 if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5541 return NULL;
5542 }
5543 Py_INCREF(kind);
5544 }
5545 return kind;
5546 }
5547
5548 /* Make a Constant node, but decref the PyUnicode object being added. */
5549 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5550 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5551 {
5552 PyObject *s = *str;
5553 PyObject *kind = NULL;
5554 *str = NULL;
5555 assert(PyUnicode_CheckExact(s));
5556 if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5557 Py_DECREF(s);
5558 return NULL;
5559 }
5560 kind = make_kind(c, n);
5561 if (kind == NULL && PyErr_Occurred())
5562 return NULL;
5563 return Constant(s, kind, LINENO(n), n->n_col_offset,
5564 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5565 }
5566
5567 /* Add a non-f-string (that is, a regular literal string). str is
5568 decref'd. */
5569 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5570 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5571 {
5572 FstringParser_check_invariants(state);
5573
5574 assert(PyUnicode_CheckExact(str));
5575
5576 if (PyUnicode_GET_LENGTH(str) == 0) {
5577 Py_DECREF(str);
5578 return 0;
5579 }
5580
5581 if (!state->last_str) {
5582 /* We didn't have a string before, so just remember this one. */
5583 state->last_str = str;
5584 } else {
5585 /* Concatenate this with the previous string. */
5586 PyUnicode_AppendAndDel(&state->last_str, str);
5587 if (!state->last_str)
5588 return -1;
5589 }
5590 FstringParser_check_invariants(state);
5591 return 0;
5592 }
5593
5594 /* Parse an f-string. The f-string is in *str to end, with no
5595 'f' or quotes. */
5596 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5597 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5598 const char *end, int raw, int recurse_lvl,
5599 struct compiling *c, const node *n)
5600 {
5601 FstringParser_check_invariants(state);
5602 state->fmode = 1;
5603
5604 /* Parse the f-string. */
5605 while (1) {
5606 PyObject *literal = NULL;
5607 PyObject *expr_text = NULL;
5608 expr_ty expression = NULL;
5609
5610 /* If there's a zero length literal in front of the
5611 expression, literal will be NULL. If we're at the end of
5612 the f-string, expression will be NULL (unless result == 1,
5613 see below). */
5614 int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5615 &literal, &expr_text,
5616 &expression, c, n);
5617 if (result < 0)
5618 return -1;
5619
5620 /* Add the literal, if any. */
5621 if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5622 Py_XDECREF(expr_text);
5623 return -1;
5624 }
5625 /* Add the expr_text, if any. */
5626 if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5627 return -1;
5628 }
5629
5630 /* We've dealt with the literal and expr_text, their ownership has
5631 been transferred to the state object. Don't look at them again. */
5632
5633 /* See if we should just loop around to get the next literal
5634 and expression, while ignoring the expression this
5635 time. This is used for un-doubling braces, as an
5636 optimization. */
5637 if (result == 1)
5638 continue;
5639
5640 if (!expression)
5641 /* We're done with this f-string. */
5642 break;
5643
5644 /* We know we have an expression. Convert any existing string
5645 to a Constant node. */
5646 if (!state->last_str) {
5647 /* Do nothing. No previous literal. */
5648 } else {
5649 /* Convert the existing last_str literal to a Constant node. */
5650 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5651 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5652 return -1;
5653 }
5654
5655 if (ExprList_Append(&state->expr_list, expression) < 0)
5656 return -1;
5657 }
5658
5659 /* If recurse_lvl is zero, then we must be at the end of the
5660 string. Otherwise, we must be at a right brace. */
5661
5662 if (recurse_lvl == 0 && *str < end-1) {
5663 ast_error(c, n, "f-string: unexpected end of string");
5664 return -1;
5665 }
5666 if (recurse_lvl != 0 && **str != '}') {
5667 ast_error(c, n, "f-string: expecting '}'");
5668 return -1;
5669 }
5670
5671 FstringParser_check_invariants(state);
5672 return 0;
5673 }
5674
5675 /* Convert the partial state reflected in last_str and expr_list to an
5676 expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5677 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5678 FstringParser_Finish(FstringParser *state, struct compiling *c,
5679 const node *n)
5680 {
5681 asdl_seq *seq;
5682
5683 FstringParser_check_invariants(state);
5684
5685 /* If we're just a constant string with no expressions, return
5686 that. */
5687 if (!state->fmode) {
5688 assert(!state->expr_list.size);
5689 if (!state->last_str) {
5690 /* Create a zero length string. */
5691 state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5692 if (!state->last_str)
5693 goto error;
5694 }
5695 return make_str_node_and_del(&state->last_str, c, n);
5696 }
5697
5698 /* Create a Constant node out of last_str, if needed. It will be the
5699 last node in our expression list. */
5700 if (state->last_str) {
5701 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5702 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5703 goto error;
5704 }
5705 /* This has already been freed. */
5706 assert(state->last_str == NULL);
5707
5708 seq = ExprList_Finish(&state->expr_list, c->c_arena);
5709 if (!seq)
5710 goto error;
5711
5712 return JoinedStr(seq, LINENO(n), n->n_col_offset,
5713 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5714
5715 error:
5716 FstringParser_Dealloc(state);
5717 return NULL;
5718 }
5719
5720 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5721 at end, parse it into an expr_ty. Return NULL on error. Adjust
5722 str to point past the parsed portion. */
5723 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5724 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5725 struct compiling *c, const node *n)
5726 {
5727 FstringParser state;
5728
5729 FstringParser_Init(&state);
5730 if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5731 c, n) < 0) {
5732 FstringParser_Dealloc(&state);
5733 return NULL;
5734 }
5735
5736 return FstringParser_Finish(&state, c, n);
5737 }
5738
5739 /* n is a Python string literal, including the bracketing quote
5740 characters, and r, b, u, &/or f prefixes (if any), and embedded
5741 escape sequences (if any). parsestr parses it, and sets *result to
5742 decoded Python string object. If the string is an f-string, set
5743 *fstr and *fstrlen to the unparsed string object. Return 0 if no
5744 errors occurred.
5745 */
5746 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5747 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5748 PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5749 {
5750 size_t len;
5751 const char *s = STR(n);
5752 int quote = Py_CHARMASK(*s);
5753 int fmode = 0;
5754 *bytesmode = 0;
5755 *rawmode = 0;
5756 *result = NULL;
5757 *fstr = NULL;
5758 if (Py_ISALPHA(quote)) {
5759 while (!*bytesmode || !*rawmode) {
5760 if (quote == 'b' || quote == 'B') {
5761 quote = *++s;
5762 *bytesmode = 1;
5763 }
5764 else if (quote == 'u' || quote == 'U') {
5765 quote = *++s;
5766 }
5767 else if (quote == 'r' || quote == 'R') {
5768 quote = *++s;
5769 *rawmode = 1;
5770 }
5771 else if (quote == 'f' || quote == 'F') {
5772 quote = *++s;
5773 fmode = 1;
5774 }
5775 else {
5776 break;
5777 }
5778 }
5779 }
5780
5781 /* fstrings are only allowed in Python 3.6 and greater */
5782 if (fmode && c->c_feature_version < 6) {
5783 ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5784 return -1;
5785 }
5786
5787 if (fmode && *bytesmode) {
5788 PyErr_BadInternalCall();
5789 return -1;
5790 }
5791 if (quote != '\'' && quote != '\"') {
5792 PyErr_BadInternalCall();
5793 return -1;
5794 }
5795 /* Skip the leading quote char. */
5796 s++;
5797 len = strlen(s);
5798 if (len > INT_MAX) {
5799 PyErr_SetString(PyExc_OverflowError,
5800 "string to parse is too long");
5801 return -1;
5802 }
5803 if (s[--len] != quote) {
5804 /* Last quote char must match the first. */
5805 PyErr_BadInternalCall();
5806 return -1;
5807 }
5808 if (len >= 4 && s[0] == quote && s[1] == quote) {
5809 /* A triple quoted string. We've already skipped one quote at
5810 the start and one at the end of the string. Now skip the
5811 two at the start. */
5812 s += 2;
5813 len -= 2;
5814 /* And check that the last two match. */
5815 if (s[--len] != quote || s[--len] != quote) {
5816 PyErr_BadInternalCall();
5817 return -1;
5818 }
5819 }
5820
5821 if (fmode) {
5822 /* Just return the bytes. The caller will parse the resulting
5823 string. */
5824 *fstr = s;
5825 *fstrlen = len;
5826 return 0;
5827 }
5828
5829 /* Not an f-string. */
5830 /* Avoid invoking escape decoding routines if possible. */
5831 *rawmode = *rawmode || strchr(s, '\\') == NULL;
5832 if (*bytesmode) {
5833 /* Disallow non-ASCII characters. */
5834 const char *ch;
5835 for (ch = s; *ch; ch++) {
5836 if (Py_CHARMASK(*ch) >= 0x80) {
5837 ast_error(c, n,
5838 "bytes can only contain ASCII "
5839 "literal characters.");
5840 return -1;
5841 }
5842 }
5843 if (*rawmode)
5844 *result = PyBytes_FromStringAndSize(s, len);
5845 else
5846 *result = decode_bytes_with_escapes(c, n, s, len);
5847 } else {
5848 if (*rawmode)
5849 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5850 else
5851 *result = decode_unicode_with_escapes(c, n, s, len);
5852 }
5853 return *result == NULL ? -1 : 0;
5854 }
5855
5856 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5857 each STRING atom, and process it as needed. For bytes, just
5858 concatenate them together, and the result will be a Constant node. For
5859 normal strings and f-strings, concatenate them together. The result
5860 will be a Constant node if there were no f-strings; a FormattedValue
5861 node if there's just an f-string (with no leading or trailing
5862 literals), or a JoinedStr node if there are multiple f-strings or
5863 any literals involved. */
5864 static expr_ty
parsestrplus(struct compiling * c,const node * n)5865 parsestrplus(struct compiling *c, const node *n)
5866 {
5867 int bytesmode = 0;
5868 PyObject *bytes_str = NULL;
5869 int i;
5870
5871 FstringParser state;
5872 FstringParser_Init(&state);
5873
5874 for (i = 0; i < NCH(n); i++) {
5875 int this_bytesmode;
5876 int this_rawmode;
5877 PyObject *s;
5878 const char *fstr;
5879 Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
5880
5881 REQ(CHILD(n, i), STRING);
5882 if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5883 &fstr, &fstrlen) != 0)
5884 goto error;
5885
5886 /* Check that we're not mixing bytes with unicode. */
5887 if (i != 0 && bytesmode != this_bytesmode) {
5888 ast_error(c, n, "cannot mix bytes and nonbytes literals");
5889 /* s is NULL if the current string part is an f-string. */
5890 Py_XDECREF(s);
5891 goto error;
5892 }
5893 bytesmode = this_bytesmode;
5894
5895 if (fstr != NULL) {
5896 int result;
5897 assert(s == NULL && !bytesmode);
5898 /* This is an f-string. Parse and concatenate it. */
5899 result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5900 this_rawmode, 0, c, n);
5901 if (result < 0)
5902 goto error;
5903 } else {
5904 /* A string or byte string. */
5905 assert(s != NULL && fstr == NULL);
5906
5907 assert(bytesmode ? PyBytes_CheckExact(s) :
5908 PyUnicode_CheckExact(s));
5909
5910 if (bytesmode) {
5911 /* For bytes, concat as we go. */
5912 if (i == 0) {
5913 /* First time, just remember this value. */
5914 bytes_str = s;
5915 } else {
5916 PyBytes_ConcatAndDel(&bytes_str, s);
5917 if (!bytes_str)
5918 goto error;
5919 }
5920 } else {
5921 /* This is a regular string. Concatenate it. */
5922 if (FstringParser_ConcatAndDel(&state, s) < 0)
5923 goto error;
5924 }
5925 }
5926 }
5927 if (bytesmode) {
5928 /* Just return the bytes object and we're done. */
5929 if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5930 goto error;
5931 return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5932 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5933 }
5934
5935 /* We're not a bytes string, bytes_str should never have been set. */
5936 assert(bytes_str == NULL);
5937
5938 return FstringParser_Finish(&state, c, n);
5939
5940 error:
5941 Py_XDECREF(bytes_str);
5942 FstringParser_Dealloc(&state);
5943 return NULL;
5944 }
5945
5946 PyObject *
_PyAST_GetDocString(asdl_seq * body)5947 _PyAST_GetDocString(asdl_seq *body)
5948 {
5949 if (!asdl_seq_LEN(body)) {
5950 return NULL;
5951 }
5952 stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
5953 if (st->kind != Expr_kind) {
5954 return NULL;
5955 }
5956 expr_ty e = st->v.Expr.value;
5957 if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
5958 return e->v.Constant.value;
5959 }
5960 return NULL;
5961 }
5962