1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12
13 #include <assert.h>
14 #include <stdbool.h>
15
#define MAXLEVEL 200    /* Max parentheses level */

/* Forward declarations for the AST validators defined below.  They are
   needed because the validators call each other recursively: a statement
   validator walks nested statement lists and expressions, and expression
   validators walk nested expression lists. */
static int validate_stmts(asdl_seq *);
static int validate_exprs(asdl_seq *, expr_context_ty, int);
static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
static int validate_stmt(stmt_ty);
static int validate_expr(expr_ty, expr_context_ty);
23
24 static int
validate_name(PyObject * name)25 validate_name(PyObject *name)
26 {
27 assert(PyUnicode_Check(name));
28 static const char * const forbidden[] = {
29 "None",
30 "True",
31 "False",
32 NULL
33 };
34 for (int i = 0; forbidden[i] != NULL; i++) {
35 if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
36 PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
37 return 0;
38 }
39 }
40 return 1;
41 }
42
43 static int
validate_comprehension(asdl_seq * gens)44 validate_comprehension(asdl_seq *gens)
45 {
46 Py_ssize_t i;
47 if (!asdl_seq_LEN(gens)) {
48 PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
49 return 0;
50 }
51 for (i = 0; i < asdl_seq_LEN(gens); i++) {
52 comprehension_ty comp = asdl_seq_GET(gens, i);
53 if (!validate_expr(comp->target, Store) ||
54 !validate_expr(comp->iter, Load) ||
55 !validate_exprs(comp->ifs, Load, 0))
56 return 0;
57 }
58 return 1;
59 }
60
61 static int
validate_keywords(asdl_seq * keywords)62 validate_keywords(asdl_seq *keywords)
63 {
64 Py_ssize_t i;
65 for (i = 0; i < asdl_seq_LEN(keywords); i++)
66 if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
67 return 0;
68 return 1;
69 }
70
71 static int
validate_args(asdl_seq * args)72 validate_args(asdl_seq *args)
73 {
74 Py_ssize_t i;
75 for (i = 0; i < asdl_seq_LEN(args); i++) {
76 arg_ty arg = asdl_seq_GET(args, i);
77 if (arg->annotation && !validate_expr(arg->annotation, Load))
78 return 0;
79 }
80 return 1;
81 }
82
83 static const char *
expr_context_name(expr_context_ty ctx)84 expr_context_name(expr_context_ty ctx)
85 {
86 switch (ctx) {
87 case Load:
88 return "Load";
89 case Store:
90 return "Store";
91 case Del:
92 return "Del";
93 default:
94 Py_UNREACHABLE();
95 }
96 }
97
98 static int
validate_arguments(arguments_ty args)99 validate_arguments(arguments_ty args)
100 {
101 if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
102 return 0;
103 }
104 if (args->vararg && args->vararg->annotation
105 && !validate_expr(args->vararg->annotation, Load)) {
106 return 0;
107 }
108 if (!validate_args(args->kwonlyargs))
109 return 0;
110 if (args->kwarg && args->kwarg->annotation
111 && !validate_expr(args->kwarg->annotation, Load)) {
112 return 0;
113 }
114 if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
115 PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
116 return 0;
117 }
118 if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
119 PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
120 "kw_defaults on arguments");
121 return 0;
122 }
123 return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
124 }
125
126 static int
validate_constant(PyObject * value)127 validate_constant(PyObject *value)
128 {
129 if (value == Py_None || value == Py_Ellipsis)
130 return 1;
131
132 if (PyLong_CheckExact(value)
133 || PyFloat_CheckExact(value)
134 || PyComplex_CheckExact(value)
135 || PyBool_Check(value)
136 || PyUnicode_CheckExact(value)
137 || PyBytes_CheckExact(value))
138 return 1;
139
140 if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
141 PyObject *it;
142
143 it = PyObject_GetIter(value);
144 if (it == NULL)
145 return 0;
146
147 while (1) {
148 PyObject *item = PyIter_Next(it);
149 if (item == NULL) {
150 if (PyErr_Occurred()) {
151 Py_DECREF(it);
152 return 0;
153 }
154 break;
155 }
156
157 if (!validate_constant(item)) {
158 Py_DECREF(it);
159 Py_DECREF(item);
160 return 0;
161 }
162 Py_DECREF(item);
163 }
164
165 Py_DECREF(it);
166 return 1;
167 }
168
169 if (!PyErr_Occurred()) {
170 PyErr_Format(PyExc_TypeError,
171 "got an invalid type in Constant: %s",
172 _PyType_Name(Py_TYPE(value)));
173 }
174 return 0;
175 }
176
177 static int
validate_expr(expr_ty exp,expr_context_ty ctx)178 validate_expr(expr_ty exp, expr_context_ty ctx)
179 {
180 int check_ctx = 1;
181 expr_context_ty actual_ctx;
182
183 /* First check expression context. */
184 switch (exp->kind) {
185 case Attribute_kind:
186 actual_ctx = exp->v.Attribute.ctx;
187 break;
188 case Subscript_kind:
189 actual_ctx = exp->v.Subscript.ctx;
190 break;
191 case Starred_kind:
192 actual_ctx = exp->v.Starred.ctx;
193 break;
194 case Name_kind:
195 if (!validate_name(exp->v.Name.id)) {
196 return 0;
197 }
198 actual_ctx = exp->v.Name.ctx;
199 break;
200 case List_kind:
201 actual_ctx = exp->v.List.ctx;
202 break;
203 case Tuple_kind:
204 actual_ctx = exp->v.Tuple.ctx;
205 break;
206 default:
207 if (ctx != Load) {
208 PyErr_Format(PyExc_ValueError, "expression which can't be "
209 "assigned to in %s context", expr_context_name(ctx));
210 return 0;
211 }
212 check_ctx = 0;
213 /* set actual_ctx to prevent gcc warning */
214 actual_ctx = 0;
215 }
216 if (check_ctx && actual_ctx != ctx) {
217 PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
218 expr_context_name(ctx), expr_context_name(actual_ctx));
219 return 0;
220 }
221
222 /* Now validate expression. */
223 switch (exp->kind) {
224 case BoolOp_kind:
225 if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
226 PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
227 return 0;
228 }
229 return validate_exprs(exp->v.BoolOp.values, Load, 0);
230 case BinOp_kind:
231 return validate_expr(exp->v.BinOp.left, Load) &&
232 validate_expr(exp->v.BinOp.right, Load);
233 case UnaryOp_kind:
234 return validate_expr(exp->v.UnaryOp.operand, Load);
235 case Lambda_kind:
236 return validate_arguments(exp->v.Lambda.args) &&
237 validate_expr(exp->v.Lambda.body, Load);
238 case IfExp_kind:
239 return validate_expr(exp->v.IfExp.test, Load) &&
240 validate_expr(exp->v.IfExp.body, Load) &&
241 validate_expr(exp->v.IfExp.orelse, Load);
242 case Dict_kind:
243 if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
244 PyErr_SetString(PyExc_ValueError,
245 "Dict doesn't have the same number of keys as values");
246 return 0;
247 }
248 /* null_ok=1 for keys expressions to allow dict unpacking to work in
249 dict literals, i.e. ``{**{a:b}}`` */
250 return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
251 validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
252 case Set_kind:
253 return validate_exprs(exp->v.Set.elts, Load, 0);
254 #define COMP(NAME) \
255 case NAME ## _kind: \
256 return validate_comprehension(exp->v.NAME.generators) && \
257 validate_expr(exp->v.NAME.elt, Load);
258 COMP(ListComp)
259 COMP(SetComp)
260 COMP(GeneratorExp)
261 #undef COMP
262 case DictComp_kind:
263 return validate_comprehension(exp->v.DictComp.generators) &&
264 validate_expr(exp->v.DictComp.key, Load) &&
265 validate_expr(exp->v.DictComp.value, Load);
266 case Yield_kind:
267 return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
268 case YieldFrom_kind:
269 return validate_expr(exp->v.YieldFrom.value, Load);
270 case Await_kind:
271 return validate_expr(exp->v.Await.value, Load);
272 case Compare_kind:
273 if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
274 PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
275 return 0;
276 }
277 if (asdl_seq_LEN(exp->v.Compare.comparators) !=
278 asdl_seq_LEN(exp->v.Compare.ops)) {
279 PyErr_SetString(PyExc_ValueError, "Compare has a different number "
280 "of comparators and operands");
281 return 0;
282 }
283 return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
284 validate_expr(exp->v.Compare.left, Load);
285 case Call_kind:
286 return validate_expr(exp->v.Call.func, Load) &&
287 validate_exprs(exp->v.Call.args, Load, 0) &&
288 validate_keywords(exp->v.Call.keywords);
289 case Constant_kind:
290 if (!validate_constant(exp->v.Constant.value)) {
291 return 0;
292 }
293 return 1;
294 case JoinedStr_kind:
295 return validate_exprs(exp->v.JoinedStr.values, Load, 0);
296 case FormattedValue_kind:
297 if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
298 return 0;
299 if (exp->v.FormattedValue.format_spec)
300 return validate_expr(exp->v.FormattedValue.format_spec, Load);
301 return 1;
302 case Attribute_kind:
303 return validate_expr(exp->v.Attribute.value, Load);
304 case Subscript_kind:
305 return validate_expr(exp->v.Subscript.slice, Load) &&
306 validate_expr(exp->v.Subscript.value, Load);
307 case Starred_kind:
308 return validate_expr(exp->v.Starred.value, ctx);
309 case Slice_kind:
310 return (!exp->v.Slice.lower || validate_expr(exp->v.Slice.lower, Load)) &&
311 (!exp->v.Slice.upper || validate_expr(exp->v.Slice.upper, Load)) &&
312 (!exp->v.Slice.step || validate_expr(exp->v.Slice.step, Load));
313 case List_kind:
314 return validate_exprs(exp->v.List.elts, ctx, 0);
315 case Tuple_kind:
316 return validate_exprs(exp->v.Tuple.elts, ctx, 0);
317 case NamedExpr_kind:
318 return validate_expr(exp->v.NamedExpr.value, Load);
319 /* This last case doesn't have any checking. */
320 case Name_kind:
321 return 1;
322 }
323 PyErr_SetString(PyExc_SystemError, "unexpected expression");
324 return 0;
325 }
326
327 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)328 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
329 {
330 if (asdl_seq_LEN(seq))
331 return 1;
332 PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
333 return 0;
334 }
335
336 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)337 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
338 {
339 return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
340 validate_exprs(targets, ctx, 0);
341 }
342
343 static int
validate_body(asdl_seq * body,const char * owner)344 validate_body(asdl_seq *body, const char *owner)
345 {
346 return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
347 }
348
349 static int
validate_stmt(stmt_ty stmt)350 validate_stmt(stmt_ty stmt)
351 {
352 Py_ssize_t i;
353 switch (stmt->kind) {
354 case FunctionDef_kind:
355 return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
356 validate_arguments(stmt->v.FunctionDef.args) &&
357 validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
358 (!stmt->v.FunctionDef.returns ||
359 validate_expr(stmt->v.FunctionDef.returns, Load));
360 case ClassDef_kind:
361 return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
362 validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
363 validate_keywords(stmt->v.ClassDef.keywords) &&
364 validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
365 case Return_kind:
366 return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
367 case Delete_kind:
368 return validate_assignlist(stmt->v.Delete.targets, Del);
369 case Assign_kind:
370 return validate_assignlist(stmt->v.Assign.targets, Store) &&
371 validate_expr(stmt->v.Assign.value, Load);
372 case AugAssign_kind:
373 return validate_expr(stmt->v.AugAssign.target, Store) &&
374 validate_expr(stmt->v.AugAssign.value, Load);
375 case AnnAssign_kind:
376 if (stmt->v.AnnAssign.target->kind != Name_kind &&
377 stmt->v.AnnAssign.simple) {
378 PyErr_SetString(PyExc_TypeError,
379 "AnnAssign with simple non-Name target");
380 return 0;
381 }
382 return validate_expr(stmt->v.AnnAssign.target, Store) &&
383 (!stmt->v.AnnAssign.value ||
384 validate_expr(stmt->v.AnnAssign.value, Load)) &&
385 validate_expr(stmt->v.AnnAssign.annotation, Load);
386 case For_kind:
387 return validate_expr(stmt->v.For.target, Store) &&
388 validate_expr(stmt->v.For.iter, Load) &&
389 validate_body(stmt->v.For.body, "For") &&
390 validate_stmts(stmt->v.For.orelse);
391 case AsyncFor_kind:
392 return validate_expr(stmt->v.AsyncFor.target, Store) &&
393 validate_expr(stmt->v.AsyncFor.iter, Load) &&
394 validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
395 validate_stmts(stmt->v.AsyncFor.orelse);
396 case While_kind:
397 return validate_expr(stmt->v.While.test, Load) &&
398 validate_body(stmt->v.While.body, "While") &&
399 validate_stmts(stmt->v.While.orelse);
400 case If_kind:
401 return validate_expr(stmt->v.If.test, Load) &&
402 validate_body(stmt->v.If.body, "If") &&
403 validate_stmts(stmt->v.If.orelse);
404 case With_kind:
405 if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
406 return 0;
407 for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
408 withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
409 if (!validate_expr(item->context_expr, Load) ||
410 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
411 return 0;
412 }
413 return validate_body(stmt->v.With.body, "With");
414 case AsyncWith_kind:
415 if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
416 return 0;
417 for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
418 withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
419 if (!validate_expr(item->context_expr, Load) ||
420 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
421 return 0;
422 }
423 return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
424 case Raise_kind:
425 if (stmt->v.Raise.exc) {
426 return validate_expr(stmt->v.Raise.exc, Load) &&
427 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
428 }
429 if (stmt->v.Raise.cause) {
430 PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
431 return 0;
432 }
433 return 1;
434 case Try_kind:
435 if (!validate_body(stmt->v.Try.body, "Try"))
436 return 0;
437 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
438 !asdl_seq_LEN(stmt->v.Try.finalbody)) {
439 PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
440 return 0;
441 }
442 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
443 asdl_seq_LEN(stmt->v.Try.orelse)) {
444 PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
445 return 0;
446 }
447 for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
448 excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
449 if ((handler->v.ExceptHandler.type &&
450 !validate_expr(handler->v.ExceptHandler.type, Load)) ||
451 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
452 return 0;
453 }
454 return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
455 validate_stmts(stmt->v.Try.finalbody)) &&
456 (!asdl_seq_LEN(stmt->v.Try.orelse) ||
457 validate_stmts(stmt->v.Try.orelse));
458 case Assert_kind:
459 return validate_expr(stmt->v.Assert.test, Load) &&
460 (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
461 case Import_kind:
462 return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
463 case ImportFrom_kind:
464 if (stmt->v.ImportFrom.level < 0) {
465 PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
466 return 0;
467 }
468 return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
469 case Global_kind:
470 return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
471 case Nonlocal_kind:
472 return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
473 case Expr_kind:
474 return validate_expr(stmt->v.Expr.value, Load);
475 case AsyncFunctionDef_kind:
476 return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
477 validate_arguments(stmt->v.AsyncFunctionDef.args) &&
478 validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
479 (!stmt->v.AsyncFunctionDef.returns ||
480 validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
481 case Pass_kind:
482 case Break_kind:
483 case Continue_kind:
484 return 1;
485 default:
486 PyErr_SetString(PyExc_SystemError, "unexpected statement");
487 return 0;
488 }
489 }
490
491 static int
validate_stmts(asdl_seq * seq)492 validate_stmts(asdl_seq *seq)
493 {
494 Py_ssize_t i;
495 for (i = 0; i < asdl_seq_LEN(seq); i++) {
496 stmt_ty stmt = asdl_seq_GET(seq, i);
497 if (stmt) {
498 if (!validate_stmt(stmt))
499 return 0;
500 }
501 else {
502 PyErr_SetString(PyExc_ValueError,
503 "None disallowed in statement list");
504 return 0;
505 }
506 }
507 return 1;
508 }
509
510 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)511 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
512 {
513 Py_ssize_t i;
514 for (i = 0; i < asdl_seq_LEN(exprs); i++) {
515 expr_ty expr = asdl_seq_GET(exprs, i);
516 if (expr) {
517 if (!validate_expr(expr, ctx))
518 return 0;
519 }
520 else if (!null_ok) {
521 PyErr_SetString(PyExc_ValueError,
522 "None disallowed in expression list");
523 return 0;
524 }
525
526 }
527 return 1;
528 }
529
530 int
PyAST_Validate(mod_ty mod)531 PyAST_Validate(mod_ty mod)
532 {
533 int res = 0;
534
535 switch (mod->kind) {
536 case Module_kind:
537 res = validate_stmts(mod->v.Module.body);
538 break;
539 case Interactive_kind:
540 res = validate_stmts(mod->v.Interactive.body);
541 break;
542 case Expression_kind:
543 res = validate_expr(mod->v.Expression.body, Load);
544 break;
545 default:
546 PyErr_SetString(PyExc_SystemError, "impossible module node");
547 res = 0;
548 break;
549 }
550 return res;
551 }
552
553 /* This is done here, so defines like "test" don't interfere with AST use above. */
554 #include "grammar.h"
555 #include "parsetok.h"
556 #include "graminit.h"
557
/* Data structure used internally: the state shared by the CST -> AST
   conversion helpers.  PyAST_FromNodeObject() creates one on its stack and
   passes its address to the ast_for_*() functions. */
struct compiling {
    PyArena *c_arena; /* Arena for allocating memory. */
    PyObject *c_filename; /* filename (borrowed reference; used for error locations) */
    PyObject *c_normalize; /* Normalization function from unicodedata.
                              NULL until init_normalization() succeeds. */
    int c_feature_version; /* Latest minor version of Python for allowed features */
};
565
/* Forward declarations of the CST -> AST conversion helpers.  Each takes
   the shared conversion state and the CST node to convert. */
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
static expr_ty ast_for_expr(struct compiling *, const node *);
static stmt_ty ast_for_stmt(struct compiling *, const node *);
static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
                                  expr_context_ty);
static expr_ty ast_for_testlist(struct compiling *, const node *);
static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);

/* NOTE(review): the trailing bool presumably selects the async variant of
   the statement -- confirm against the definitions. */
static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);

/* Note different signature for ast_for_call */
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
                            const node *, const node *, const node *);

static PyObject *parsenumber(struct compiling *, const char *);
static expr_ty parsestrplus(struct compiling *, const node *n);
static void get_last_end_pos(asdl_seq *, int *, int *);

/* Selectors for the kind of comprehension being built. */
#define COMP_GENEXP   0
#define COMP_LISTCOMP 1
#define COMP_SETCOMP  2
589
590 static int
init_normalization(struct compiling * c)591 init_normalization(struct compiling *c)
592 {
593 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
594 if (!m)
595 return 0;
596 c->c_normalize = PyObject_GetAttrString(m, "normalize");
597 Py_DECREF(m);
598 if (!c->c_normalize)
599 return 0;
600 return 1;
601 }
602
603 static identifier
new_identifier(const char * n,struct compiling * c)604 new_identifier(const char *n, struct compiling *c)
605 {
606 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
607 if (!id)
608 return NULL;
609 /* PyUnicode_DecodeUTF8 should always return a ready string. */
610 assert(PyUnicode_IS_READY(id));
611 /* Check whether there are non-ASCII characters in the
612 identifier; if so, normalize to NFKC. */
613 if (!PyUnicode_IS_ASCII(id)) {
614 PyObject *id2;
615 if (!c->c_normalize && !init_normalization(c)) {
616 Py_DECREF(id);
617 return NULL;
618 }
619 PyObject *form = PyUnicode_InternFromString("NFKC");
620 if (form == NULL) {
621 Py_DECREF(id);
622 return NULL;
623 }
624 PyObject *args[2] = {form, id};
625 id2 = _PyObject_FastCall(c->c_normalize, args, 2);
626 Py_DECREF(id);
627 Py_DECREF(form);
628 if (!id2)
629 return NULL;
630 if (!PyUnicode_Check(id2)) {
631 PyErr_Format(PyExc_TypeError,
632 "unicodedata.normalize() must return a string, not "
633 "%.200s",
634 _PyType_Name(Py_TYPE(id2)));
635 Py_DECREF(id2);
636 return NULL;
637 }
638 id = id2;
639 }
640 PyUnicode_InternInPlace(&id);
641 if (PyArena_AddPyObject(c->c_arena, id) < 0) {
642 Py_DECREF(id);
643 return NULL;
644 }
645 return id;
646 }
647
648 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
649
650 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)651 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
652 {
653 PyObject *value, *errstr, *loc, *tmp;
654 va_list va;
655
656 va_start(va, errmsg);
657 errstr = PyUnicode_FromFormatV(errmsg, va);
658 va_end(va);
659 if (!errstr) {
660 return 0;
661 }
662 loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
663 if (!loc) {
664 Py_INCREF(Py_None);
665 loc = Py_None;
666 }
667 tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
668 if (!tmp) {
669 Py_DECREF(errstr);
670 return 0;
671 }
672 value = PyTuple_Pack(2, errstr, tmp);
673 Py_DECREF(errstr);
674 Py_DECREF(tmp);
675 if (value) {
676 PyErr_SetObject(PyExc_SyntaxError, value);
677 Py_DECREF(value);
678 }
679 return 0;
680 }
681
682 /* num_stmts() returns number of contained statements.
683
684 Use this routine to determine how big a sequence is needed for
685 the statements in a parse tree. Its raison d'etre is this bit of
686 grammar:
687
688 stmt: simple_stmt | compound_stmt
689 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
690
691 A simple_stmt can contain multiple small_stmt elements joined
692 by semicolons. If the arg is a simple_stmt, the number of
693 small_stmt elements is returned.
694 */
695
696 static string
new_type_comment(const char * s,struct compiling * c)697 new_type_comment(const char *s, struct compiling *c)
698 {
699 PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
700 if (res == NULL)
701 return NULL;
702 if (PyArena_AddPyObject(c->c_arena, res) < 0) {
703 Py_DECREF(res);
704 return NULL;
705 }
706 return res;
707 }
708 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
709
710 static int
num_stmts(const node * n)711 num_stmts(const node *n)
712 {
713 int i, l;
714 node *ch;
715
716 switch (TYPE(n)) {
717 case single_input:
718 if (TYPE(CHILD(n, 0)) == NEWLINE)
719 return 0;
720 else
721 return num_stmts(CHILD(n, 0));
722 case file_input:
723 l = 0;
724 for (i = 0; i < NCH(n); i++) {
725 ch = CHILD(n, i);
726 if (TYPE(ch) == stmt)
727 l += num_stmts(ch);
728 }
729 return l;
730 case stmt:
731 return num_stmts(CHILD(n, 0));
732 case compound_stmt:
733 return 1;
734 case simple_stmt:
735 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
736 case suite:
737 case func_body_suite:
738 /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
739 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
740 if (NCH(n) == 1)
741 return num_stmts(CHILD(n, 0));
742 else {
743 i = 2;
744 l = 0;
745 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
746 i += 2;
747 for (; i < (NCH(n) - 1); i++)
748 l += num_stmts(CHILD(n, i));
749 return l;
750 }
751 default: {
752 _Py_FatalErrorFormat(__func__, "Non-statement found: %d %d",
753 TYPE(n), NCH(n));
754 }
755 }
756 Py_UNREACHABLE();
757 }
758
/* Transform the CST rooted at node `n` into the corresponding AST.

   An encoding_decl root is stepped over first.  The result then depends on
   the root node type:
     file_input      -> Module (statements plus type-ignore comments)
     eval_input      -> Expression
     single_input    -> Interactive
     func_type_input -> FunctionType
   All AST nodes are allocated in `arena`.  `filename` is kept only as a
   borrowed reference for the duration of the call.  `flags` may be NULL.

   Returns the new module node, or NULL on failure (including an
   unexpected root node type, which sets SystemError).
*/

mod_ty
PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
                     PyObject *filename, PyArena *arena)
{
    int i, j, k, num;
    asdl_seq *stmts = NULL;
    asdl_seq *type_ignores = NULL;
    stmt_ty s;
    node *ch;
    struct compiling c;
    mod_ty res = NULL;
    asdl_seq *argtypes = NULL;
    expr_ty ret, arg;

    c.c_arena = arena;
    /* borrowed reference */
    c.c_filename = filename;
    /* Looked up lazily, the first time a non-ASCII identifier is seen. */
    c.c_normalize = NULL;
    /* The caller's feature version is honored only together with
       PyCF_ONLY_AST; otherwise the running interpreter's minor version
       is used. */
    c.c_feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
        flags->cf_feature_version : PY_MINOR_VERSION;

    if (TYPE(n) == encoding_decl)
        n = CHILD(n, 0);

    /* k is the next free slot in stmts for the file_input case. */
    k = 0;
    switch (TYPE(n)) {
        case file_input:
            stmts = _Py_asdl_seq_new(num_stmts(n), arena);
            if (!stmts)
                goto out;
            /* The last child is the ENDMARKER, handled after the loop. */
            for (i = 0; i < NCH(n) - 1; i++) {
                ch = CHILD(n, i);
                if (TYPE(ch) == NEWLINE)
                    continue;
                REQ(ch, stmt);
                num = num_stmts(ch);
                if (num == 1) {
                    s = ast_for_stmt(&c, ch);
                    if (!s)
                        goto out;
                    asdl_seq_SET(stmts, k++, s);
                }
                else {
                    /* Several small statements on one line: they sit at
                       the even child positions of the simple_stmt. */
                    ch = CHILD(ch, 0);
                    REQ(ch, simple_stmt);
                    for (j = 0; j < num; j++) {
                        s = ast_for_stmt(&c, CHILD(ch, j * 2));
                        if (!s)
                            goto out;
                        asdl_seq_SET(stmts, k++, s);
                    }
                }
            }

            /* Type ignores are stored under the ENDMARKER in file_input. */
            ch = CHILD(n, NCH(n) - 1);
            REQ(ch, ENDMARKER);
            num = NCH(ch);
            type_ignores = _Py_asdl_seq_new(num, arena);
            if (!type_ignores)
                goto out;

            for (i = 0; i < num; i++) {
                string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
                if (!type_comment)
                    goto out;
                type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
                if (!ti)
                    goto out;
                asdl_seq_SET(type_ignores, i, ti);
            }

            res = Module(stmts, type_ignores, arena);
            break;
        case eval_input: {
            expr_ty testlist_ast;

            /* XXX Why not comp_for here? */
            testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
            if (!testlist_ast)
                goto out;
            res = Expression(testlist_ast, arena);
            break;
        }
        case single_input:
            if (TYPE(CHILD(n, 0)) == NEWLINE) {
                /* An empty interactive line becomes a single Pass. */
                stmts = _Py_asdl_seq_new(1, arena);
                if (!stmts)
                    goto out;
                asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
                                            n->n_end_lineno, n->n_end_col_offset,
                                            arena));
                if (!asdl_seq_GET(stmts, 0))
                    goto out;
                res = Interactive(stmts, arena);
            }
            else {
                n = CHILD(n, 0);
                num = num_stmts(n);
                stmts = _Py_asdl_seq_new(num, arena);
                if (!stmts)
                    goto out;
                if (num == 1) {
                    s = ast_for_stmt(&c, n);
                    if (!s)
                        goto out;
                    asdl_seq_SET(stmts, 0, s);
                }
                else {
                    /* Only a simple_stmt can contain multiple statements. */
                    REQ(n, simple_stmt);
                    /* Statements sit at even positions; stop at the
                       terminating NEWLINE. */
                    for (i = 0; i < NCH(n); i += 2) {
                        if (TYPE(CHILD(n, i)) == NEWLINE)
                            break;
                        s = ast_for_stmt(&c, CHILD(n, i));
                        if (!s)
                            goto out;
                        asdl_seq_SET(stmts, i / 2, s);
                    }
                }

                res = Interactive(stmts, arena);
            }
            break;
        case func_type_input:
            n = CHILD(n, 0);
            REQ(n, func_type);

            if (TYPE(CHILD(n, 1)) == typelist) {
                ch = CHILD(n, 1);
                /* this is overly permissive -- we don't pay any attention to
                 * stars on the args -- just parse them into an ordered list */
                /* First pass: count the argument types to size the sequence. */
                num = 0;
                for (i = 0; i < NCH(ch); i++) {
                    if (TYPE(CHILD(ch, i)) == test) {
                        num++;
                    }
                }

                argtypes = _Py_asdl_seq_new(num, arena);
                if (!argtypes)
                    goto out;

                /* Second pass: convert each argument type in order. */
                j = 0;
                for (i = 0; i < NCH(ch); i++) {
                    if (TYPE(CHILD(ch, i)) == test) {
                        arg = ast_for_expr(&c, CHILD(ch, i));
                        if (!arg)
                            goto out;
                        asdl_seq_SET(argtypes, j++, arg);
                    }
                }
            }
            else {
                argtypes = _Py_asdl_seq_new(0, arena);
                if (!argtypes)
                    goto out;
            }

            /* The return type is the last child of func_type. */
            ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
            if (!ret)
                goto out;
            res = FunctionType(argtypes, ret, arena);
            break;
        default:
            PyErr_Format(PyExc_SystemError,
                         "invalid node %d for PyAST_FromNode", TYPE(n));
            goto out;
    }
 out:
    /* Common exit: drop the cached normalize callable if one was looked
       up.  res is still NULL on every failure path. */
    if (c.c_normalize) {
        Py_DECREF(c.c_normalize);
    }
    return res;
}
937
938 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)939 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
940 PyArena *arena)
941 {
942 mod_ty mod;
943 PyObject *filename;
944 filename = PyUnicode_DecodeFSDefault(filename_str);
945 if (filename == NULL)
946 return NULL;
947 mod = PyAST_FromNodeObject(n, flags, filename, arena);
948 Py_DECREF(filename);
949 return mod;
950
951 }
952
953 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
954 */
955
956 static operator_ty
get_operator(struct compiling * c,const node * n)957 get_operator(struct compiling *c, const node *n)
958 {
959 switch (TYPE(n)) {
960 case VBAR:
961 return BitOr;
962 case CIRCUMFLEX:
963 return BitXor;
964 case AMPER:
965 return BitAnd;
966 case LEFTSHIFT:
967 return LShift;
968 case RIGHTSHIFT:
969 return RShift;
970 case PLUS:
971 return Add;
972 case MINUS:
973 return Sub;
974 case STAR:
975 return Mult;
976 case AT:
977 if (c->c_feature_version < 5) {
978 ast_error(c, n,
979 "The '@' operator is only supported in Python 3.5 and greater");
980 return (operator_ty)0;
981 }
982 return MatMult;
983 case SLASH:
984 return Div;
985 case DOUBLESLASH:
986 return FloorDiv;
987 case PERCENT:
988 return Mod;
989 default:
990 return (operator_ty)0;
991 }
992 }
993
/* Names that may not be assigned to, NULL-terminated.
   The order matters: forbidden_name() skips the first three entries
   (None, True, False) when it is asked for reduced checks. */
static const char * const FORBIDDEN[] = {
    "None",
    "True",
    "False",
    "__debug__",
    NULL,
};
1001
1002 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1003 forbidden_name(struct compiling *c, identifier name, const node *n,
1004 int full_checks)
1005 {
1006 assert(PyUnicode_Check(name));
1007 const char * const *p = FORBIDDEN;
1008 if (!full_checks) {
1009 /* In most cases, the parser will protect True, False, and None
1010 from being assign to. */
1011 p += 3;
1012 }
1013 for (; *p; p++) {
1014 if (_PyUnicode_EqualToASCIIString(name, *p)) {
1015 ast_error(c, n, "cannot assign to %U", name);
1016 return 1;
1017 }
1018 }
1019 return 0;
1020 }
1021
1022 static expr_ty
copy_location(expr_ty e,const node * n,const node * end)1023 copy_location(expr_ty e, const node *n, const node *end)
1024 {
1025 if (e) {
1026 e->lineno = LINENO(n);
1027 e->col_offset = n->n_col_offset;
1028 e->end_lineno = end->n_end_lineno;
1029 e->end_col_offset = end->n_end_col_offset;
1030 }
1031 return e;
1032 }
1033
1034 static const char *
get_expr_name(expr_ty e)1035 get_expr_name(expr_ty e)
1036 {
1037 switch (e->kind) {
1038 case Attribute_kind:
1039 return "attribute";
1040 case Subscript_kind:
1041 return "subscript";
1042 case Starred_kind:
1043 return "starred";
1044 case Name_kind:
1045 return "name";
1046 case List_kind:
1047 return "list";
1048 case Tuple_kind:
1049 return "tuple";
1050 case Lambda_kind:
1051 return "lambda";
1052 case Call_kind:
1053 return "function call";
1054 case BoolOp_kind:
1055 case BinOp_kind:
1056 case UnaryOp_kind:
1057 return "operator";
1058 case GeneratorExp_kind:
1059 return "generator expression";
1060 case Yield_kind:
1061 case YieldFrom_kind:
1062 return "yield expression";
1063 case Await_kind:
1064 return "await expression";
1065 case ListComp_kind:
1066 return "list comprehension";
1067 case SetComp_kind:
1068 return "set comprehension";
1069 case DictComp_kind:
1070 return "dict comprehension";
1071 case Dict_kind:
1072 return "dict display";
1073 case Set_kind:
1074 return "set display";
1075 case JoinedStr_kind:
1076 case FormattedValue_kind:
1077 return "f-string expression";
1078 case Constant_kind: {
1079 PyObject *value = e->v.Constant.value;
1080 if (value == Py_None) {
1081 return "None";
1082 }
1083 if (value == Py_False) {
1084 return "False";
1085 }
1086 if (value == Py_True) {
1087 return "True";
1088 }
1089 if (value == Py_Ellipsis) {
1090 return "Ellipsis";
1091 }
1092 return "literal";
1093 }
1094 case Compare_kind:
1095 return "comparison";
1096 case IfExp_kind:
1097 return "conditional expression";
1098 case NamedExpr_kind:
1099 return "named expression";
1100 default:
1101 PyErr_Format(PyExc_SystemError,
1102 "unexpected expression in assignment %d (line %d)",
1103 e->kind, e->lineno);
1104 return NULL;
1105 }
1106 }
1107
1108 /* Set the context ctx for expr_ty e, recursively traversing e.
1109
1110 Only sets context for expr kinds that "can appear in assignment context"
1111 (according to ../Parser/Python.asdl). For other expr kinds, it sets
1112 an appropriate syntax error and returns false.
1113 */
1114
1115 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1116 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1117 {
1118 asdl_seq *s = NULL;
1119
1120 /* Expressions in an augmented assignment have a Store context. */
1121
1122 switch (e->kind) {
1123 case Attribute_kind:
1124 e->v.Attribute.ctx = ctx;
1125 if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1126 return 0;
1127 break;
1128 case Subscript_kind:
1129 e->v.Subscript.ctx = ctx;
1130 break;
1131 case Starred_kind:
1132 e->v.Starred.ctx = ctx;
1133 if (!set_context(c, e->v.Starred.value, ctx, n))
1134 return 0;
1135 break;
1136 case Name_kind:
1137 if (ctx == Store) {
1138 if (forbidden_name(c, e->v.Name.id, n, 0))
1139 return 0; /* forbidden_name() calls ast_error() */
1140 }
1141 e->v.Name.ctx = ctx;
1142 break;
1143 case List_kind:
1144 e->v.List.ctx = ctx;
1145 s = e->v.List.elts;
1146 break;
1147 case Tuple_kind:
1148 e->v.Tuple.ctx = ctx;
1149 s = e->v.Tuple.elts;
1150 break;
1151 default: {
1152 const char *expr_name = get_expr_name(e);
1153 if (expr_name != NULL) {
1154 ast_error(c, n, "cannot %s %s",
1155 ctx == Store ? "assign to" : "delete",
1156 expr_name);
1157 }
1158 return 0;
1159 }
1160 }
1161
1162 /* If the LHS is a list or tuple, we need to set the assignment
1163 context for all the contained elements.
1164 */
1165 if (s) {
1166 Py_ssize_t i;
1167
1168 for (i = 0; i < asdl_seq_LEN(s); i++) {
1169 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1170 return 0;
1171 }
1172 }
1173 return 1;
1174 }
1175
1176 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1177 ast_for_augassign(struct compiling *c, const node *n)
1178 {
1179 REQ(n, augassign);
1180 n = CHILD(n, 0);
1181 switch (STR(n)[0]) {
1182 case '+':
1183 return Add;
1184 case '-':
1185 return Sub;
1186 case '/':
1187 if (STR(n)[1] == '/')
1188 return FloorDiv;
1189 else
1190 return Div;
1191 case '%':
1192 return Mod;
1193 case '<':
1194 return LShift;
1195 case '>':
1196 return RShift;
1197 case '&':
1198 return BitAnd;
1199 case '^':
1200 return BitXor;
1201 case '|':
1202 return BitOr;
1203 case '*':
1204 if (STR(n)[1] == '*')
1205 return Pow;
1206 else
1207 return Mult;
1208 case '@':
1209 if (c->c_feature_version < 5) {
1210 ast_error(c, n,
1211 "The '@' operator is only supported in Python 3.5 and greater");
1212 return (operator_ty)0;
1213 }
1214 return MatMult;
1215 default:
1216 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1217 return (operator_ty)0;
1218 }
1219 }
1220
1221 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1222 ast_for_comp_op(struct compiling *c, const node *n)
1223 {
1224 /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1225 |'is' 'not'
1226 */
1227 REQ(n, comp_op);
1228 if (NCH(n) == 1) {
1229 n = CHILD(n, 0);
1230 switch (TYPE(n)) {
1231 case LESS:
1232 return Lt;
1233 case GREATER:
1234 return Gt;
1235 case EQEQUAL: /* == */
1236 return Eq;
1237 case LESSEQUAL:
1238 return LtE;
1239 case GREATEREQUAL:
1240 return GtE;
1241 case NOTEQUAL:
1242 return NotEq;
1243 case NAME:
1244 if (strcmp(STR(n), "in") == 0)
1245 return In;
1246 if (strcmp(STR(n), "is") == 0)
1247 return Is;
1248 /* fall through */
1249 default:
1250 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1251 STR(n));
1252 return (cmpop_ty)0;
1253 }
1254 }
1255 else if (NCH(n) == 2) {
1256 /* handle "not in" and "is not" */
1257 switch (TYPE(CHILD(n, 0))) {
1258 case NAME:
1259 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1260 return NotIn;
1261 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1262 return IsNot;
1263 /* fall through */
1264 default:
1265 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1266 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1267 return (cmpop_ty)0;
1268 }
1269 }
1270 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1271 NCH(n));
1272 return (cmpop_ty)0;
1273 }
1274
1275 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1276 seq_for_testlist(struct compiling *c, const node *n)
1277 {
1278 /* testlist: test (',' test)* [',']
1279 testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1280 */
1281 asdl_seq *seq;
1282 expr_ty expression;
1283 int i;
1284 assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1285
1286 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1287 if (!seq)
1288 return NULL;
1289
1290 for (i = 0; i < NCH(n); i += 2) {
1291 const node *ch = CHILD(n, i);
1292 assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1293
1294 expression = ast_for_expr(c, ch);
1295 if (!expression)
1296 return NULL;
1297
1298 assert(i / 2 < seq->size);
1299 asdl_seq_SET(seq, i / 2, expression);
1300 }
1301 return seq;
1302 }
1303
1304 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1305 ast_for_arg(struct compiling *c, const node *n)
1306 {
1307 identifier name;
1308 expr_ty annotation = NULL;
1309 node *ch;
1310 arg_ty ret;
1311
1312 assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1313 ch = CHILD(n, 0);
1314 name = NEW_IDENTIFIER(ch);
1315 if (!name)
1316 return NULL;
1317 if (forbidden_name(c, name, ch, 0))
1318 return NULL;
1319
1320 if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1321 annotation = ast_for_expr(c, CHILD(n, 2));
1322 if (!annotation)
1323 return NULL;
1324 }
1325
1326 ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1327 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1328 if (!ret)
1329 return NULL;
1330 return ret;
1331 }
1332
1333 /* returns -1 if failed to handle keyword only arguments
1334 returns new position to keep processing if successful
1335 (',' tfpdef ['=' test])*
1336 ^^^
1337 start pointing here
1338 */
1339 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1340 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1341 asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1342 {
1343 PyObject *argname;
1344 node *ch;
1345 expr_ty expression, annotation;
1346 arg_ty arg = NULL;
1347 int i = start;
1348 int j = 0; /* index for kwdefaults and kwonlyargs */
1349
1350 if (kwonlyargs == NULL) {
1351 ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1352 return -1;
1353 }
1354 assert(kwdefaults != NULL);
1355 while (i < NCH(n)) {
1356 ch = CHILD(n, i);
1357 switch (TYPE(ch)) {
1358 case vfpdef:
1359 case tfpdef:
1360 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1361 expression = ast_for_expr(c, CHILD(n, i + 2));
1362 if (!expression)
1363 goto error;
1364 asdl_seq_SET(kwdefaults, j, expression);
1365 i += 2; /* '=' and test */
1366 }
1367 else { /* setting NULL if no default value exists */
1368 asdl_seq_SET(kwdefaults, j, NULL);
1369 }
1370 if (NCH(ch) == 3) {
1371 /* ch is NAME ':' test */
1372 annotation = ast_for_expr(c, CHILD(ch, 2));
1373 if (!annotation)
1374 goto error;
1375 }
1376 else {
1377 annotation = NULL;
1378 }
1379 ch = CHILD(ch, 0);
1380 argname = NEW_IDENTIFIER(ch);
1381 if (!argname)
1382 goto error;
1383 if (forbidden_name(c, argname, ch, 0))
1384 goto error;
1385 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1386 ch->n_end_lineno, ch->n_end_col_offset,
1387 c->c_arena);
1388 if (!arg)
1389 goto error;
1390 asdl_seq_SET(kwonlyargs, j++, arg);
1391 i += 1; /* the name */
1392 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1393 i += 1; /* the comma, if present */
1394 break;
1395 case TYPE_COMMENT:
1396 /* arg will be equal to the last argument processed */
1397 arg->type_comment = NEW_TYPE_COMMENT(ch);
1398 if (!arg->type_comment)
1399 goto error;
1400 i += 1;
1401 break;
1402 case DOUBLESTAR:
1403 return i;
1404 default:
1405 ast_error(c, ch, "unexpected node");
1406 goto error;
1407 }
1408 }
1409 return i;
1410 error:
1411 return -1;
1412 }
1413
/* Create AST for argument list.

   `n` is a `parameters`, `typedargslist` or `varargslist` node.  The
   function makes two passes over the children: the first only counts
   arguments so that the result sequences can be allocated with the right
   size, the second converts each argument and stores it.

   Returns the new arguments object, or NULL with an exception set on
   failure. */

static arguments_ty
ast_for_arguments(struct compiling *c, const node *n)
{
    /* This function handles both typedargslist (function definition)
       and varargslist (lambda definition).

       parameters: '(' [typedargslist] ')'

       The following definition for typedarglist is equivalent to this set of rules:

         arguments = argument (',' [TYPE_COMMENT] argument)*
         argument = tfpdef ['=' test]
         kwargs = '**' tfpdef [','] [TYPE_COMMENT]
         args = '*' [tfpdef]
         kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
                         [TYPE_COMMENT] [kwargs]])
         args_kwonly_kwargs = args kwonly_kwargs | kwargs
         poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
                                         [TYPE_COMMENT] [args_kwonly_kwargs]])
         typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
         typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
                        typedargslist_no_posonly]])|(typedargslist_no_posonly)

       typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
           [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
           [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
           [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
           [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
           '**' tfpdef [','] [TYPE_COMMENT]]] ) | (tfpdef ['=' test] (','
           [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
           [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
           [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
           [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
           '**' tfpdef [','] [TYPE_COMMENT]))

       tfpdef: NAME [':' test]

       The following definition for varargslist is equivalent to this set of rules:

         arguments = argument (',' argument )*
         argument = vfpdef ['=' test]
         kwargs = '**' vfpdef [',']
         args = '*' [vfpdef]
         kwonly_kwargs = (',' argument )* [',' [kwargs]]
         args_kwonly_kwargs = args kwonly_kwargs | kwargs
         poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
         vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
         varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
                       (vararglist_no_posonly)

       varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
           test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
           ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
           [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
           ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
           | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
           [',']]] | '**' vfpdef [','])

       vfpdef: NAME

    */
    int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
    int nposdefaults = 0, found_default = 0;
    asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
    arg_ty vararg = NULL, kwarg = NULL;
    arg_ty arg = NULL;
    node *ch;

    if (TYPE(n) == parameters) {
        if (NCH(n) == 2) /* () as argument list */
            return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
        /* Otherwise step into the list between the parentheses. */
        n = CHILD(n, 1);
    }
    assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);

    /* First count the number of positional args & defaults.  The
       variable i is the loop index for this for loop and the next.
       The next loop picks up where the first leaves off.
    */
    for (i = 0; i < NCH(n); i++) {
        ch = CHILD(n, i);
        if (TYPE(ch) == STAR) {
            /* skip star */
            i++;
            if (i < NCH(n) && /* skip argument following star */
                (TYPE(CHILD(n, i)) == tfpdef ||
                 TYPE(CHILD(n, i)) == vfpdef)) {
                i++;
            }
            break;
        }
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
        if (TYPE(ch) == EQUAL) nposdefaults++;
        if (TYPE(ch) == SLASH ) {
            /* Everything counted so far was positional-only; restart the
               count for ordinary positional arguments. */
            nposonlyargs = nposargs;
            nposargs = 0;
        }
    }
    /* count the number of keyword only args &
       defaults for keyword only args */
    for ( ; i < NCH(n); ++i) {
        ch = CHILD(n, i);
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
    }
    /* Allocate each result sequence only when its count is nonzero; a zero
       count leaves the sequence pointer NULL. */
    posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
    if (!posonlyargs && nposonlyargs) {
        return NULL;
    }
    posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
    if (!posargs && nposargs)
        return NULL;
    kwonlyargs = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwonlyargs && nkwonlyargs)
        return NULL;
    posdefaults = (nposdefaults ?
                    _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
    if (!posdefaults && nposdefaults)
        return NULL;
    /* The length of kwonlyargs and kwdefaults are same
       since we set NULL as default for keyword only argument w/o default
       - we have sequence data structure, but no dictionary */
    kwdefaults = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwdefaults && nkwonlyargs)
        return NULL;

    /* tfpdef: NAME [':' test]
       vfpdef: NAME
    */
    /* Second pass: convert every argument and store it. */
    i = 0;
    j = 0;  /* index for defaults */
    k = 0;  /* index for args */
    l = 0;  /* index for posonlyargs */
    while (i < NCH(n)) {
        ch = CHILD(n, i);
        switch (TYPE(ch)) {
            case tfpdef:
            case vfpdef:
                /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
                   anything other than EQUAL or a comma? */
                /* XXX Should NCH(n) check be made a separate check? */
                if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
                    expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
                    if (!expression)
                        return NULL;
                    assert(posdefaults != NULL);
                    asdl_seq_SET(posdefaults, j++, expression);
                    i += 2;
                    found_default = 1;
                }
                else if (found_default) {
                    ast_error(c, n,
                              "non-default argument follows default argument");
                    return NULL;
                }
                arg = ast_for_arg(c, ch);
                if (!arg)
                    return NULL;
                /* The first nposonlyargs arguments go to posonlyargs, the
                   rest to posargs. */
                if (l < nposonlyargs) {
                    asdl_seq_SET(posonlyargs, l++, arg);
                } else {
                    asdl_seq_SET(posargs, k++, arg);
                }
                i += 1; /* the name */
                if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                    i += 1; /* the comma, if present */
                break;
             case SLASH:
                /* Advance the slash and the comma. If there are more names
                 * after the slash there will be a comma so we are advancing
                 * the correct number of nodes. If the slash is the last item,
                 * we will be advancing an extra token but then i > NCH(n)
                 * and the enclosing while will finish correctly. */
                i += 2;
                break;
            case STAR:
                /* A '*' that is last, or followed only by a comma or a type
                   comment, has no named argument after it. */
                if (i+1 >= NCH(n) ||
                    (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
                                       || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
                    ast_error(c, CHILD(n, i),
                              "named arguments must follow bare *");
                    return NULL;
                }
                ch = CHILD(n, i+1);  /* tfpdef or COMMA */
                if (TYPE(ch) == COMMA) {
                    int res = 0;
                    i += 2; /* now follows keyword only arguments */

                    if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
                        ast_error(c, CHILD(n, i),
                                  "bare * has associated type comment");
                        return NULL;
                    }

                    res = handle_keywordonly_args(c, n, i,
                                                  kwonlyargs, kwdefaults);
                    if (res == -1) return NULL;
                    i = res; /* res has new position to process */
                }
                else {
                    vararg = ast_for_arg(c, ch);
                    if (!vararg)
                        return NULL;

                    i += 2; /* the star and the name */
                    if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                        i += 1; /* the comma, if present */

                    /* A type comment directly after the vararg belongs to
                       the vararg. */
                    if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
                        vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
                        if (!vararg->type_comment)
                            return NULL;
                        i += 1;
                    }

                    if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
                                    || TYPE(CHILD(n, i)) == vfpdef)) {
                        int res = 0;
                        res = handle_keywordonly_args(c, n, i,
                                                      kwonlyargs, kwdefaults);
                        if (res == -1) return NULL;
                        i = res; /* res has new position to process */
                    }
                }
                break;
            case DOUBLESTAR:
                ch = CHILD(n, i+1);  /* tfpdef */
                assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
                kwarg = ast_for_arg(c, ch);
                if (!kwarg)
                    return NULL;
                i += 2; /* the double star and the name */
                if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                    i += 1; /* the comma, if present */
                break;
            case TYPE_COMMENT:
                assert(i);

                /* Once a '**' argument has been seen, the comment is
                   attached to it instead of the last plain argument. */
                if (kwarg)
                    arg = kwarg;

                /* arg will be equal to the last argument processed */
                arg->type_comment = NEW_TYPE_COMMENT(ch);
                if (!arg->type_comment)
                    return NULL;
                i += 1;
                break;
            default:
                PyErr_Format(PyExc_SystemError,
                             "unexpected node in varargslist: %d @ %d",
                             TYPE(ch), i);
                return NULL;
        }
    }
    return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
}
1678
1679 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1680 ast_for_decorator(struct compiling *c, const node *n)
1681 {
1682 /* decorator: '@' namedexpr_test NEWLINE */
1683
1684 REQ(n, decorator);
1685 REQ(CHILD(n, 0), AT);
1686 REQ(CHILD(n, 2), NEWLINE);
1687
1688 return ast_for_expr(c, CHILD(n, 1));
1689 }
1690
1691 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1692 ast_for_decorators(struct compiling *c, const node *n)
1693 {
1694 asdl_seq* decorator_seq;
1695 expr_ty d;
1696 int i;
1697
1698 REQ(n, decorators);
1699 decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1700 if (!decorator_seq)
1701 return NULL;
1702
1703 for (i = 0; i < NCH(n); i++) {
1704 d = ast_for_decorator(c, CHILD(n, i));
1705 if (!d)
1706 return NULL;
1707 asdl_seq_SET(decorator_seq, i, d);
1708 }
1709 return decorator_seq;
1710 }
1711
1712 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1713 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1714 asdl_seq *decorator_seq, bool is_async)
1715 {
1716 /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1717 const node * const n = is_async ? CHILD(n0, 1) : n0;
1718 identifier name;
1719 arguments_ty args;
1720 asdl_seq *body;
1721 expr_ty returns = NULL;
1722 int name_i = 1;
1723 int end_lineno, end_col_offset;
1724 node *tc;
1725 string type_comment = NULL;
1726
1727 if (is_async && c->c_feature_version < 5) {
1728 ast_error(c, n,
1729 "Async functions are only supported in Python 3.5 and greater");
1730 return NULL;
1731 }
1732
1733 REQ(n, funcdef);
1734
1735 name = NEW_IDENTIFIER(CHILD(n, name_i));
1736 if (!name)
1737 return NULL;
1738 if (forbidden_name(c, name, CHILD(n, name_i), 0))
1739 return NULL;
1740 args = ast_for_arguments(c, CHILD(n, name_i + 1));
1741 if (!args)
1742 return NULL;
1743 if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1744 returns = ast_for_expr(c, CHILD(n, name_i + 3));
1745 if (!returns)
1746 return NULL;
1747 name_i += 2;
1748 }
1749 if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1750 type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1751 if (!type_comment)
1752 return NULL;
1753 name_i += 1;
1754 }
1755 body = ast_for_suite(c, CHILD(n, name_i + 3));
1756 if (!body)
1757 return NULL;
1758 get_last_end_pos(body, &end_lineno, &end_col_offset);
1759
1760 if (NCH(CHILD(n, name_i + 3)) > 1) {
1761 /* Check if the suite has a type comment in it. */
1762 tc = CHILD(CHILD(n, name_i + 3), 1);
1763
1764 if (TYPE(tc) == TYPE_COMMENT) {
1765 if (type_comment != NULL) {
1766 ast_error(c, n, "Cannot have two type comments on def");
1767 return NULL;
1768 }
1769 type_comment = NEW_TYPE_COMMENT(tc);
1770 if (!type_comment)
1771 return NULL;
1772 }
1773 }
1774
1775 if (is_async)
1776 return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1777 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1778 else
1779 return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1780 LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1781 }
1782
1783 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1784 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1785 {
1786 /* async_funcdef: ASYNC funcdef */
1787 REQ(n, async_funcdef);
1788 REQ(CHILD(n, 0), ASYNC);
1789 REQ(CHILD(n, 1), funcdef);
1790
1791 return ast_for_funcdef_impl(c, n, decorator_seq,
1792 true /* is_async */);
1793 }
1794
1795 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1796 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1797 {
1798 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1799 return ast_for_funcdef_impl(c, n, decorator_seq,
1800 false /* is_async */);
1801 }
1802
1803
1804 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1805 ast_for_async_stmt(struct compiling *c, const node *n)
1806 {
1807 /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1808 REQ(n, async_stmt);
1809 REQ(CHILD(n, 0), ASYNC);
1810
1811 switch (TYPE(CHILD(n, 1))) {
1812 case funcdef:
1813 return ast_for_funcdef_impl(c, n, NULL,
1814 true /* is_async */);
1815 case with_stmt:
1816 return ast_for_with_stmt(c, n,
1817 true /* is_async */);
1818
1819 case for_stmt:
1820 return ast_for_for_stmt(c, n,
1821 true /* is_async */);
1822
1823 default:
1824 PyErr_Format(PyExc_SystemError,
1825 "invalid async stament: %s",
1826 STR(CHILD(n, 1)));
1827 return NULL;
1828 }
1829 }
1830
1831 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1832 ast_for_decorated(struct compiling *c, const node *n)
1833 {
1834 /* decorated: decorators (classdef | funcdef | async_funcdef) */
1835 stmt_ty thing = NULL;
1836 asdl_seq *decorator_seq = NULL;
1837
1838 REQ(n, decorated);
1839
1840 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1841 if (!decorator_seq)
1842 return NULL;
1843
1844 assert(TYPE(CHILD(n, 1)) == funcdef ||
1845 TYPE(CHILD(n, 1)) == async_funcdef ||
1846 TYPE(CHILD(n, 1)) == classdef);
1847
1848 if (TYPE(CHILD(n, 1)) == funcdef) {
1849 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1850 } else if (TYPE(CHILD(n, 1)) == classdef) {
1851 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1852 } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1853 thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1854 }
1855 return thing;
1856 }
1857
1858 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1859 ast_for_namedexpr(struct compiling *c, const node *n)
1860 {
1861 /* namedexpr_test: test [':=' test]
1862 argument: ( test [comp_for] |
1863 test ':=' test |
1864 test '=' test |
1865 '**' test |
1866 '*' test )
1867 */
1868 expr_ty target, value;
1869
1870 target = ast_for_expr(c, CHILD(n, 0));
1871 if (!target)
1872 return NULL;
1873
1874 value = ast_for_expr(c, CHILD(n, 2));
1875 if (!value)
1876 return NULL;
1877
1878 if (target->kind != Name_kind) {
1879 const char *expr_name = get_expr_name(target);
1880 if (expr_name != NULL) {
1881 ast_error(c, n, "cannot use assignment expressions with %s", expr_name);
1882 }
1883 return NULL;
1884 }
1885
1886 if (!set_context(c, target, Store, n))
1887 return NULL;
1888
1889 return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1890 n->n_end_col_offset, c->c_arena);
1891 }
1892
1893 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1894 ast_for_lambdef(struct compiling *c, const node *n)
1895 {
1896 /* lambdef: 'lambda' [varargslist] ':' test
1897 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1898 arguments_ty args;
1899 expr_ty expression;
1900
1901 if (NCH(n) == 3) {
1902 args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1903 if (!args)
1904 return NULL;
1905 expression = ast_for_expr(c, CHILD(n, 2));
1906 if (!expression)
1907 return NULL;
1908 }
1909 else {
1910 args = ast_for_arguments(c, CHILD(n, 1));
1911 if (!args)
1912 return NULL;
1913 expression = ast_for_expr(c, CHILD(n, 3));
1914 if (!expression)
1915 return NULL;
1916 }
1917
1918 return Lambda(args, expression, LINENO(n), n->n_col_offset,
1919 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1920 }
1921
1922 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)1923 ast_for_ifexpr(struct compiling *c, const node *n)
1924 {
1925 /* test: or_test 'if' or_test 'else' test */
1926 expr_ty expression, body, orelse;
1927
1928 assert(NCH(n) == 5);
1929 body = ast_for_expr(c, CHILD(n, 0));
1930 if (!body)
1931 return NULL;
1932 expression = ast_for_expr(c, CHILD(n, 2));
1933 if (!expression)
1934 return NULL;
1935 orelse = ast_for_expr(c, CHILD(n, 4));
1936 if (!orelse)
1937 return NULL;
1938 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
1939 n->n_end_lineno, n->n_end_col_offset,
1940 c->c_arena);
1941 }
1942
1943 /*
1944 Count the number of 'for' loops in a comprehension.
1945
1946 Helper for ast_for_comprehension().
1947 */
1948
1949 static int
count_comp_fors(struct compiling * c,const node * n)1950 count_comp_fors(struct compiling *c, const node *n)
1951 {
1952 int n_fors = 0;
1953
1954 count_comp_for:
1955 n_fors++;
1956 REQ(n, comp_for);
1957 if (NCH(n) == 2) {
1958 REQ(CHILD(n, 0), ASYNC);
1959 n = CHILD(n, 1);
1960 }
1961 else if (NCH(n) == 1) {
1962 n = CHILD(n, 0);
1963 }
1964 else {
1965 goto error;
1966 }
1967 if (NCH(n) == (5)) {
1968 n = CHILD(n, 4);
1969 }
1970 else {
1971 return n_fors;
1972 }
1973 count_comp_iter:
1974 REQ(n, comp_iter);
1975 n = CHILD(n, 0);
1976 if (TYPE(n) == comp_for)
1977 goto count_comp_for;
1978 else if (TYPE(n) == comp_if) {
1979 if (NCH(n) == 3) {
1980 n = CHILD(n, 2);
1981 goto count_comp_iter;
1982 }
1983 else
1984 return n_fors;
1985 }
1986
1987 error:
1988 /* Should never be reached */
1989 PyErr_SetString(PyExc_SystemError,
1990 "logic error in count_comp_fors");
1991 return -1;
1992 }
1993
1994 /* Count the number of 'if' statements in a comprehension.
1995
1996 Helper for ast_for_comprehension().
1997 */
1998
1999 static int
count_comp_ifs(struct compiling * c,const node * n)2000 count_comp_ifs(struct compiling *c, const node *n)
2001 {
2002 int n_ifs = 0;
2003
2004 while (1) {
2005 REQ(n, comp_iter);
2006 if (TYPE(CHILD(n, 0)) == comp_for)
2007 return n_ifs;
2008 n = CHILD(n, 0);
2009 REQ(n, comp_if);
2010 n_ifs++;
2011 if (NCH(n) == 2)
2012 return n_ifs;
2013 n = CHILD(n, 2);
2014 }
2015 }
2016
2017 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)2018 ast_for_comprehension(struct compiling *c, const node *n)
2019 {
2020 int i, n_fors;
2021 asdl_seq *comps;
2022
2023 n_fors = count_comp_fors(c, n);
2024 if (n_fors == -1)
2025 return NULL;
2026
2027 comps = _Py_asdl_seq_new(n_fors, c->c_arena);
2028 if (!comps)
2029 return NULL;
2030
2031 for (i = 0; i < n_fors; i++) {
2032 comprehension_ty comp;
2033 asdl_seq *t;
2034 expr_ty expression, first;
2035 node *for_ch;
2036 node *sync_n;
2037 int is_async = 0;
2038
2039 REQ(n, comp_for);
2040
2041 if (NCH(n) == 2) {
2042 is_async = 1;
2043 REQ(CHILD(n, 0), ASYNC);
2044 sync_n = CHILD(n, 1);
2045 }
2046 else {
2047 sync_n = CHILD(n, 0);
2048 }
2049 REQ(sync_n, sync_comp_for);
2050
2051 /* Async comprehensions only allowed in Python 3.6 and greater */
2052 if (is_async && c->c_feature_version < 6) {
2053 ast_error(c, n,
2054 "Async comprehensions are only supported in Python 3.6 and greater");
2055 return NULL;
2056 }
2057
2058 for_ch = CHILD(sync_n, 1);
2059 t = ast_for_exprlist(c, for_ch, Store);
2060 if (!t)
2061 return NULL;
2062 expression = ast_for_expr(c, CHILD(sync_n, 3));
2063 if (!expression)
2064 return NULL;
2065
2066 /* Check the # of children rather than the length of t, since
2067 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
2068 first = (expr_ty)asdl_seq_GET(t, 0);
2069 if (NCH(for_ch) == 1)
2070 comp = comprehension(first, expression, NULL,
2071 is_async, c->c_arena);
2072 else
2073 comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
2074 for_ch->n_end_lineno, for_ch->n_end_col_offset,
2075 c->c_arena),
2076 expression, NULL, is_async, c->c_arena);
2077 if (!comp)
2078 return NULL;
2079
2080 if (NCH(sync_n) == 5) {
2081 int j, n_ifs;
2082 asdl_seq *ifs;
2083
2084 n = CHILD(sync_n, 4);
2085 n_ifs = count_comp_ifs(c, n);
2086 if (n_ifs == -1)
2087 return NULL;
2088
2089 ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
2090 if (!ifs)
2091 return NULL;
2092
2093 for (j = 0; j < n_ifs; j++) {
2094 REQ(n, comp_iter);
2095 n = CHILD(n, 0);
2096 REQ(n, comp_if);
2097
2098 expression = ast_for_expr(c, CHILD(n, 1));
2099 if (!expression)
2100 return NULL;
2101 asdl_seq_SET(ifs, j, expression);
2102 if (NCH(n) == 3)
2103 n = CHILD(n, 2);
2104 }
2105 /* on exit, must guarantee that n is a comp_for */
2106 if (TYPE(n) == comp_iter)
2107 n = CHILD(n, 0);
2108 comp->ifs = ifs;
2109 }
2110 asdl_seq_SET(comps, i, comp);
2111 }
2112 return comps;
2113 }
2114
2115 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2116 ast_for_itercomp(struct compiling *c, const node *n, int type)
2117 {
2118 /* testlist_comp: (test|star_expr)
2119 * ( comp_for | (',' (test|star_expr))* [','] ) */
2120 expr_ty elt;
2121 asdl_seq *comps;
2122 node *ch;
2123
2124 assert(NCH(n) > 1);
2125
2126 ch = CHILD(n, 0);
2127 elt = ast_for_expr(c, ch);
2128 if (!elt)
2129 return NULL;
2130 if (elt->kind == Starred_kind) {
2131 ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2132 return NULL;
2133 }
2134
2135 comps = ast_for_comprehension(c, CHILD(n, 1));
2136 if (!comps)
2137 return NULL;
2138
2139 if (type == COMP_GENEXP)
2140 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2141 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2142 else if (type == COMP_LISTCOMP)
2143 return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2144 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2145 else if (type == COMP_SETCOMP)
2146 return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2147 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2148 else
2149 /* Should never happen */
2150 return NULL;
2151 }
2152
2153 /* Fills in the key, value pair corresponding to the dict element. In case
2154 * of an unpacking, key is NULL. *i is advanced by the number of ast
2155 * elements. Iff successful, nonzero is returned.
2156 */
2157 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2158 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2159 expr_ty *key, expr_ty *value)
2160 {
2161 expr_ty expression;
2162 if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2163 assert(NCH(n) - *i >= 2);
2164
2165 expression = ast_for_expr(c, CHILD(n, *i + 1));
2166 if (!expression)
2167 return 0;
2168 *key = NULL;
2169 *value = expression;
2170
2171 *i += 2;
2172 }
2173 else {
2174 assert(NCH(n) - *i >= 3);
2175
2176 expression = ast_for_expr(c, CHILD(n, *i));
2177 if (!expression)
2178 return 0;
2179 *key = expression;
2180
2181 REQ(CHILD(n, *i + 1), COLON);
2182
2183 expression = ast_for_expr(c, CHILD(n, *i + 2));
2184 if (!expression)
2185 return 0;
2186 *value = expression;
2187
2188 *i += 3;
2189 }
2190 return 1;
2191 }
2192
2193 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2194 ast_for_dictcomp(struct compiling *c, const node *n)
2195 {
2196 expr_ty key, value;
2197 asdl_seq *comps;
2198 int i = 0;
2199
2200 if (!ast_for_dictelement(c, n, &i, &key, &value))
2201 return NULL;
2202 assert(key);
2203 assert(NCH(n) - i >= 1);
2204
2205 comps = ast_for_comprehension(c, CHILD(n, i));
2206 if (!comps)
2207 return NULL;
2208
2209 return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2210 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2211 }
2212
2213 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2214 ast_for_dictdisplay(struct compiling *c, const node *n)
2215 {
2216 int i;
2217 int j;
2218 int size;
2219 asdl_seq *keys, *values;
2220
2221 size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2222 keys = _Py_asdl_seq_new(size, c->c_arena);
2223 if (!keys)
2224 return NULL;
2225
2226 values = _Py_asdl_seq_new(size, c->c_arena);
2227 if (!values)
2228 return NULL;
2229
2230 j = 0;
2231 for (i = 0; i < NCH(n); i++) {
2232 expr_ty key, value;
2233
2234 if (!ast_for_dictelement(c, n, &i, &key, &value))
2235 return NULL;
2236 asdl_seq_SET(keys, j, key);
2237 asdl_seq_SET(values, j, value);
2238
2239 j++;
2240 }
2241 keys->size = j;
2242 values->size = j;
2243 return Dict(keys, values, LINENO(n), n->n_col_offset,
2244 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2245 }
2246
2247 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2248 ast_for_genexp(struct compiling *c, const node *n)
2249 {
2250 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2251 return ast_for_itercomp(c, n, COMP_GENEXP);
2252 }
2253
2254 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2255 ast_for_listcomp(struct compiling *c, const node *n)
2256 {
2257 assert(TYPE(n) == (testlist_comp));
2258 return ast_for_itercomp(c, n, COMP_LISTCOMP);
2259 }
2260
2261 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2262 ast_for_setcomp(struct compiling *c, const node *n)
2263 {
2264 assert(TYPE(n) == (dictorsetmaker));
2265 return ast_for_itercomp(c, n, COMP_SETCOMP);
2266 }
2267
2268 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2269 ast_for_setdisplay(struct compiling *c, const node *n)
2270 {
2271 int i;
2272 int size;
2273 asdl_seq *elts;
2274
2275 assert(TYPE(n) == (dictorsetmaker));
2276 size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2277 elts = _Py_asdl_seq_new(size, c->c_arena);
2278 if (!elts)
2279 return NULL;
2280 for (i = 0; i < NCH(n); i += 2) {
2281 expr_ty expression;
2282 expression = ast_for_expr(c, CHILD(n, i));
2283 if (!expression)
2284 return NULL;
2285 asdl_seq_SET(elts, i / 2, expression);
2286 }
2287 return Set(elts, LINENO(n), n->n_col_offset,
2288 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2289 }
2290
2291 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2292 ast_for_atom(struct compiling *c, const node *n)
2293 {
2294 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2295 | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2296 | '...' | 'None' | 'True' | 'False'
2297 */
2298 node *ch = CHILD(n, 0);
2299
2300 switch (TYPE(ch)) {
2301 case NAME: {
2302 PyObject *name;
2303 const char *s = STR(ch);
2304 size_t len = strlen(s);
2305 if (len >= 4 && len <= 5) {
2306 if (!strcmp(s, "None"))
2307 return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
2308 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2309 if (!strcmp(s, "True"))
2310 return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
2311 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2312 if (!strcmp(s, "False"))
2313 return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
2314 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2315 }
2316 name = new_identifier(s, c);
2317 if (!name)
2318 return NULL;
2319 /* All names start in Load context, but may later be changed. */
2320 return Name(name, Load, LINENO(n), n->n_col_offset,
2321 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2322 }
2323 case STRING: {
2324 expr_ty str = parsestrplus(c, n);
2325 if (!str) {
2326 const char *errtype = NULL;
2327 if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2328 errtype = "unicode error";
2329 else if (PyErr_ExceptionMatches(PyExc_ValueError))
2330 errtype = "value error";
2331 if (errtype) {
2332 PyObject *type, *value, *tback, *errstr;
2333 PyErr_Fetch(&type, &value, &tback);
2334 errstr = PyObject_Str(value);
2335 if (errstr) {
2336 ast_error(c, n, "(%s) %U", errtype, errstr);
2337 Py_DECREF(errstr);
2338 }
2339 else {
2340 PyErr_Clear();
2341 ast_error(c, n, "(%s) unknown error", errtype);
2342 }
2343 Py_DECREF(type);
2344 Py_XDECREF(value);
2345 Py_XDECREF(tback);
2346 }
2347 return NULL;
2348 }
2349 return str;
2350 }
2351 case NUMBER: {
2352 PyObject *pynum;
2353 /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
2354 /* Check for underscores here rather than in parse_number so we can report a line number on error */
2355 if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
2356 ast_error(c, ch,
2357 "Underscores in numeric literals are only supported in Python 3.6 and greater");
2358 return NULL;
2359 }
2360 pynum = parsenumber(c, STR(ch));
2361 if (!pynum)
2362 return NULL;
2363
2364 if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2365 Py_DECREF(pynum);
2366 return NULL;
2367 }
2368 return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
2369 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2370 }
2371 case ELLIPSIS: /* Ellipsis */
2372 return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
2373 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2374 case LPAR: /* some parenthesized expressions */
2375 ch = CHILD(n, 1);
2376
2377 if (TYPE(ch) == RPAR)
2378 return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
2379 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2380
2381 if (TYPE(ch) == yield_expr)
2382 return ast_for_expr(c, ch);
2383
2384 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2385 if (NCH(ch) == 1) {
2386 return ast_for_testlist(c, ch);
2387 }
2388
2389 if (TYPE(CHILD(ch, 1)) == comp_for) {
2390 return copy_location(ast_for_genexp(c, ch), n, n);
2391 }
2392 else {
2393 return copy_location(ast_for_testlist(c, ch), n, n);
2394 }
2395 case LSQB: /* list (or list comprehension) */
2396 ch = CHILD(n, 1);
2397
2398 if (TYPE(ch) == RSQB)
2399 return List(NULL, Load, LINENO(n), n->n_col_offset,
2400 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2401
2402 REQ(ch, testlist_comp);
2403 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2404 asdl_seq *elts = seq_for_testlist(c, ch);
2405 if (!elts)
2406 return NULL;
2407
2408 return List(elts, Load, LINENO(n), n->n_col_offset,
2409 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2410 }
2411 else {
2412 return copy_location(ast_for_listcomp(c, ch), n, n);
2413 }
2414 case LBRACE: {
2415 /* dictorsetmaker: ( ((test ':' test | '**' test)
2416 * (comp_for | (',' (test ':' test | '**' test))* [','])) |
2417 * ((test | '*' test)
2418 * (comp_for | (',' (test | '*' test))* [','])) ) */
2419 expr_ty res;
2420 ch = CHILD(n, 1);
2421 if (TYPE(ch) == RBRACE) {
2422 /* It's an empty dict. */
2423 return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
2424 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2425 }
2426 else {
2427 int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2428 if (NCH(ch) == 1 ||
2429 (NCH(ch) > 1 &&
2430 TYPE(CHILD(ch, 1)) == COMMA)) {
2431 /* It's a set display. */
2432 res = ast_for_setdisplay(c, ch);
2433 }
2434 else if (NCH(ch) > 1 &&
2435 TYPE(CHILD(ch, 1)) == comp_for) {
2436 /* It's a set comprehension. */
2437 res = ast_for_setcomp(c, ch);
2438 }
2439 else if (NCH(ch) > 3 - is_dict &&
2440 TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2441 /* It's a dictionary comprehension. */
2442 if (is_dict) {
2443 ast_error(c, n,
2444 "dict unpacking cannot be used in dict comprehension");
2445 return NULL;
2446 }
2447 res = ast_for_dictcomp(c, ch);
2448 }
2449 else {
2450 /* It's a dictionary display. */
2451 res = ast_for_dictdisplay(c, ch);
2452 }
2453 return copy_location(res, n, n);
2454 }
2455 }
2456 default:
2457 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2458 return NULL;
2459 }
2460 }
2461
2462 static expr_ty
ast_for_slice(struct compiling * c,const node * n)2463 ast_for_slice(struct compiling *c, const node *n)
2464 {
2465 node *ch;
2466 expr_ty lower = NULL, upper = NULL, step = NULL;
2467
2468 REQ(n, subscript);
2469
2470 /*
2471 subscript: test | [test] ':' [test] [sliceop]
2472 sliceop: ':' [test]
2473 */
2474 ch = CHILD(n, 0);
2475 if (NCH(n) == 1 && TYPE(ch) == test) {
2476 return ast_for_expr(c, ch);
2477 }
2478
2479 if (TYPE(ch) == test) {
2480 lower = ast_for_expr(c, ch);
2481 if (!lower)
2482 return NULL;
2483 }
2484
2485 /* If there's an upper bound it's in the second or third position. */
2486 if (TYPE(ch) == COLON) {
2487 if (NCH(n) > 1) {
2488 node *n2 = CHILD(n, 1);
2489
2490 if (TYPE(n2) == test) {
2491 upper = ast_for_expr(c, n2);
2492 if (!upper)
2493 return NULL;
2494 }
2495 }
2496 } else if (NCH(n) > 2) {
2497 node *n2 = CHILD(n, 2);
2498
2499 if (TYPE(n2) == test) {
2500 upper = ast_for_expr(c, n2);
2501 if (!upper)
2502 return NULL;
2503 }
2504 }
2505
2506 ch = CHILD(n, NCH(n) - 1);
2507 if (TYPE(ch) == sliceop) {
2508 if (NCH(ch) != 1) {
2509 ch = CHILD(ch, 1);
2510 if (TYPE(ch) == test) {
2511 step = ast_for_expr(c, ch);
2512 if (!step)
2513 return NULL;
2514 }
2515 }
2516 }
2517
2518 return Slice(lower, upper, step, LINENO(n), n->n_col_offset,
2519 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2520 }
2521
2522 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2523 ast_for_binop(struct compiling *c, const node *n)
2524 {
2525 /* Must account for a sequence of expressions.
2526 How should A op B op C by represented?
2527 BinOp(BinOp(A, op, B), op, C).
2528 */
2529
2530 int i, nops;
2531 expr_ty expr1, expr2, result;
2532 operator_ty newoperator;
2533
2534 expr1 = ast_for_expr(c, CHILD(n, 0));
2535 if (!expr1)
2536 return NULL;
2537
2538 expr2 = ast_for_expr(c, CHILD(n, 2));
2539 if (!expr2)
2540 return NULL;
2541
2542 newoperator = get_operator(c, CHILD(n, 1));
2543 if (!newoperator)
2544 return NULL;
2545
2546 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2547 CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2548 c->c_arena);
2549 if (!result)
2550 return NULL;
2551
2552 nops = (NCH(n) - 1) / 2;
2553 for (i = 1; i < nops; i++) {
2554 expr_ty tmp_result, tmp;
2555 const node* next_oper = CHILD(n, i * 2 + 1);
2556
2557 newoperator = get_operator(c, next_oper);
2558 if (!newoperator)
2559 return NULL;
2560
2561 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2562 if (!tmp)
2563 return NULL;
2564
2565 tmp_result = BinOp(result, newoperator, tmp,
2566 LINENO(n), n->n_col_offset,
2567 CHILD(n, i * 2 + 2)->n_end_lineno,
2568 CHILD(n, i * 2 + 2)->n_end_col_offset,
2569 c->c_arena);
2570 if (!tmp_result)
2571 return NULL;
2572 result = tmp_result;
2573 }
2574 return result;
2575 }
2576
2577 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr,const node * start)2578 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start)
2579 {
2580 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2581 subscriptlist: subscript (',' subscript)* [',']
2582 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2583 */
2584 const node *n_copy = n;
2585 REQ(n, trailer);
2586 if (TYPE(CHILD(n, 0)) == LPAR) {
2587 if (NCH(n) == 2)
2588 return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset,
2589 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2590 else
2591 return ast_for_call(c, CHILD(n, 1), left_expr,
2592 start, CHILD(n, 0), CHILD(n, 2));
2593 }
2594 else if (TYPE(CHILD(n, 0)) == DOT) {
2595 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2596 if (!attr_id)
2597 return NULL;
2598 return Attribute(left_expr, attr_id, Load,
2599 LINENO(start), start->n_col_offset,
2600 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2601 }
2602 else {
2603 REQ(CHILD(n, 0), LSQB);
2604 REQ(CHILD(n, 2), RSQB);
2605 n = CHILD(n, 1);
2606 if (NCH(n) == 1) {
2607 expr_ty slc = ast_for_slice(c, CHILD(n, 0));
2608 if (!slc)
2609 return NULL;
2610 return Subscript(left_expr, slc, Load, LINENO(start), start->n_col_offset,
2611 n_copy->n_end_lineno, n_copy->n_end_col_offset,
2612 c->c_arena);
2613 }
2614 else {
2615 int j;
2616 expr_ty slc, e;
2617 asdl_seq *elts;
2618 elts = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2619 if (!elts)
2620 return NULL;
2621 for (j = 0; j < NCH(n); j += 2) {
2622 slc = ast_for_slice(c, CHILD(n, j));
2623 if (!slc)
2624 return NULL;
2625 asdl_seq_SET(elts, j / 2, slc);
2626 }
2627 e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2628 n->n_end_lineno, n->n_end_col_offset,
2629 c->c_arena);
2630 if (!e)
2631 return NULL;
2632 return Subscript(left_expr, e,
2633 Load, LINENO(start), start->n_col_offset,
2634 n_copy->n_end_lineno, n_copy->n_end_col_offset,
2635 c->c_arena);
2636 }
2637 }
2638 }
2639
2640 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2641 ast_for_factor(struct compiling *c, const node *n)
2642 {
2643 expr_ty expression;
2644
2645 expression = ast_for_expr(c, CHILD(n, 1));
2646 if (!expression)
2647 return NULL;
2648
2649 switch (TYPE(CHILD(n, 0))) {
2650 case PLUS:
2651 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2652 n->n_end_lineno, n->n_end_col_offset,
2653 c->c_arena);
2654 case MINUS:
2655 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2656 n->n_end_lineno, n->n_end_col_offset,
2657 c->c_arena);
2658 case TILDE:
2659 return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2660 n->n_end_lineno, n->n_end_col_offset,
2661 c->c_arena);
2662 }
2663 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2664 TYPE(CHILD(n, 0)));
2665 return NULL;
2666 }
2667
2668 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2669 ast_for_atom_expr(struct compiling *c, const node *n)
2670 {
2671 int i, nch, start = 0;
2672 expr_ty e;
2673
2674 REQ(n, atom_expr);
2675 nch = NCH(n);
2676
2677 if (TYPE(CHILD(n, 0)) == AWAIT) {
2678 if (c->c_feature_version < 5) {
2679 ast_error(c, n,
2680 "Await expressions are only supported in Python 3.5 and greater");
2681 return NULL;
2682 }
2683 start = 1;
2684 assert(nch > 1);
2685 }
2686
2687 e = ast_for_atom(c, CHILD(n, start));
2688 if (!e)
2689 return NULL;
2690 if (nch == 1)
2691 return e;
2692 if (start && nch == 2) {
2693 return Await(e, LINENO(n), n->n_col_offset,
2694 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2695 }
2696
2697 for (i = start + 1; i < nch; i++) {
2698 node *ch = CHILD(n, i);
2699 if (TYPE(ch) != trailer)
2700 break;
2701 e = ast_for_trailer(c, ch, e, CHILD(n, start));
2702 if (!e)
2703 return NULL;
2704 }
2705
2706 if (start) {
2707 /* there was an 'await' */
2708 return Await(e, LINENO(n), n->n_col_offset,
2709 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2710 }
2711 else {
2712 return e;
2713 }
2714 }
2715
2716 static expr_ty
ast_for_power(struct compiling * c,const node * n)2717 ast_for_power(struct compiling *c, const node *n)
2718 {
2719 /* power: atom trailer* ('**' factor)*
2720 */
2721 expr_ty e;
2722 REQ(n, power);
2723 e = ast_for_atom_expr(c, CHILD(n, 0));
2724 if (!e)
2725 return NULL;
2726 if (NCH(n) == 1)
2727 return e;
2728 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2729 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2730 if (!f)
2731 return NULL;
2732 e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2733 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2734 }
2735 return e;
2736 }
2737
2738 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2739 ast_for_starred(struct compiling *c, const node *n)
2740 {
2741 expr_ty tmp;
2742 REQ(n, star_expr);
2743
2744 tmp = ast_for_expr(c, CHILD(n, 1));
2745 if (!tmp)
2746 return NULL;
2747
2748 /* The Load context is changed later. */
2749 return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2750 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2751 }
2752
2753
2754 /* Do not name a variable 'expr'! Will cause a compile error.
2755 */
2756
2757 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2758 ast_for_expr(struct compiling *c, const node *n)
2759 {
2760 /* handle the full range of simple expressions
2761 namedexpr_test: test [':=' test]
2762 test: or_test ['if' or_test 'else' test] | lambdef
2763 test_nocond: or_test | lambdef_nocond
2764 or_test: and_test ('or' and_test)*
2765 and_test: not_test ('and' not_test)*
2766 not_test: 'not' not_test | comparison
2767 comparison: expr (comp_op expr)*
2768 expr: xor_expr ('|' xor_expr)*
2769 xor_expr: and_expr ('^' and_expr)*
2770 and_expr: shift_expr ('&' shift_expr)*
2771 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2772 arith_expr: term (('+'|'-') term)*
2773 term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2774 factor: ('+'|'-'|'~') factor | power
2775 power: atom_expr ['**' factor]
2776 atom_expr: [AWAIT] atom trailer*
2777 yield_expr: 'yield' [yield_arg]
2778 */
2779
2780 asdl_seq *seq;
2781 int i;
2782
2783 loop:
2784 switch (TYPE(n)) {
2785 case namedexpr_test:
2786 if (NCH(n) == 3)
2787 return ast_for_namedexpr(c, n);
2788 /* Fallthrough */
2789 case test:
2790 case test_nocond:
2791 if (TYPE(CHILD(n, 0)) == lambdef ||
2792 TYPE(CHILD(n, 0)) == lambdef_nocond)
2793 return ast_for_lambdef(c, CHILD(n, 0));
2794 else if (NCH(n) > 1)
2795 return ast_for_ifexpr(c, n);
2796 /* Fallthrough */
2797 case or_test:
2798 case and_test:
2799 if (NCH(n) == 1) {
2800 n = CHILD(n, 0);
2801 goto loop;
2802 }
2803 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2804 if (!seq)
2805 return NULL;
2806 for (i = 0; i < NCH(n); i += 2) {
2807 expr_ty e = ast_for_expr(c, CHILD(n, i));
2808 if (!e)
2809 return NULL;
2810 asdl_seq_SET(seq, i / 2, e);
2811 }
2812 if (!strcmp(STR(CHILD(n, 1)), "and"))
2813 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2814 n->n_end_lineno, n->n_end_col_offset,
2815 c->c_arena);
2816 assert(!strcmp(STR(CHILD(n, 1)), "or"));
2817 return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
2818 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2819 case not_test:
2820 if (NCH(n) == 1) {
2821 n = CHILD(n, 0);
2822 goto loop;
2823 }
2824 else {
2825 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2826 if (!expression)
2827 return NULL;
2828
2829 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2830 n->n_end_lineno, n->n_end_col_offset,
2831 c->c_arena);
2832 }
2833 case comparison:
2834 if (NCH(n) == 1) {
2835 n = CHILD(n, 0);
2836 goto loop;
2837 }
2838 else {
2839 expr_ty expression;
2840 asdl_int_seq *ops;
2841 asdl_seq *cmps;
2842 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2843 if (!ops)
2844 return NULL;
2845 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2846 if (!cmps) {
2847 return NULL;
2848 }
2849 for (i = 1; i < NCH(n); i += 2) {
2850 cmpop_ty newoperator;
2851
2852 newoperator = ast_for_comp_op(c, CHILD(n, i));
2853 if (!newoperator) {
2854 return NULL;
2855 }
2856
2857 expression = ast_for_expr(c, CHILD(n, i + 1));
2858 if (!expression) {
2859 return NULL;
2860 }
2861
2862 asdl_seq_SET(ops, i / 2, newoperator);
2863 asdl_seq_SET(cmps, i / 2, expression);
2864 }
2865 expression = ast_for_expr(c, CHILD(n, 0));
2866 if (!expression) {
2867 return NULL;
2868 }
2869
2870 return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
2871 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2872 }
2873
2874 case star_expr:
2875 return ast_for_starred(c, n);
2876 /* The next five cases all handle BinOps. The main body of code
2877 is the same in each case, but the switch turned inside out to
2878 reuse the code for each type of operator.
2879 */
2880 case expr:
2881 case xor_expr:
2882 case and_expr:
2883 case shift_expr:
2884 case arith_expr:
2885 case term:
2886 if (NCH(n) == 1) {
2887 n = CHILD(n, 0);
2888 goto loop;
2889 }
2890 return ast_for_binop(c, n);
2891 case yield_expr: {
2892 node *an = NULL;
2893 node *en = NULL;
2894 int is_from = 0;
2895 expr_ty exp = NULL;
2896 if (NCH(n) > 1)
2897 an = CHILD(n, 1); /* yield_arg */
2898 if (an) {
2899 en = CHILD(an, NCH(an) - 1);
2900 if (NCH(an) == 2) {
2901 is_from = 1;
2902 exp = ast_for_expr(c, en);
2903 }
2904 else
2905 exp = ast_for_testlist(c, en);
2906 if (!exp)
2907 return NULL;
2908 }
2909 if (is_from)
2910 return YieldFrom(exp, LINENO(n), n->n_col_offset,
2911 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2912 return Yield(exp, LINENO(n), n->n_col_offset,
2913 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2914 }
2915 case factor:
2916 if (NCH(n) == 1) {
2917 n = CHILD(n, 0);
2918 goto loop;
2919 }
2920 return ast_for_factor(c, n);
2921 case power:
2922 return ast_for_power(c, n);
2923 default:
2924 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
2925 return NULL;
2926 }
2927 /* should never get here unless if error is set */
2928 return NULL;
2929 }
2930
2931 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * start,const node * maybegenbeg,const node * closepar)2932 ast_for_call(struct compiling *c, const node *n, expr_ty func,
2933 const node *start, const node *maybegenbeg, const node *closepar)
2934 {
2935 /*
2936 arglist: argument (',' argument)* [',']
2937 argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
2938 */
2939
2940 int i, nargs, nkeywords;
2941 int ndoublestars;
2942 asdl_seq *args;
2943 asdl_seq *keywords;
2944
2945 REQ(n, arglist);
2946
2947 nargs = 0;
2948 nkeywords = 0;
2949 for (i = 0; i < NCH(n); i++) {
2950 node *ch = CHILD(n, i);
2951 if (TYPE(ch) == argument) {
2952 if (NCH(ch) == 1)
2953 nargs++;
2954 else if (TYPE(CHILD(ch, 1)) == comp_for) {
2955 nargs++;
2956 if (!maybegenbeg) {
2957 ast_error(c, ch, "invalid syntax");
2958 return NULL;
2959 }
2960 if (NCH(n) > 1) {
2961 ast_error(c, ch, "Generator expression must be parenthesized");
2962 return NULL;
2963 }
2964 }
2965 else if (TYPE(CHILD(ch, 0)) == STAR)
2966 nargs++;
2967 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
2968 nargs++;
2969 }
2970 else
2971 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
2972 nkeywords++;
2973 }
2974 }
2975
2976 args = _Py_asdl_seq_new(nargs, c->c_arena);
2977 if (!args)
2978 return NULL;
2979 keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
2980 if (!keywords)
2981 return NULL;
2982
2983 nargs = 0; /* positional arguments + iterable argument unpackings */
2984 nkeywords = 0; /* keyword arguments + keyword argument unpackings */
2985 ndoublestars = 0; /* just keyword argument unpackings */
2986 for (i = 0; i < NCH(n); i++) {
2987 node *ch = CHILD(n, i);
2988 if (TYPE(ch) == argument) {
2989 expr_ty e;
2990 node *chch = CHILD(ch, 0);
2991 if (NCH(ch) == 1) {
2992 /* a positional argument */
2993 if (nkeywords) {
2994 if (ndoublestars) {
2995 ast_error(c, chch,
2996 "positional argument follows "
2997 "keyword argument unpacking");
2998 }
2999 else {
3000 ast_error(c, chch,
3001 "positional argument follows "
3002 "keyword argument");
3003 }
3004 return NULL;
3005 }
3006 e = ast_for_expr(c, chch);
3007 if (!e)
3008 return NULL;
3009 asdl_seq_SET(args, nargs++, e);
3010 }
3011 else if (TYPE(chch) == STAR) {
3012 /* an iterable argument unpacking */
3013 expr_ty starred;
3014 if (ndoublestars) {
3015 ast_error(c, chch,
3016 "iterable argument unpacking follows "
3017 "keyword argument unpacking");
3018 return NULL;
3019 }
3020 e = ast_for_expr(c, CHILD(ch, 1));
3021 if (!e)
3022 return NULL;
3023 starred = Starred(e, Load, LINENO(chch),
3024 chch->n_col_offset,
3025 e->end_lineno, e->end_col_offset,
3026 c->c_arena);
3027 if (!starred)
3028 return NULL;
3029 asdl_seq_SET(args, nargs++, starred);
3030
3031 }
3032 else if (TYPE(chch) == DOUBLESTAR) {
3033 /* a keyword argument unpacking */
3034 keyword_ty kw;
3035 i++;
3036 e = ast_for_expr(c, CHILD(ch, 1));
3037 if (!e)
3038 return NULL;
3039 kw = keyword(NULL, e, chch->n_lineno, chch->n_col_offset,
3040 e->end_lineno, e->end_col_offset, c->c_arena);
3041 asdl_seq_SET(keywords, nkeywords++, kw);
3042 ndoublestars++;
3043 }
3044 else if (TYPE(CHILD(ch, 1)) == comp_for) {
3045 /* the lone generator expression */
3046 e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar);
3047 if (!e)
3048 return NULL;
3049 asdl_seq_SET(args, nargs++, e);
3050 }
3051 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3052 /* treat colon equal as positional argument */
3053 if (nkeywords) {
3054 if (ndoublestars) {
3055 ast_error(c, chch,
3056 "positional argument follows "
3057 "keyword argument unpacking");
3058 }
3059 else {
3060 ast_error(c, chch,
3061 "positional argument follows "
3062 "keyword argument");
3063 }
3064 return NULL;
3065 }
3066 e = ast_for_namedexpr(c, ch);
3067 if (!e)
3068 return NULL;
3069 asdl_seq_SET(args, nargs++, e);
3070 }
3071 else {
3072 /* a keyword argument */
3073 keyword_ty kw;
3074 identifier key;
3075
3076 // To remain LL(1), the grammar accepts any test (basically, any
3077 // expression) in the keyword slot of a call site. So, we need
3078 // to manually enforce that the keyword is a NAME here.
3079 static const int name_tree[] = {
3080 test,
3081 or_test,
3082 and_test,
3083 not_test,
3084 comparison,
3085 expr,
3086 xor_expr,
3087 and_expr,
3088 shift_expr,
3089 arith_expr,
3090 term,
3091 factor,
3092 power,
3093 atom_expr,
3094 atom,
3095 0,
3096 };
3097 node *expr_node = chch;
3098 for (int i = 0; name_tree[i]; i++) {
3099 if (TYPE(expr_node) != name_tree[i])
3100 break;
3101 if (NCH(expr_node) != 1)
3102 break;
3103 expr_node = CHILD(expr_node, 0);
3104 }
3105 if (TYPE(expr_node) != NAME) {
3106 ast_error(c, chch,
3107 "expression cannot contain assignment, "
3108 "perhaps you meant \"==\"?");
3109 return NULL;
3110 }
3111 key = new_identifier(STR(expr_node), c);
3112 if (key == NULL) {
3113 return NULL;
3114 }
3115 if (forbidden_name(c, key, chch, 1)) {
3116 return NULL;
3117 }
3118 e = ast_for_expr(c, CHILD(ch, 2));
3119 if (!e)
3120 return NULL;
3121 kw = keyword(key, e, chch->n_lineno, chch->n_col_offset,
3122 e->end_lineno, e->end_col_offset, c->c_arena);
3123
3124 if (!kw)
3125 return NULL;
3126 asdl_seq_SET(keywords, nkeywords++, kw);
3127 }
3128 }
3129 }
3130
3131 return Call(func, args, keywords, LINENO(start), start->n_col_offset,
3132 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3133 }
3134
3135 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3136 ast_for_testlist(struct compiling *c, const node* n)
3137 {
3138 /* testlist_comp: test (comp_for | (',' test)* [',']) */
3139 /* testlist: test (',' test)* [','] */
3140 assert(NCH(n) > 0);
3141 if (TYPE(n) == testlist_comp) {
3142 if (NCH(n) > 1)
3143 assert(TYPE(CHILD(n, 1)) != comp_for);
3144 }
3145 else {
3146 assert(TYPE(n) == testlist ||
3147 TYPE(n) == testlist_star_expr);
3148 }
3149 if (NCH(n) == 1)
3150 return ast_for_expr(c, CHILD(n, 0));
3151 else {
3152 asdl_seq *tmp = seq_for_testlist(c, n);
3153 if (!tmp)
3154 return NULL;
3155 return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3156 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3157 }
3158 }
3159
3160 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)3161 ast_for_expr_stmt(struct compiling *c, const node *n)
3162 {
3163 REQ(n, expr_stmt);
3164 /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
3165 [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
3166 annassign: ':' test ['=' (yield_expr|testlist)]
3167 testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
3168 augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
3169 '<<=' | '>>=' | '**=' | '//=')
3170 test: ... here starts the operator precedence dance
3171 */
3172 int num = NCH(n);
3173
3174 if (num == 1) {
3175 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
3176 if (!e)
3177 return NULL;
3178
3179 return Expr(e, LINENO(n), n->n_col_offset,
3180 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3181 }
3182 else if (TYPE(CHILD(n, 1)) == augassign) {
3183 expr_ty expr1, expr2;
3184 operator_ty newoperator;
3185 node *ch = CHILD(n, 0);
3186
3187 expr1 = ast_for_testlist(c, ch);
3188 if (!expr1)
3189 return NULL;
3190 /* Augmented assignments can only have a name, a subscript, or an
3191 attribute on the left, though, so we have to explicitly check for
3192 those. */
3193 switch (expr1->kind) {
3194 case Name_kind:
3195 case Attribute_kind:
3196 case Subscript_kind:
3197 break;
3198 default:
3199 ast_error(c, ch, "'%s' is an illegal expression for augmented assignment",
3200 get_expr_name(expr1));
3201 return NULL;
3202 }
3203
3204 /* set_context checks that most expressions are not the left side. */
3205 if(!set_context(c, expr1, Store, ch)) {
3206 return NULL;
3207 }
3208
3209 ch = CHILD(n, 2);
3210 if (TYPE(ch) == testlist)
3211 expr2 = ast_for_testlist(c, ch);
3212 else
3213 expr2 = ast_for_expr(c, ch);
3214 if (!expr2)
3215 return NULL;
3216
3217 newoperator = ast_for_augassign(c, CHILD(n, 1));
3218 if (!newoperator)
3219 return NULL;
3220
3221 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
3222 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3223 }
3224 else if (TYPE(CHILD(n, 1)) == annassign) {
3225 expr_ty expr1, expr2, expr3;
3226 node *ch = CHILD(n, 0);
3227 node *deep, *ann = CHILD(n, 1);
3228 int simple = 1;
3229
3230 /* AnnAssigns are only allowed in Python 3.6 or greater */
3231 if (c->c_feature_version < 6) {
3232 ast_error(c, ch,
3233 "Variable annotation syntax is only supported in Python 3.6 and greater");
3234 return NULL;
3235 }
3236
3237 /* we keep track of parens to qualify (x) as expression not name */
3238 deep = ch;
3239 while (NCH(deep) == 1) {
3240 deep = CHILD(deep, 0);
3241 }
3242 if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
3243 simple = 0;
3244 }
3245 expr1 = ast_for_testlist(c, ch);
3246 if (!expr1) {
3247 return NULL;
3248 }
3249 switch (expr1->kind) {
3250 case Name_kind:
3251 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
3252 return NULL;
3253 }
3254 expr1->v.Name.ctx = Store;
3255 break;
3256 case Attribute_kind:
3257 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
3258 return NULL;
3259 }
3260 expr1->v.Attribute.ctx = Store;
3261 break;
3262 case Subscript_kind:
3263 expr1->v.Subscript.ctx = Store;
3264 break;
3265 case List_kind:
3266 ast_error(c, ch,
3267 "only single target (not list) can be annotated");
3268 return NULL;
3269 case Tuple_kind:
3270 ast_error(c, ch,
3271 "only single target (not tuple) can be annotated");
3272 return NULL;
3273 default:
3274 ast_error(c, ch,
3275 "illegal target for annotation");
3276 return NULL;
3277 }
3278
3279 if (expr1->kind != Name_kind) {
3280 simple = 0;
3281 }
3282 ch = CHILD(ann, 1);
3283 expr2 = ast_for_expr(c, ch);
3284 if (!expr2) {
3285 return NULL;
3286 }
3287 if (NCH(ann) == 2) {
3288 return AnnAssign(expr1, expr2, NULL, simple,
3289 LINENO(n), n->n_col_offset,
3290 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3291 }
3292 else {
3293 ch = CHILD(ann, 3);
3294 if (TYPE(ch) == testlist_star_expr) {
3295 expr3 = ast_for_testlist(c, ch);
3296 }
3297 else {
3298 expr3 = ast_for_expr(c, ch);
3299 }
3300 if (!expr3) {
3301 return NULL;
3302 }
3303 return AnnAssign(expr1, expr2, expr3, simple,
3304 LINENO(n), n->n_col_offset,
3305 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3306 }
3307 }
3308 else {
3309 int i, nch_minus_type, has_type_comment;
3310 asdl_seq *targets;
3311 node *value;
3312 expr_ty expression;
3313 string type_comment;
3314
3315 /* a normal assignment */
3316 REQ(CHILD(n, 1), EQUAL);
3317
3318 has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
3319 nch_minus_type = num - has_type_comment;
3320
3321 targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
3322 if (!targets)
3323 return NULL;
3324 for (i = 0; i < nch_minus_type - 2; i += 2) {
3325 expr_ty e;
3326 node *ch = CHILD(n, i);
3327 if (TYPE(ch) == yield_expr) {
3328 ast_error(c, ch, "assignment to yield expression not possible");
3329 return NULL;
3330 }
3331 e = ast_for_testlist(c, ch);
3332 if (!e)
3333 return NULL;
3334
3335 /* set context to assign */
3336 if (!set_context(c, e, Store, CHILD(n, i)))
3337 return NULL;
3338
3339 asdl_seq_SET(targets, i / 2, e);
3340 }
3341 value = CHILD(n, nch_minus_type - 1);
3342 if (TYPE(value) == testlist_star_expr)
3343 expression = ast_for_testlist(c, value);
3344 else
3345 expression = ast_for_expr(c, value);
3346 if (!expression)
3347 return NULL;
3348 if (has_type_comment) {
3349 type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
3350 if (!type_comment)
3351 return NULL;
3352 }
3353 else
3354 type_comment = NULL;
3355 return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
3356 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3357 }
3358 }
3359
3360
3361 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3362 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3363 {
3364 asdl_seq *seq;
3365 int i;
3366 expr_ty e;
3367
3368 REQ(n, exprlist);
3369
3370 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3371 if (!seq)
3372 return NULL;
3373 for (i = 0; i < NCH(n); i += 2) {
3374 e = ast_for_expr(c, CHILD(n, i));
3375 if (!e)
3376 return NULL;
3377 asdl_seq_SET(seq, i / 2, e);
3378 if (context && !set_context(c, e, context, CHILD(n, i)))
3379 return NULL;
3380 }
3381 return seq;
3382 }
3383
3384 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3385 ast_for_del_stmt(struct compiling *c, const node *n)
3386 {
3387 asdl_seq *expr_list;
3388
3389 /* del_stmt: 'del' exprlist */
3390 REQ(n, del_stmt);
3391
3392 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3393 if (!expr_list)
3394 return NULL;
3395 return Delete(expr_list, LINENO(n), n->n_col_offset,
3396 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3397 }
3398
3399 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3400 ast_for_flow_stmt(struct compiling *c, const node *n)
3401 {
3402 /*
3403 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3404 | yield_stmt
3405 break_stmt: 'break'
3406 continue_stmt: 'continue'
3407 return_stmt: 'return' [testlist]
3408 yield_stmt: yield_expr
3409 yield_expr: 'yield' testlist | 'yield' 'from' test
3410 raise_stmt: 'raise' [test [',' test [',' test]]]
3411 */
3412 node *ch;
3413
3414 REQ(n, flow_stmt);
3415 ch = CHILD(n, 0);
3416 switch (TYPE(ch)) {
3417 case break_stmt:
3418 return Break(LINENO(n), n->n_col_offset,
3419 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3420 case continue_stmt:
3421 return Continue(LINENO(n), n->n_col_offset,
3422 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3423 case yield_stmt: { /* will reduce to yield_expr */
3424 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3425 if (!exp)
3426 return NULL;
3427 return Expr(exp, LINENO(n), n->n_col_offset,
3428 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3429 }
3430 case return_stmt:
3431 if (NCH(ch) == 1)
3432 return Return(NULL, LINENO(n), n->n_col_offset,
3433 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3434 else {
3435 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3436 if (!expression)
3437 return NULL;
3438 return Return(expression, LINENO(n), n->n_col_offset,
3439 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3440 }
3441 case raise_stmt:
3442 if (NCH(ch) == 1)
3443 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3444 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3445 else if (NCH(ch) >= 2) {
3446 expr_ty cause = NULL;
3447 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3448 if (!expression)
3449 return NULL;
3450 if (NCH(ch) == 4) {
3451 cause = ast_for_expr(c, CHILD(ch, 3));
3452 if (!cause)
3453 return NULL;
3454 }
3455 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3456 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3457 }
3458 /* fall through */
3459 default:
3460 PyErr_Format(PyExc_SystemError,
3461 "unexpected flow_stmt: %d", TYPE(ch));
3462 return NULL;
3463 }
3464 }
3465
3466 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3467 alias_for_import_name(struct compiling *c, const node *n, int store)
3468 {
3469 /*
3470 import_as_name: NAME ['as' NAME]
3471 dotted_as_name: dotted_name ['as' NAME]
3472 dotted_name: NAME ('.' NAME)*
3473 */
3474 identifier str, name;
3475
3476 loop:
3477 switch (TYPE(n)) {
3478 case import_as_name: {
3479 node *name_node = CHILD(n, 0);
3480 str = NULL;
3481 name = NEW_IDENTIFIER(name_node);
3482 if (!name)
3483 return NULL;
3484 if (NCH(n) == 3) {
3485 node *str_node = CHILD(n, 2);
3486 str = NEW_IDENTIFIER(str_node);
3487 if (!str)
3488 return NULL;
3489 if (store && forbidden_name(c, str, str_node, 0))
3490 return NULL;
3491 }
3492 else {
3493 if (forbidden_name(c, name, name_node, 0))
3494 return NULL;
3495 }
3496 return alias(name, str, c->c_arena);
3497 }
3498 case dotted_as_name:
3499 if (NCH(n) == 1) {
3500 n = CHILD(n, 0);
3501 goto loop;
3502 }
3503 else {
3504 node *asname_node = CHILD(n, 2);
3505 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3506 if (!a)
3507 return NULL;
3508 assert(!a->asname);
3509 a->asname = NEW_IDENTIFIER(asname_node);
3510 if (!a->asname)
3511 return NULL;
3512 if (forbidden_name(c, a->asname, asname_node, 0))
3513 return NULL;
3514 return a;
3515 }
3516 case dotted_name:
3517 if (NCH(n) == 1) {
3518 node *name_node = CHILD(n, 0);
3519 name = NEW_IDENTIFIER(name_node);
3520 if (!name)
3521 return NULL;
3522 if (store && forbidden_name(c, name, name_node, 0))
3523 return NULL;
3524 return alias(name, NULL, c->c_arena);
3525 }
3526 else {
3527 /* Create a string of the form "a.b.c" */
3528 int i;
3529 size_t len;
3530 char *s;
3531 PyObject *uni;
3532
3533 len = 0;
3534 for (i = 0; i < NCH(n); i += 2)
3535 /* length of string plus one for the dot */
3536 len += strlen(STR(CHILD(n, i))) + 1;
3537 len--; /* the last name doesn't have a dot */
3538 str = PyBytes_FromStringAndSize(NULL, len);
3539 if (!str)
3540 return NULL;
3541 s = PyBytes_AS_STRING(str);
3542 if (!s)
3543 return NULL;
3544 for (i = 0; i < NCH(n); i += 2) {
3545 char *sch = STR(CHILD(n, i));
3546 strcpy(s, STR(CHILD(n, i)));
3547 s += strlen(sch);
3548 *s++ = '.';
3549 }
3550 --s;
3551 *s = '\0';
3552 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3553 PyBytes_GET_SIZE(str),
3554 NULL);
3555 Py_DECREF(str);
3556 if (!uni)
3557 return NULL;
3558 str = uni;
3559 PyUnicode_InternInPlace(&str);
3560 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3561 Py_DECREF(str);
3562 return NULL;
3563 }
3564 return alias(str, NULL, c->c_arena);
3565 }
3566 case STAR:
3567 str = PyUnicode_InternFromString("*");
3568 if (!str)
3569 return NULL;
3570 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3571 Py_DECREF(str);
3572 return NULL;
3573 }
3574 return alias(str, NULL, c->c_arena);
3575 default:
3576 PyErr_Format(PyExc_SystemError,
3577 "unexpected import name: %d", TYPE(n));
3578 return NULL;
3579 }
3580 }
3581
3582 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3583 ast_for_import_stmt(struct compiling *c, const node *n)
3584 {
3585 /*
3586 import_stmt: import_name | import_from
3587 import_name: 'import' dotted_as_names
3588 import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3589 'import' ('*' | '(' import_as_names ')' | import_as_names)
3590 */
3591 int lineno;
3592 int col_offset;
3593 int i;
3594 asdl_seq *aliases;
3595
3596 REQ(n, import_stmt);
3597 lineno = LINENO(n);
3598 col_offset = n->n_col_offset;
3599 n = CHILD(n, 0);
3600 if (TYPE(n) == import_name) {
3601 n = CHILD(n, 1);
3602 REQ(n, dotted_as_names);
3603 aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3604 if (!aliases)
3605 return NULL;
3606 for (i = 0; i < NCH(n); i += 2) {
3607 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3608 if (!import_alias)
3609 return NULL;
3610 asdl_seq_SET(aliases, i / 2, import_alias);
3611 }
3612 // Even though n is modified above, the end position is not changed
3613 return Import(aliases, lineno, col_offset,
3614 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3615 }
3616 else if (TYPE(n) == import_from) {
3617 int n_children;
3618 int idx, ndots = 0;
3619 const node *n_copy = n;
3620 alias_ty mod = NULL;
3621 identifier modname = NULL;
3622
3623 /* Count the number of dots (for relative imports) and check for the
3624 optional module name */
3625 for (idx = 1; idx < NCH(n); idx++) {
3626 if (TYPE(CHILD(n, idx)) == dotted_name) {
3627 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3628 if (!mod)
3629 return NULL;
3630 idx++;
3631 break;
3632 } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3633 /* three consecutive dots are tokenized as one ELLIPSIS */
3634 ndots += 3;
3635 continue;
3636 } else if (TYPE(CHILD(n, idx)) != DOT) {
3637 break;
3638 }
3639 ndots++;
3640 }
3641 idx++; /* skip over the 'import' keyword */
3642 switch (TYPE(CHILD(n, idx))) {
3643 case STAR:
3644 /* from ... import * */
3645 n = CHILD(n, idx);
3646 n_children = 1;
3647 break;
3648 case LPAR:
3649 /* from ... import (x, y, z) */
3650 n = CHILD(n, idx + 1);
3651 n_children = NCH(n);
3652 break;
3653 case import_as_names:
3654 /* from ... import x, y, z */
3655 n = CHILD(n, idx);
3656 n_children = NCH(n);
3657 if (n_children % 2 == 0) {
3658 ast_error(c, n,
3659 "trailing comma not allowed without"
3660 " surrounding parentheses");
3661 return NULL;
3662 }
3663 break;
3664 default:
3665 ast_error(c, n, "Unexpected node-type in from-import");
3666 return NULL;
3667 }
3668
3669 aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3670 if (!aliases)
3671 return NULL;
3672
3673 /* handle "from ... import *" special b/c there's no children */
3674 if (TYPE(n) == STAR) {
3675 alias_ty import_alias = alias_for_import_name(c, n, 1);
3676 if (!import_alias)
3677 return NULL;
3678 asdl_seq_SET(aliases, 0, import_alias);
3679 }
3680 else {
3681 for (i = 0; i < NCH(n); i += 2) {
3682 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3683 if (!import_alias)
3684 return NULL;
3685 asdl_seq_SET(aliases, i / 2, import_alias);
3686 }
3687 }
3688 if (mod != NULL)
3689 modname = mod->name;
3690 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3691 n_copy->n_end_lineno, n_copy->n_end_col_offset,
3692 c->c_arena);
3693 }
3694 PyErr_Format(PyExc_SystemError,
3695 "unknown import statement: starts with command '%s'",
3696 STR(CHILD(n, 0)));
3697 return NULL;
3698 }
3699
3700 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3701 ast_for_global_stmt(struct compiling *c, const node *n)
3702 {
3703 /* global_stmt: 'global' NAME (',' NAME)* */
3704 identifier name;
3705 asdl_seq *s;
3706 int i;
3707
3708 REQ(n, global_stmt);
3709 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3710 if (!s)
3711 return NULL;
3712 for (i = 1; i < NCH(n); i += 2) {
3713 name = NEW_IDENTIFIER(CHILD(n, i));
3714 if (!name)
3715 return NULL;
3716 asdl_seq_SET(s, i / 2, name);
3717 }
3718 return Global(s, LINENO(n), n->n_col_offset,
3719 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3720 }
3721
3722 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3723 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3724 {
3725 /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3726 identifier name;
3727 asdl_seq *s;
3728 int i;
3729
3730 REQ(n, nonlocal_stmt);
3731 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3732 if (!s)
3733 return NULL;
3734 for (i = 1; i < NCH(n); i += 2) {
3735 name = NEW_IDENTIFIER(CHILD(n, i));
3736 if (!name)
3737 return NULL;
3738 asdl_seq_SET(s, i / 2, name);
3739 }
3740 return Nonlocal(s, LINENO(n), n->n_col_offset,
3741 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3742 }
3743
3744 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3745 ast_for_assert_stmt(struct compiling *c, const node *n)
3746 {
3747 /* assert_stmt: 'assert' test [',' test] */
3748 REQ(n, assert_stmt);
3749 if (NCH(n) == 2) {
3750 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3751 if (!expression)
3752 return NULL;
3753 return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3754 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3755 }
3756 else if (NCH(n) == 4) {
3757 expr_ty expr1, expr2;
3758
3759 expr1 = ast_for_expr(c, CHILD(n, 1));
3760 if (!expr1)
3761 return NULL;
3762 expr2 = ast_for_expr(c, CHILD(n, 3));
3763 if (!expr2)
3764 return NULL;
3765
3766 return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3767 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3768 }
3769 PyErr_Format(PyExc_SystemError,
3770 "improper number of parts to 'assert' statement: %d",
3771 NCH(n));
3772 return NULL;
3773 }
3774
3775 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3776 ast_for_suite(struct compiling *c, const node *n)
3777 {
3778 /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
3779 asdl_seq *seq;
3780 stmt_ty s;
3781 int i, total, num, end, pos = 0;
3782 node *ch;
3783
3784 if (TYPE(n) != func_body_suite) {
3785 REQ(n, suite);
3786 }
3787
3788 total = num_stmts(n);
3789 seq = _Py_asdl_seq_new(total, c->c_arena);
3790 if (!seq)
3791 return NULL;
3792 if (TYPE(CHILD(n, 0)) == simple_stmt) {
3793 n = CHILD(n, 0);
3794 /* simple_stmt always ends with a NEWLINE,
3795 and may have a trailing SEMI
3796 */
3797 end = NCH(n) - 1;
3798 if (TYPE(CHILD(n, end - 1)) == SEMI)
3799 end--;
3800 /* loop by 2 to skip semi-colons */
3801 for (i = 0; i < end; i += 2) {
3802 ch = CHILD(n, i);
3803 s = ast_for_stmt(c, ch);
3804 if (!s)
3805 return NULL;
3806 asdl_seq_SET(seq, pos++, s);
3807 }
3808 }
3809 else {
3810 i = 2;
3811 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
3812 i += 2;
3813 REQ(CHILD(n, 2), NEWLINE);
3814 }
3815
3816 for (; i < (NCH(n) - 1); i++) {
3817 ch = CHILD(n, i);
3818 REQ(ch, stmt);
3819 num = num_stmts(ch);
3820 if (num == 1) {
3821 /* small_stmt or compound_stmt with only one child */
3822 s = ast_for_stmt(c, ch);
3823 if (!s)
3824 return NULL;
3825 asdl_seq_SET(seq, pos++, s);
3826 }
3827 else {
3828 int j;
3829 ch = CHILD(ch, 0);
3830 REQ(ch, simple_stmt);
3831 for (j = 0; j < NCH(ch); j += 2) {
3832 /* statement terminates with a semi-colon ';' */
3833 if (NCH(CHILD(ch, j)) == 0) {
3834 assert((j + 1) == NCH(ch));
3835 break;
3836 }
3837 s = ast_for_stmt(c, CHILD(ch, j));
3838 if (!s)
3839 return NULL;
3840 asdl_seq_SET(seq, pos++, s);
3841 }
3842 }
3843 }
3844 }
3845 assert(pos == seq->size);
3846 return seq;
3847 }
3848
3849 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)3850 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
3851 {
3852 Py_ssize_t tot = asdl_seq_LEN(s);
3853 // There must be no empty suites.
3854 assert(tot > 0);
3855 stmt_ty last = asdl_seq_GET(s, tot - 1);
3856 *end_lineno = last->end_lineno;
3857 *end_col_offset = last->end_col_offset;
3858 }
3859
3860 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3861 ast_for_if_stmt(struct compiling *c, const node *n)
3862 {
3863 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3864 ['else' ':' suite]
3865 */
3866 char *s;
3867 int end_lineno, end_col_offset;
3868
3869 REQ(n, if_stmt);
3870
3871 if (NCH(n) == 4) {
3872 expr_ty expression;
3873 asdl_seq *suite_seq;
3874
3875 expression = ast_for_expr(c, CHILD(n, 1));
3876 if (!expression)
3877 return NULL;
3878 suite_seq = ast_for_suite(c, CHILD(n, 3));
3879 if (!suite_seq)
3880 return NULL;
3881 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
3882
3883 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3884 end_lineno, end_col_offset, c->c_arena);
3885 }
3886
3887 s = STR(CHILD(n, 4));
3888 /* s[2], the third character in the string, will be
3889 's' for el_s_e, or
3890 'i' for el_i_f
3891 */
3892 if (s[2] == 's') {
3893 expr_ty expression;
3894 asdl_seq *seq1, *seq2;
3895
3896 expression = ast_for_expr(c, CHILD(n, 1));
3897 if (!expression)
3898 return NULL;
3899 seq1 = ast_for_suite(c, CHILD(n, 3));
3900 if (!seq1)
3901 return NULL;
3902 seq2 = ast_for_suite(c, CHILD(n, 6));
3903 if (!seq2)
3904 return NULL;
3905 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
3906
3907 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
3908 end_lineno, end_col_offset, c->c_arena);
3909 }
3910 else if (s[2] == 'i') {
3911 int i, n_elif, has_else = 0;
3912 expr_ty expression;
3913 asdl_seq *suite_seq;
3914 asdl_seq *orelse = NULL;
3915 n_elif = NCH(n) - 4;
3916 /* must reference the child n_elif+1 since 'else' token is third,
3917 not fourth, child from the end. */
3918 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
3919 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
3920 has_else = 1;
3921 n_elif -= 3;
3922 }
3923 n_elif /= 4;
3924
3925 if (has_else) {
3926 asdl_seq *suite_seq2;
3927
3928 orelse = _Py_asdl_seq_new(1, c->c_arena);
3929 if (!orelse)
3930 return NULL;
3931 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
3932 if (!expression)
3933 return NULL;
3934 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
3935 if (!suite_seq)
3936 return NULL;
3937 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3938 if (!suite_seq2)
3939 return NULL;
3940 get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
3941
3942 asdl_seq_SET(orelse, 0,
3943 If(expression, suite_seq, suite_seq2,
3944 LINENO(CHILD(n, NCH(n) - 7)),
3945 CHILD(n, NCH(n) - 7)->n_col_offset,
3946 end_lineno, end_col_offset, c->c_arena));
3947 /* the just-created orelse handled the last elif */
3948 n_elif--;
3949 }
3950
3951 for (i = 0; i < n_elif; i++) {
3952 int off = 5 + (n_elif - i - 1) * 4;
3953 asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
3954 if (!newobj)
3955 return NULL;
3956 expression = ast_for_expr(c, CHILD(n, off));
3957 if (!expression)
3958 return NULL;
3959 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
3960 if (!suite_seq)
3961 return NULL;
3962
3963 if (orelse != NULL) {
3964 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
3965 } else {
3966 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
3967 }
3968 asdl_seq_SET(newobj, 0,
3969 If(expression, suite_seq, orelse,
3970 LINENO(CHILD(n, off - 1)),
3971 CHILD(n, off - 1)->n_col_offset,
3972 end_lineno, end_col_offset, c->c_arena));
3973 orelse = newobj;
3974 }
3975 expression = ast_for_expr(c, CHILD(n, 1));
3976 if (!expression)
3977 return NULL;
3978 suite_seq = ast_for_suite(c, CHILD(n, 3));
3979 if (!suite_seq)
3980 return NULL;
3981 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
3982 return If(expression, suite_seq, orelse,
3983 LINENO(n), n->n_col_offset,
3984 end_lineno, end_col_offset, c->c_arena);
3985 }
3986
3987 PyErr_Format(PyExc_SystemError,
3988 "unexpected token in 'if' statement: %s", s);
3989 return NULL;
3990 }
3991
3992 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)3993 ast_for_while_stmt(struct compiling *c, const node *n)
3994 {
3995 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
3996 REQ(n, while_stmt);
3997 int end_lineno, end_col_offset;
3998
3999 if (NCH(n) == 4) {
4000 expr_ty expression;
4001 asdl_seq *suite_seq;
4002
4003 expression = ast_for_expr(c, CHILD(n, 1));
4004 if (!expression)
4005 return NULL;
4006 suite_seq = ast_for_suite(c, CHILD(n, 3));
4007 if (!suite_seq)
4008 return NULL;
4009 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4010 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4011 end_lineno, end_col_offset, c->c_arena);
4012 }
4013 else if (NCH(n) == 7) {
4014 expr_ty expression;
4015 asdl_seq *seq1, *seq2;
4016
4017 expression = ast_for_expr(c, CHILD(n, 1));
4018 if (!expression)
4019 return NULL;
4020 seq1 = ast_for_suite(c, CHILD(n, 3));
4021 if (!seq1)
4022 return NULL;
4023 seq2 = ast_for_suite(c, CHILD(n, 6));
4024 if (!seq2)
4025 return NULL;
4026 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4027
4028 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4029 end_lineno, end_col_offset, c->c_arena);
4030 }
4031
4032 PyErr_Format(PyExc_SystemError,
4033 "wrong number of tokens for 'while' statement: %d",
4034 NCH(n));
4035 return NULL;
4036 }
4037
4038 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4039 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4040 {
4041 const node * const n = is_async ? CHILD(n0, 1) : n0;
4042 asdl_seq *_target, *seq = NULL, *suite_seq;
4043 expr_ty expression;
4044 expr_ty target, first;
4045 const node *node_target;
4046 int end_lineno, end_col_offset;
4047 int has_type_comment;
4048 string type_comment;
4049
4050 if (is_async && c->c_feature_version < 5) {
4051 ast_error(c, n,
4052 "Async for loops are only supported in Python 3.5 and greater");
4053 return NULL;
4054 }
4055
4056 /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4057 REQ(n, for_stmt);
4058
4059 has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4060
4061 if (NCH(n) == 9 + has_type_comment) {
4062 seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4063 if (!seq)
4064 return NULL;
4065 }
4066
4067 node_target = CHILD(n, 1);
4068 _target = ast_for_exprlist(c, node_target, Store);
4069 if (!_target)
4070 return NULL;
4071 /* Check the # of children rather than the length of _target, since
4072 for x, in ... has 1 element in _target, but still requires a Tuple. */
4073 first = (expr_ty)asdl_seq_GET(_target, 0);
4074 if (NCH(node_target) == 1)
4075 target = first;
4076 else
4077 target = Tuple(_target, Store, first->lineno, first->col_offset,
4078 node_target->n_end_lineno, node_target->n_end_col_offset,
4079 c->c_arena);
4080
4081 expression = ast_for_testlist(c, CHILD(n, 3));
4082 if (!expression)
4083 return NULL;
4084 suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4085 if (!suite_seq)
4086 return NULL;
4087
4088 if (seq != NULL) {
4089 get_last_end_pos(seq, &end_lineno, &end_col_offset);
4090 } else {
4091 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4092 }
4093
4094 if (has_type_comment) {
4095 type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4096 if (!type_comment)
4097 return NULL;
4098 }
4099 else
4100 type_comment = NULL;
4101
4102 if (is_async)
4103 return AsyncFor(target, expression, suite_seq, seq, type_comment,
4104 LINENO(n0), n0->n_col_offset,
4105 end_lineno, end_col_offset, c->c_arena);
4106 else
4107 return For(target, expression, suite_seq, seq, type_comment,
4108 LINENO(n), n->n_col_offset,
4109 end_lineno, end_col_offset, c->c_arena);
4110 }
4111
4112 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4113 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4114 {
4115 /* except_clause: 'except' [test ['as' test]] */
4116 int end_lineno, end_col_offset;
4117 REQ(exc, except_clause);
4118 REQ(body, suite);
4119
4120 if (NCH(exc) == 1) {
4121 asdl_seq *suite_seq = ast_for_suite(c, body);
4122 if (!suite_seq)
4123 return NULL;
4124 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4125
4126 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4127 exc->n_col_offset,
4128 end_lineno, end_col_offset, c->c_arena);
4129 }
4130 else if (NCH(exc) == 2) {
4131 expr_ty expression;
4132 asdl_seq *suite_seq;
4133
4134 expression = ast_for_expr(c, CHILD(exc, 1));
4135 if (!expression)
4136 return NULL;
4137 suite_seq = ast_for_suite(c, body);
4138 if (!suite_seq)
4139 return NULL;
4140 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4141
4142 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4143 exc->n_col_offset,
4144 end_lineno, end_col_offset, c->c_arena);
4145 }
4146 else if (NCH(exc) == 4) {
4147 asdl_seq *suite_seq;
4148 expr_ty expression;
4149 identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4150 if (!e)
4151 return NULL;
4152 if (forbidden_name(c, e, CHILD(exc, 3), 0))
4153 return NULL;
4154 expression = ast_for_expr(c, CHILD(exc, 1));
4155 if (!expression)
4156 return NULL;
4157 suite_seq = ast_for_suite(c, body);
4158 if (!suite_seq)
4159 return NULL;
4160 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4161
4162 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4163 exc->n_col_offset,
4164 end_lineno, end_col_offset, c->c_arena);
4165 }
4166
4167 PyErr_Format(PyExc_SystemError,
4168 "wrong number of children for 'except' clause: %d",
4169 NCH(exc));
4170 return NULL;
4171 }
4172
4173 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)4174 ast_for_try_stmt(struct compiling *c, const node *n)
4175 {
4176 const int nch = NCH(n);
4177 int end_lineno, end_col_offset, n_except = (nch - 3)/3;
4178 asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
4179 excepthandler_ty last_handler;
4180
4181 REQ(n, try_stmt);
4182
4183 body = ast_for_suite(c, CHILD(n, 2));
4184 if (body == NULL)
4185 return NULL;
4186
4187 if (TYPE(CHILD(n, nch - 3)) == NAME) {
4188 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
4189 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
4190 /* we can assume it's an "else",
4191 because nch >= 9 for try-else-finally and
4192 it would otherwise have a type of except_clause */
4193 orelse = ast_for_suite(c, CHILD(n, nch - 4));
4194 if (orelse == NULL)
4195 return NULL;
4196 n_except--;
4197 }
4198
4199 finally = ast_for_suite(c, CHILD(n, nch - 1));
4200 if (finally == NULL)
4201 return NULL;
4202 n_except--;
4203 }
4204 else {
4205 /* we can assume it's an "else",
4206 otherwise it would have a type of except_clause */
4207 orelse = ast_for_suite(c, CHILD(n, nch - 1));
4208 if (orelse == NULL)
4209 return NULL;
4210 n_except--;
4211 }
4212 }
4213 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
4214 ast_error(c, n, "malformed 'try' statement");
4215 return NULL;
4216 }
4217
4218 if (n_except > 0) {
4219 int i;
4220 /* process except statements to create a try ... except */
4221 handlers = _Py_asdl_seq_new(n_except, c->c_arena);
4222 if (handlers == NULL)
4223 return NULL;
4224
4225 for (i = 0; i < n_except; i++) {
4226 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
4227 CHILD(n, 5 + i * 3));
4228 if (!e)
4229 return NULL;
4230 asdl_seq_SET(handlers, i, e);
4231 }
4232 }
4233
4234 assert(finally != NULL || asdl_seq_LEN(handlers));
4235 if (finally != NULL) {
4236 // finally is always last
4237 get_last_end_pos(finally, &end_lineno, &end_col_offset);
4238 } else if (orelse != NULL) {
4239 // otherwise else is last
4240 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4241 } else {
4242 // inline the get_last_end_pos logic due to layout mismatch
4243 last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
4244 end_lineno = last_handler->end_lineno;
4245 end_col_offset = last_handler->end_col_offset;
4246 }
4247 return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
4248 end_lineno, end_col_offset, c->c_arena);
4249 }
4250
4251 /* with_item: test ['as' expr] */
4252 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4253 ast_for_with_item(struct compiling *c, const node *n)
4254 {
4255 expr_ty context_expr, optional_vars = NULL;
4256
4257 REQ(n, with_item);
4258 context_expr = ast_for_expr(c, CHILD(n, 0));
4259 if (!context_expr)
4260 return NULL;
4261 if (NCH(n) == 3) {
4262 optional_vars = ast_for_expr(c, CHILD(n, 2));
4263
4264 if (!optional_vars) {
4265 return NULL;
4266 }
4267 if (!set_context(c, optional_vars, Store, n)) {
4268 return NULL;
4269 }
4270 }
4271
4272 return withitem(context_expr, optional_vars, c->c_arena);
4273 }
4274
4275 /* with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite */
4276 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4277 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4278 {
4279 const node * const n = is_async ? CHILD(n0, 1) : n0;
4280 int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4281 asdl_seq *items, *body;
4282 string type_comment;
4283
4284 if (is_async && c->c_feature_version < 5) {
4285 ast_error(c, n,
4286 "Async with statements are only supported in Python 3.5 and greater");
4287 return NULL;
4288 }
4289
4290 REQ(n, with_stmt);
4291
4292 has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4293 nch_minus_type = NCH(n) - has_type_comment;
4294
4295 n_items = (nch_minus_type - 2) / 2;
4296 items = _Py_asdl_seq_new(n_items, c->c_arena);
4297 if (!items)
4298 return NULL;
4299 for (i = 1; i < nch_minus_type - 2; i += 2) {
4300 withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4301 if (!item)
4302 return NULL;
4303 asdl_seq_SET(items, (i - 1) / 2, item);
4304 }
4305
4306 body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4307 if (!body)
4308 return NULL;
4309 get_last_end_pos(body, &end_lineno, &end_col_offset);
4310
4311 if (has_type_comment) {
4312 type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4313 if (!type_comment)
4314 return NULL;
4315 }
4316 else
4317 type_comment = NULL;
4318
4319 if (is_async)
4320 return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4321 end_lineno, end_col_offset, c->c_arena);
4322 else
4323 return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4324 end_lineno, end_col_offset, c->c_arena);
4325 }
4326
4327 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4328 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4329 {
4330 /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4331 PyObject *classname;
4332 asdl_seq *s;
4333 expr_ty call;
4334 int end_lineno, end_col_offset;
4335
4336 REQ(n, classdef);
4337
4338 if (NCH(n) == 4) { /* class NAME ':' suite */
4339 s = ast_for_suite(c, CHILD(n, 3));
4340 if (!s)
4341 return NULL;
4342 get_last_end_pos(s, &end_lineno, &end_col_offset);
4343
4344 classname = NEW_IDENTIFIER(CHILD(n, 1));
4345 if (!classname)
4346 return NULL;
4347 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4348 return NULL;
4349 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4350 LINENO(n), n->n_col_offset,
4351 end_lineno, end_col_offset, c->c_arena);
4352 }
4353
4354 if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4355 s = ast_for_suite(c, CHILD(n, 5));
4356 if (!s)
4357 return NULL;
4358 get_last_end_pos(s, &end_lineno, &end_col_offset);
4359
4360 classname = NEW_IDENTIFIER(CHILD(n, 1));
4361 if (!classname)
4362 return NULL;
4363 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4364 return NULL;
4365 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4366 LINENO(n), n->n_col_offset,
4367 end_lineno, end_col_offset, c->c_arena);
4368 }
4369
4370 /* class NAME '(' arglist ')' ':' suite */
4371 /* build up a fake Call node so we can extract its pieces */
4372 {
4373 PyObject *dummy_name;
4374 expr_ty dummy;
4375 dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4376 if (!dummy_name)
4377 return NULL;
4378 dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4379 CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4380 c->c_arena);
4381 call = ast_for_call(c, CHILD(n, 3), dummy,
4382 CHILD(n, 1), NULL, CHILD(n, 4));
4383 if (!call)
4384 return NULL;
4385 }
4386 s = ast_for_suite(c, CHILD(n, 6));
4387 if (!s)
4388 return NULL;
4389 get_last_end_pos(s, &end_lineno, &end_col_offset);
4390
4391 classname = NEW_IDENTIFIER(CHILD(n, 1));
4392 if (!classname)
4393 return NULL;
4394 if (forbidden_name(c, classname, CHILD(n, 1), 0))
4395 return NULL;
4396
4397 return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4398 decorator_seq, LINENO(n), n->n_col_offset,
4399 end_lineno, end_col_offset, c->c_arena);
4400 }
4401
4402 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4403 ast_for_stmt(struct compiling *c, const node *n)
4404 {
4405 if (TYPE(n) == stmt) {
4406 assert(NCH(n) == 1);
4407 n = CHILD(n, 0);
4408 }
4409 if (TYPE(n) == simple_stmt) {
4410 assert(num_stmts(n) == 1);
4411 n = CHILD(n, 0);
4412 }
4413 if (TYPE(n) == small_stmt) {
4414 n = CHILD(n, 0);
4415 /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4416 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4417 */
4418 switch (TYPE(n)) {
4419 case expr_stmt:
4420 return ast_for_expr_stmt(c, n);
4421 case del_stmt:
4422 return ast_for_del_stmt(c, n);
4423 case pass_stmt:
4424 return Pass(LINENO(n), n->n_col_offset,
4425 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4426 case flow_stmt:
4427 return ast_for_flow_stmt(c, n);
4428 case import_stmt:
4429 return ast_for_import_stmt(c, n);
4430 case global_stmt:
4431 return ast_for_global_stmt(c, n);
4432 case nonlocal_stmt:
4433 return ast_for_nonlocal_stmt(c, n);
4434 case assert_stmt:
4435 return ast_for_assert_stmt(c, n);
4436 default:
4437 PyErr_Format(PyExc_SystemError,
4438 "unhandled small_stmt: TYPE=%d NCH=%d\n",
4439 TYPE(n), NCH(n));
4440 return NULL;
4441 }
4442 }
4443 else {
4444 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4445 | funcdef | classdef | decorated | async_stmt
4446 */
4447 node *ch = CHILD(n, 0);
4448 REQ(n, compound_stmt);
4449 switch (TYPE(ch)) {
4450 case if_stmt:
4451 return ast_for_if_stmt(c, ch);
4452 case while_stmt:
4453 return ast_for_while_stmt(c, ch);
4454 case for_stmt:
4455 return ast_for_for_stmt(c, ch, 0);
4456 case try_stmt:
4457 return ast_for_try_stmt(c, ch);
4458 case with_stmt:
4459 return ast_for_with_stmt(c, ch, 0);
4460 case funcdef:
4461 return ast_for_funcdef(c, ch, NULL);
4462 case classdef:
4463 return ast_for_classdef(c, ch, NULL);
4464 case decorated:
4465 return ast_for_decorated(c, ch);
4466 case async_stmt:
4467 return ast_for_async_stmt(c, ch);
4468 default:
4469 PyErr_Format(PyExc_SystemError,
4470 "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4471 TYPE(n), NCH(n));
4472 return NULL;
4473 }
4474 }
4475 }
4476
4477 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4478 parsenumber_raw(struct compiling *c, const char *s)
4479 {
4480 const char *end;
4481 long x;
4482 double dx;
4483 Py_complex compl;
4484 int imflag;
4485
4486 assert(s != NULL);
4487 errno = 0;
4488 end = s + strlen(s) - 1;
4489 imflag = *end == 'j' || *end == 'J';
4490 if (s[0] == '0') {
4491 x = (long) PyOS_strtoul(s, (char **)&end, 0);
4492 if (x < 0 && errno == 0) {
4493 return PyLong_FromString(s, (char **)0, 0);
4494 }
4495 }
4496 else
4497 x = PyOS_strtol(s, (char **)&end, 0);
4498 if (*end == '\0') {
4499 if (errno != 0)
4500 return PyLong_FromString(s, (char **)0, 0);
4501 return PyLong_FromLong(x);
4502 }
4503 /* XXX Huge floats may silently fail */
4504 if (imflag) {
4505 compl.real = 0.;
4506 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4507 if (compl.imag == -1.0 && PyErr_Occurred())
4508 return NULL;
4509 return PyComplex_FromCComplex(compl);
4510 }
4511 else
4512 {
4513 dx = PyOS_string_to_double(s, NULL, NULL);
4514 if (dx == -1.0 && PyErr_Occurred())
4515 return NULL;
4516 return PyFloat_FromDouble(dx);
4517 }
4518 }
4519
4520 static PyObject *
parsenumber(struct compiling * c,const char * s)4521 parsenumber(struct compiling *c, const char *s)
4522 {
4523 char *dup, *end;
4524 PyObject *res = NULL;
4525
4526 assert(s != NULL);
4527
4528 if (strchr(s, '_') == NULL) {
4529 return parsenumber_raw(c, s);
4530 }
4531 /* Create a duplicate without underscores. */
4532 dup = PyMem_Malloc(strlen(s) + 1);
4533 if (dup == NULL) {
4534 return PyErr_NoMemory();
4535 }
4536 end = dup;
4537 for (; *s; s++) {
4538 if (*s != '_') {
4539 *end++ = *s;
4540 }
4541 }
4542 *end = '\0';
4543 res = parsenumber_raw(c, dup);
4544 PyMem_Free(dup);
4545 return res;
4546 }
4547
4548 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4549 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4550 {
4551 const char *s, *t;
4552 t = s = *sPtr;
4553 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4554 while (s < end && (*s & 0x80)) s++;
4555 *sPtr = s;
4556 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4557 }
4558
4559 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4560 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4561 unsigned char first_invalid_escape_char)
4562 {
4563 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4564 first_invalid_escape_char);
4565 if (msg == NULL) {
4566 return -1;
4567 }
4568 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4569 c->c_filename, LINENO(n),
4570 NULL, NULL) < 0)
4571 {
4572 if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4573 /* Replace the DeprecationWarning exception with a SyntaxError
4574 to get a more accurate error report */
4575 PyErr_Clear();
4576 ast_error(c, n, "%U", msg);
4577 }
4578 Py_DECREF(msg);
4579 return -1;
4580 }
4581 Py_DECREF(msg);
4582 return 0;
4583 }
4584
4585 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4586 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4587 size_t len)
4588 {
4589 PyObject *v, *u;
4590 char *buf;
4591 char *p;
4592 const char *end;
4593
4594 /* check for integer overflow */
4595 if (len > SIZE_MAX / 6)
4596 return NULL;
4597 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4598 "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4599 u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4600 if (u == NULL)
4601 return NULL;
4602 p = buf = PyBytes_AsString(u);
4603 end = s + len;
4604 while (s < end) {
4605 if (*s == '\\') {
4606 *p++ = *s++;
4607 if (s >= end || *s & 0x80) {
4608 strcpy(p, "u005c");
4609 p += 5;
4610 if (s >= end)
4611 break;
4612 }
4613 }
4614 if (*s & 0x80) { /* XXX inefficient */
4615 PyObject *w;
4616 int kind;
4617 const void *data;
4618 Py_ssize_t len, i;
4619 w = decode_utf8(c, &s, end);
4620 if (w == NULL) {
4621 Py_DECREF(u);
4622 return NULL;
4623 }
4624 kind = PyUnicode_KIND(w);
4625 data = PyUnicode_DATA(w);
4626 len = PyUnicode_GET_LENGTH(w);
4627 for (i = 0; i < len; i++) {
4628 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4629 sprintf(p, "\\U%08x", chr);
4630 p += 10;
4631 }
4632 /* Should be impossible to overflow */
4633 assert(p - buf <= PyBytes_GET_SIZE(u));
4634 Py_DECREF(w);
4635 } else {
4636 *p++ = *s++;
4637 }
4638 }
4639 len = p - buf;
4640 s = buf;
4641
4642 const char *first_invalid_escape;
4643 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4644
4645 if (v != NULL && first_invalid_escape != NULL) {
4646 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4647 /* We have not decref u before because first_invalid_escape points
4648 inside u. */
4649 Py_XDECREF(u);
4650 Py_DECREF(v);
4651 return NULL;
4652 }
4653 }
4654 Py_XDECREF(u);
4655 return v;
4656 }
4657
4658 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4659 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4660 size_t len)
4661 {
4662 const char *first_invalid_escape;
4663 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
4664 &first_invalid_escape);
4665 if (result == NULL)
4666 return NULL;
4667
4668 if (first_invalid_escape != NULL) {
4669 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4670 Py_DECREF(result);
4671 return NULL;
4672 }
4673 }
4674 return result;
4675 }
4676
4677 /* Shift locations for the given node and all its children by adding `lineno`
4678 and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4679 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4680 {
4681 n->n_col_offset = n->n_col_offset + col_offset;
4682 n->n_end_col_offset = n->n_end_col_offset + col_offset;
4683 for (int i = 0; i < NCH(n); ++i) {
4684 if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4685 /* Shifting column offsets unnecessary if there's been newlines. */
4686 col_offset = 0;
4687 }
4688 fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4689 }
4690 n->n_lineno = n->n_lineno + lineno;
4691 n->n_end_lineno = n->n_end_lineno + lineno;
4692 }
4693
4694 /* Fix locations for the given node and its children.
4695
4696 `parent` is the enclosing node.
4697 `n` is the node which locations are going to be fixed relative to parent.
4698 `expr_str` is the child node's string representation, including braces.
4699 */
4700 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4701 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4702 {
4703 char *substr = NULL;
4704 char *start;
4705 int lines = LINENO(parent) - 1;
4706 int cols = parent->n_col_offset;
4707 /* Find the full fstring to fix location information in `n`. */
4708 while (parent && parent->n_type != STRING)
4709 parent = parent->n_child;
4710 if (parent && parent->n_str) {
4711 substr = strstr(parent->n_str, expr_str);
4712 if (substr) {
4713 start = substr;
4714 while (start > parent->n_str) {
4715 if (start[0] == '\n')
4716 break;
4717 start--;
4718 }
4719 cols += (int)(substr - start);
4720 /* adjust the start based on the number of newlines encountered
4721 before the f-string expression */
4722 for (char* p = parent->n_str; p < substr; p++) {
4723 if (*p == '\n') {
4724 lines++;
4725 }
4726 }
4727 }
4728 }
4729 fstring_shift_node_locations(n, lines, cols);
4730 }
4731
4732 /* Compile this expression in to an expr_ty. Add parens around the
4733 expression, in order to allow leading spaces in the expression. */
4734 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4735 fstring_compile_expr(const char *expr_start, const char *expr_end,
4736 struct compiling *c, const node *n)
4737
4738 {
4739 node *mod_n;
4740 mod_ty mod;
4741 char *str;
4742 Py_ssize_t len;
4743 const char *s;
4744
4745 assert(expr_end >= expr_start);
4746 assert(*(expr_start-1) == '{');
4747 assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4748 *expr_end == '=');
4749
4750 /* If the substring is all whitespace, it's an error. We need to catch this
4751 here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4752 because turning the expression '' in to '()' would go from being invalid
4753 to valid. */
4754 for (s = expr_start; s != expr_end; s++) {
4755 char c = *s;
4756 /* The Python parser ignores only the following whitespace
4757 characters (\r already is converted to \n). */
4758 if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4759 break;
4760 }
4761 }
4762 if (s == expr_end) {
4763 ast_error(c, n, "f-string: empty expression not allowed");
4764 return NULL;
4765 }
4766
4767 len = expr_end - expr_start;
4768 /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4769 str = PyMem_Malloc(len + 3);
4770 if (str == NULL) {
4771 PyErr_NoMemory();
4772 return NULL;
4773 }
4774
4775 str[0] = '(';
4776 memcpy(str+1, expr_start, len);
4777 str[len+1] = ')';
4778 str[len+2] = 0;
4779
4780 PyCompilerFlags cf = _PyCompilerFlags_INIT;
4781 cf.cf_flags = PyCF_ONLY_AST;
4782 mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4783 Py_eval_input, 0);
4784 if (!mod_n) {
4785 PyMem_Free(str);
4786 return NULL;
4787 }
4788 /* Reuse str to find the correct column offset. */
4789 str[0] = '{';
4790 str[len+1] = '}';
4791 fstring_fix_node_location(n, mod_n, str);
4792 mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4793 PyMem_Free(str);
4794 PyNode_Free(mod_n);
4795 if (!mod)
4796 return NULL;
4797 return mod->v.Expression.body;
4798 }
4799
/* Scan the literal (non-expression) text of an f-string starting at *str.

   *literal must be NULL on entry.  On success *str is advanced past the
   scanned text and, if that text is non-empty, *literal is set to a new
   string object holding it: decoded as plain UTF-8 when `raw` is true,
   otherwise decoded with backslash escapes processed.

   Return -1 on error.

   Return 0 if we reached the end of the literal.

   Return 1 if we haven't reached the end of the literal, but we want
   the caller to process the literal up to this point. Used for
   doubled braces.
*/
static int
fstring_find_literal(const char **str, const char *end, int raw,
                     PyObject **literal, int recurse_lvl,
                     struct compiling *c, const node *n)
{
    /* Get any literal string. It ends when we hit an un-doubled left
       brace (which isn't part of a unicode name escape such as
       "\N{EULER CONSTANT}"), or the end of the string. */

    const char *s = *str;
    const char *literal_start = s;
    int result = 0;

    assert(*literal == NULL);
    while (s < end) {
        char ch = *s++;
        if (!raw && ch == '\\' && s < end) {
            /* Look at the character following the backslash. */
            ch = *s++;
            if (ch == 'N') {
                if (s < end && *s++ == '{') {
                    /* Skip to just past the closing '}' of the \N{...}
                       escape so its braces are not taken for expression
                       delimiters. */
                    while (s < end && *s++ != '}') {
                    }
                    continue;
                }
                break;
            }
            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
                return -1;
            }
        }
        if (ch == '{' || ch == '}') {
            /* Check for doubled braces, but only at the top level. If
               we checked at every level, then f'{0:{3}}' would fail
               with the two closing braces. */
            if (recurse_lvl == 0) {
                if (s < end && *s == ch) {
                    /* We're going to tell the caller that the literal ends
                       here, but that they should continue scanning. But also
                       skip over the second brace when we resume scanning. */
                    /* `s` is left on the second brace, so the literal built
                       at `done` contains exactly one brace. */
                    *str = s + 1;
                    result = 1;
                    goto done;
                }

                /* Where a single '{' is the start of a new expression, a
                   single '}' is not allowed. */
                if (ch == '}') {
                    *str = s - 1;
                    ast_error(c, n, "f-string: single '}' is not allowed");
                    return -1;
                }
            }
            /* We're either at a '{', which means we're starting another
               expression; or a '}', which means we're at the end of this
               f-string (for a nested format_spec). */
            s--;
            break;
        }
    }
    *str = s;
    assert(s <= end);
    assert(s == end || *s == '{' || *s == '}');
done:
    /* Build the literal object from [literal_start, s), if non-empty. */
    if (literal_start != s) {
        if (raw)
            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                    s - literal_start,
                                                    NULL, NULL);
        else
            *literal = decode_unicode_with_escapes(c, n, literal_start,
                                                   s - literal_start);
        if (!*literal)
            return -1;
    }
    return result;
}
4884
/* Forward declaration because parsing is recursive: fstring_find_expr()
   calls fstring_parse() to handle a format spec, passing recurse_lvl + 1,
   and treats a NULL result as failure. */
static expr_ty
fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
              struct compiling *c, const node *n);
4889
/* Parse the f-string at *str, ending at end.  We know *str starts an
   expression (so it must be a '{'). Returns the FormattedValue node, which
   includes the expression, conversion character, format_spec expression, and
   optionally the text of the expression (if = is used).

   Note that I don't do a perfect job here: I don't make sure that a
   closing brace doesn't match an opening paren, for example. It
   doesn't need to error on all invalid expressions, just correctly
   find the end of all valid ones. Any errors inside the expression
   will be caught when we parse it later.

   *expression is set to the expression.  For an '=' "debug" expression,
   *expr_text is set to the debug text (the original text of the expression,
   including the '=' and any whitespace around it, as a string object). If
   not a debug expression, *expr_text set to NULL.

   On success *str is left just past the closing '}' of the expression.
   On failure any object already stored in *expr_text is released. */
static int
fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
                  PyObject **expr_text, expr_ty *expression,
                  struct compiling *c, const node *n)
{
    /* Return -1 on error, else 0. */

    const char *expr_start;
    const char *expr_end;
    expr_ty simple_expression;
    expr_ty format_spec = NULL; /* Optional format specifier. */
    int conversion = -1; /* The conversion char.  Use default if not
                            specified, or !r if using = and no format
                            spec. */

    /* 0 if we're not in a string, else the quote char we're trying to
       match (single or double quote). */
    char quote_char = 0;

    /* If we're inside a string, 1=normal, 3=triple-quoted. */
    int string_type = 0;

    /* Keep track of nesting level for braces/parens/brackets in
       expressions. */
    Py_ssize_t nested_depth = 0;
    /* Stack of the currently open bracket characters; nested_depth is the
       number of entries in use. */
    char parenstack[MAXLEVEL];

    *expr_text = NULL;

    /* Can only nest one level deep. */
    if (recurse_lvl >= 2) {
        ast_error(c, n, "f-string: expressions nested too deeply");
        goto error;
    }

    /* The first char must be a left brace, or we wouldn't have gotten
       here. Skip over it. */
    assert(**str == '{');
    *str += 1;

    expr_start = *str;
    /* Scan forward to find where the expression text ends, tracking string
       literals and bracket nesting along the way. */
    for (; *str < end; (*str)++) {
        char ch;

        /* Loop invariants. */
        assert(nested_depth >= 0);
        assert(*str >= expr_start && *str < end);
        if (quote_char)
            assert(string_type == 1 || string_type == 3);
        else
            assert(string_type == 0);

        ch = **str;
        /* Nowhere inside an expression is a backslash allowed. */
        if (ch == '\\') {
            /* Error: can't include a backslash character, inside
               parens or strings or not. */
            ast_error(c, n,
                      "f-string expression part "
                      "cannot include a backslash");
            goto error;
        }
        if (quote_char) {
            /* We're inside a string. See if we're at the end. */
            /* This code needs to implement the same non-error logic
               as tok_get from tokenizer.c, at the letter_quote
               label. To actually share that code would be a
               nightmare. But, it's unlikely to change and is small,
               so duplicate it here. Note we don't need to catch all
               of the errors, since they'll be caught when parsing the
               expression. We just need to match the non-error
               cases. Thus we can ignore \n in single-quoted strings,
               for example. Or non-terminated strings. */
            if (ch == quote_char) {
                /* Does this match the string_type (single or triple
                   quoted)? */
                if (string_type == 3) {
                    if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                        /* We're at the end of a triple quoted string. */
                        *str += 2;
                        string_type = 0;
                        quote_char = 0;
                        continue;
                    }
                } else {
                    /* We're at the end of a normal string. */
                    quote_char = 0;
                    string_type = 0;
                    continue;
                }
            }
        } else if (ch == '\'' || ch == '"') {
            /* Is this a triple quoted string? */
            if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                string_type = 3;
                *str += 2;
            } else {
                /* Start of a normal string. */
                string_type = 1;
            }
            /* Start looking for the end of the string. */
            quote_char = ch;
        } else if (ch == '[' || ch == '{' || ch == '(') {
            if (nested_depth >= MAXLEVEL) {
                ast_error(c, n, "f-string: too many nested parenthesis");
                goto error;
            }
            parenstack[nested_depth] = ch;
            nested_depth++;
        } else if (ch == '#') {
            /* Error: can't include a comment character, inside parens
               or not. */
            ast_error(c, n, "f-string expression part cannot include '#'");
            goto error;
        } else if (nested_depth == 0 &&
                   (ch == '!' || ch == ':' || ch == '}' ||
                    ch == '=' || ch == '>' || ch == '<')) {
            /* See if there's a next character. */
            if (*str+1 < end) {
                char next = *(*str+1);

                /* For "!=". since '=' is not an allowed conversion character,
                   nothing is lost in this test. */
                if ((ch == '!' && next == '=') ||   /* != */
                    (ch == '=' && next == '=') ||   /* == */
                    (ch == '<' && next == '=') ||   /* <= */
                    (ch == '>' && next == '=')      /* >= */
                    ) {
                    /* Step over the first char here; the loop increment
                       steps over the second. */
                    *str += 1;
                    continue;
                }
                /* Don't get out of the loop for these, if they're single
                   chars (not part of 2-char tokens). If by themselves, they
                   don't end an expression (unlike say '!'). */
                if (ch == '>' || ch == '<') {
                    continue;
                }
            }

            /* Normal way out of this loop. */
            break;
        } else if (ch == ']' || ch == '}' || ch == ')') {
            if (!nested_depth) {
                ast_error(c, n, "f-string: unmatched '%c'", ch);
                goto error;
            }
            nested_depth--;
            int opening = parenstack[nested_depth];
            if (!((opening == '(' && ch == ')') ||
                  (opening == '[' && ch == ']') ||
                  (opening == '{' && ch == '}')))
            {
                ast_error(c, n,
                          "f-string: closing parenthesis '%c' "
                          "does not match opening parenthesis '%c'",
                          ch, opening);
                goto error;
            }
        } else {
            /* Just consume this char and loop around. */
        }
    }
    expr_end = *str;
    /* If we leave this loop in a string or with mismatched parens, we
       don't care. We'll get a syntax error when compiling the
       expression. But, we can produce a better error message, so
       let's just do that.*/
    if (quote_char) {
        ast_error(c, n, "f-string: unterminated string");
        goto error;
    }
    if (nested_depth) {
        int opening = parenstack[nested_depth - 1];
        ast_error(c, n, "f-string: unmatched '%c'", opening);
        goto error;
    }

    if (*str >= end)
        goto unexpected_end_of_string;

    /* Compile the expression as soon as possible, so we show errors
       related to the expression before errors related to the
       conversion or format_spec. */
    simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
    if (!simple_expression)
        goto error;

    /* Check for =, which puts the text value of the expression in
       expr_text. */
    if (**str == '=') {
        if (c->c_feature_version < 8) {
            ast_error(c, n,
                      "f-string: self documenting expressions are "
                      "only supported in Python 3.8 and greater");
            goto error;
        }
        *str += 1;

        /* Skip over ASCII whitespace.  No need to test for end of string
           here, since we know there's at least a trailing quote somewhere
           ahead. */
        while (Py_ISSPACE(**str)) {
            *str += 1;
        }

        /* Set *expr_text to the text of the expression. */
        *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
        if (!*expr_text) {
            goto error;
        }
    }

    /* Check for a conversion char, if present. */
    if (**str == '!') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        conversion = **str;
        *str += 1;

        /* Validate the conversion. */
        if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
            ast_error(c, n,
                      "f-string: invalid conversion character: "
                      "expected 's', 'r', or 'a'");
            goto error;
        }

    }

    /* Check for the format spec, if present. */
    if (*str >= end)
        goto unexpected_end_of_string;
    if (**str == ':') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        /* Parse the format spec. */
        format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
        if (!format_spec)
            goto error;
    }

    if (*str >= end || **str != '}')
        goto unexpected_end_of_string;

    /* We're at a right brace. Consume it. */
    assert(*str < end);
    assert(**str == '}');
    *str += 1;

    /* If we're in = mode (detected by non-NULL expr_text), and have no format
       spec and no explicit conversion, set the conversion to 'r'. */
    if (*expr_text && format_spec == NULL && conversion == -1) {
        conversion = 'r';
    }

    /* And now create the FormattedValue node that represents this
       entire expression with the conversion and format spec. */
    *expression = FormattedValue(simple_expression, conversion,
                                 format_spec, LINENO(n),
                                 n->n_col_offset, n->n_end_lineno,
                                 n->n_end_col_offset, c->c_arena);
    if (!*expression)
        goto error;

    return 0;

unexpected_end_of_string:
    ast_error(c, n, "f-string: expecting '}'");
    /* Falls through to error. */

error:
    /* Release the debug text, if one was created before the failure. */
    Py_XDECREF(*expr_text);
    return -1;

}
5184
5185 /* Return -1 on error.
5186
5187 Return 0 if we have a literal (possible zero length) and an
5188 expression (zero length if at the end of the string.
5189
5190 Return 1 if we have a literal, but no expression, and we want the
5191 caller to call us again. This is used to deal with doubled
5192 braces.
5193
5194 When called multiple times on the string 'a{{b{0}c', this function
5195 will return:
5196
5197 1. the literal 'a{' with no expression, and a return value
5198 of 1. Despite the fact that there's no expression, the return
5199 value of 1 means we're not finished yet.
5200
5201 2. the literal 'b' and the expression '0', with a return value of
5202 0. The fact that there's an expression means we're not finished.
5203
5204 3. literal 'c' with no expression and a return value of 0. The
5205 combination of the return value of 0 with no expression means
5206 we're finished.
5207 */
5208 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5209 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5210 int recurse_lvl, PyObject **literal,
5211 PyObject **expr_text, expr_ty *expression,
5212 struct compiling *c, const node *n)
5213 {
5214 int result;
5215
5216 assert(*literal == NULL && *expression == NULL);
5217
5218 /* Get any literal string. */
5219 result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5220 if (result < 0)
5221 goto error;
5222
5223 assert(result == 0 || result == 1);
5224
5225 if (result == 1)
5226 /* We have a literal, but don't look at the expression. */
5227 return 1;
5228
5229 if (*str >= end || **str == '}')
5230 /* We're at the end of the string or the end of a nested
5231 f-string: no expression. The top-level error case where we
5232 expect to be at the end of the string but we're at a '}' is
5233 handled later. */
5234 return 0;
5235
5236 /* We must now be the start of an expression, on a '{'. */
5237 assert(**str == '{');
5238
5239 if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5240 expression, c, n) < 0)
5241 goto error;
5242
5243 return 0;
5244
5245 error:
5246 Py_CLEAR(*literal);
5247 return -1;
5248 }
5249
/* Number of expr_ty slots stored inline in an ExprList before it switches
   to dynamically allocated storage. */
#define EXPRLIST_N_CACHED  64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* Inline storage used first. */
} ExprList;
5268
5269 #ifdef NDEBUG
5270 #define ExprList_check_invariants(l)
5271 #else
5272 static void
ExprList_check_invariants(ExprList * l)5273 ExprList_check_invariants(ExprList *l)
5274 {
5275 /* Check our invariants. Make sure this object is "live", and
5276 hasn't been deallocated. */
5277 assert(l->size >= 0);
5278 assert(l->p != NULL);
5279 if (l->size <= EXPRLIST_N_CACHED)
5280 assert(l->data == l->p);
5281 }
5282 #endif
5283
5284 static void
ExprList_Init(ExprList * l)5285 ExprList_Init(ExprList *l)
5286 {
5287 l->allocated = EXPRLIST_N_CACHED;
5288 l->size = 0;
5289
5290 /* Until we start allocating dynamically, p points to data. */
5291 l->p = l->data;
5292
5293 ExprList_check_invariants(l);
5294 }
5295
5296 static int
ExprList_Append(ExprList * l,expr_ty exp)5297 ExprList_Append(ExprList *l, expr_ty exp)
5298 {
5299 ExprList_check_invariants(l);
5300 if (l->size >= l->allocated) {
5301 /* We need to alloc (or realloc) the memory. */
5302 Py_ssize_t new_size = l->allocated * 2;
5303
5304 /* See if we've ever allocated anything dynamically. */
5305 if (l->p == l->data) {
5306 Py_ssize_t i;
5307 /* We're still using the cached data. Switch to
5308 alloc-ing. */
5309 l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
5310 if (!l->p)
5311 return -1;
5312 /* Copy the cached data into the new buffer. */
5313 for (i = 0; i < l->size; i++)
5314 l->p[i] = l->data[i];
5315 } else {
5316 /* Just realloc. */
5317 expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
5318 if (!tmp) {
5319 PyMem_Free(l->p);
5320 l->p = NULL;
5321 return -1;
5322 }
5323 l->p = tmp;
5324 }
5325
5326 l->allocated = new_size;
5327 assert(l->allocated == 2 * l->size);
5328 }
5329
5330 l->p[l->size++] = exp;
5331
5332 ExprList_check_invariants(l);
5333 return 0;
5334 }
5335
5336 static void
ExprList_Dealloc(ExprList * l)5337 ExprList_Dealloc(ExprList *l)
5338 {
5339 ExprList_check_invariants(l);
5340
5341 /* If there's been an error, or we've never dynamically allocated,
5342 do nothing. */
5343 if (!l->p || l->p == l->data) {
5344 /* Do nothing. */
5345 } else {
5346 /* We have dynamically allocated. Free the memory. */
5347 PyMem_Free(l->p);
5348 }
5349 l->p = NULL;
5350 l->size = -1;
5351 }
5352
5353 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5354 ExprList_Finish(ExprList *l, PyArena *arena)
5355 {
5356 asdl_seq *seq;
5357
5358 ExprList_check_invariants(l);
5359
5360 /* Allocate the asdl_seq and copy the expressions in to it. */
5361 seq = _Py_asdl_seq_new(l->size, arena);
5362 if (seq) {
5363 Py_ssize_t i;
5364 for (i = 0; i < l->size; i++)
5365 asdl_seq_SET(seq, i, l->p[i]);
5366 }
5367 ExprList_Dealloc(l);
5368 return seq;
5369 }
5370
/* The FstringParser is designed to add a mix of strings and
   f-strings, and concat them together as needed. Ultimately, it
   generates an expr_ty. */
typedef struct {
    PyObject *last_str;   /* Pending string object, or NULL; released by
                             FstringParser_Dealloc(). */
    ExprList expr_list;   /* Expressions collected so far. */
    int fmode;            /* Initialised to 0 by FstringParser_Init().
                             NOTE(review): presumably set once an f-string
                             has been seen -- confirm against the users of
                             this field. */
} FstringParser;
5379
5380 #ifdef NDEBUG
5381 #define FstringParser_check_invariants(state)
5382 #else
5383 static void
FstringParser_check_invariants(FstringParser * state)5384 FstringParser_check_invariants(FstringParser *state)
5385 {
5386 if (state->last_str)
5387 assert(PyUnicode_CheckExact(state->last_str));
5388 ExprList_check_invariants(&state->expr_list);
5389 }
5390 #endif
5391
5392 static void
FstringParser_Init(FstringParser * state)5393 FstringParser_Init(FstringParser *state)
5394 {
5395 state->last_str = NULL;
5396 state->fmode = 0;
5397 ExprList_Init(&state->expr_list);
5398 FstringParser_check_invariants(state);
5399 }
5400
5401 static void
FstringParser_Dealloc(FstringParser * state)5402 FstringParser_Dealloc(FstringParser *state)
5403 {
5404 FstringParser_check_invariants(state);
5405
5406 Py_XDECREF(state->last_str);
5407 ExprList_Dealloc(&state->expr_list);
5408 }
5409
5410 /* Constants for the following */
5411 static PyObject *u_kind;
5412
5413 /* Compute 'kind' field for string Constant (either 'u' or None) */
5414 static PyObject *
make_kind(struct compiling * c,const node * n)5415 make_kind(struct compiling *c, const node *n)
5416 {
5417 char *s = NULL;
5418 PyObject *kind = NULL;
5419
5420 /* Find the first string literal, if any */
5421 while (TYPE(n) != STRING) {
5422 if (NCH(n) == 0)
5423 return NULL;
5424 n = CHILD(n, 0);
5425 }
5426 REQ(n, STRING);
5427
5428 /* If it starts with 'u', return a PyUnicode "u" string */
5429 s = STR(n);
5430 if (s && *s == 'u') {
5431 if (!u_kind) {
5432 u_kind = PyUnicode_InternFromString("u");
5433 if (!u_kind)
5434 return NULL;
5435 }
5436 kind = u_kind;
5437 if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5438 return NULL;
5439 }
5440 Py_INCREF(kind);
5441 }
5442 return kind;
5443 }
5444
5445 /* Make a Constant node, but decref the PyUnicode object being added. */
5446 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5447 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5448 {
5449 PyObject *s = *str;
5450 PyObject *kind = NULL;
5451 *str = NULL;
5452 assert(PyUnicode_CheckExact(s));
5453 if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5454 Py_DECREF(s);
5455 return NULL;
5456 }
5457 kind = make_kind(c, n);
5458 if (kind == NULL && PyErr_Occurred())
5459 return NULL;
5460 return Constant(s, kind, LINENO(n), n->n_col_offset,
5461 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5462 }
5463
5464 /* Add a non-f-string (that is, a regular literal string). str is
5465 decref'd. */
5466 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5467 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5468 {
5469 FstringParser_check_invariants(state);
5470
5471 assert(PyUnicode_CheckExact(str));
5472
5473 if (PyUnicode_GET_LENGTH(str) == 0) {
5474 Py_DECREF(str);
5475 return 0;
5476 }
5477
5478 if (!state->last_str) {
5479 /* We didn't have a string before, so just remember this one. */
5480 state->last_str = str;
5481 } else {
5482 /* Concatenate this with the previous string. */
5483 PyUnicode_AppendAndDel(&state->last_str, str);
5484 if (!state->last_str)
5485 return -1;
5486 }
5487 FstringParser_check_invariants(state);
5488 return 0;
5489 }
5490
5491 /* Parse an f-string. The f-string is in *str to end, with no
5492 'f' or quotes. */
5493 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5494 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5495 const char *end, int raw, int recurse_lvl,
5496 struct compiling *c, const node *n)
5497 {
5498 FstringParser_check_invariants(state);
5499 state->fmode = 1;
5500
5501 /* Parse the f-string. */
5502 while (1) {
5503 PyObject *literal = NULL;
5504 PyObject *expr_text = NULL;
5505 expr_ty expression = NULL;
5506
5507 /* If there's a zero length literal in front of the
5508 expression, literal will be NULL. If we're at the end of
5509 the f-string, expression will be NULL (unless result == 1,
5510 see below). */
5511 int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5512 &literal, &expr_text,
5513 &expression, c, n);
5514 if (result < 0)
5515 return -1;
5516
5517 /* Add the literal, if any. */
5518 if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5519 Py_XDECREF(expr_text);
5520 return -1;
5521 }
5522 /* Add the expr_text, if any. */
5523 if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5524 return -1;
5525 }
5526
5527 /* We've dealt with the literal and expr_text, their ownership has
5528 been transferred to the state object. Don't look at them again. */
5529
5530 /* See if we should just loop around to get the next literal
5531 and expression, while ignoring the expression this
5532 time. This is used for un-doubling braces, as an
5533 optimization. */
5534 if (result == 1)
5535 continue;
5536
5537 if (!expression)
5538 /* We're done with this f-string. */
5539 break;
5540
5541 /* We know we have an expression. Convert any existing string
5542 to a Constant node. */
5543 if (!state->last_str) {
5544 /* Do nothing. No previous literal. */
5545 } else {
5546 /* Convert the existing last_str literal to a Constant node. */
5547 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5548 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5549 return -1;
5550 }
5551
5552 if (ExprList_Append(&state->expr_list, expression) < 0)
5553 return -1;
5554 }
5555
5556 /* If recurse_lvl is zero, then we must be at the end of the
5557 string. Otherwise, we must be at a right brace. */
5558
5559 if (recurse_lvl == 0 && *str < end-1) {
5560 ast_error(c, n, "f-string: unexpected end of string");
5561 return -1;
5562 }
5563 if (recurse_lvl != 0 && **str != '}') {
5564 ast_error(c, n, "f-string: expecting '}'");
5565 return -1;
5566 }
5567
5568 FstringParser_check_invariants(state);
5569 return 0;
5570 }
5571
5572 /* Convert the partial state reflected in last_str and expr_list to an
5573 expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5574 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5575 FstringParser_Finish(FstringParser *state, struct compiling *c,
5576 const node *n)
5577 {
5578 asdl_seq *seq;
5579
5580 FstringParser_check_invariants(state);
5581
5582 /* If we're just a constant string with no expressions, return
5583 that. */
5584 if (!state->fmode) {
5585 assert(!state->expr_list.size);
5586 if (!state->last_str) {
5587 /* Create a zero length string. */
5588 state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5589 if (!state->last_str)
5590 goto error;
5591 }
5592 return make_str_node_and_del(&state->last_str, c, n);
5593 }
5594
5595 /* Create a Constant node out of last_str, if needed. It will be the
5596 last node in our expression list. */
5597 if (state->last_str) {
5598 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5599 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5600 goto error;
5601 }
5602 /* This has already been freed. */
5603 assert(state->last_str == NULL);
5604
5605 seq = ExprList_Finish(&state->expr_list, c->c_arena);
5606 if (!seq)
5607 goto error;
5608
5609 return JoinedStr(seq, LINENO(n), n->n_col_offset,
5610 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5611
5612 error:
5613 FstringParser_Dealloc(state);
5614 return NULL;
5615 }
5616
5617 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5618 at end, parse it into an expr_ty. Return NULL on error. Adjust
5619 str to point past the parsed portion. */
5620 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5621 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5622 struct compiling *c, const node *n)
5623 {
5624 FstringParser state;
5625
5626 FstringParser_Init(&state);
5627 if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5628 c, n) < 0) {
5629 FstringParser_Dealloc(&state);
5630 return NULL;
5631 }
5632
5633 return FstringParser_Finish(&state, c, n);
5634 }
5635
5636 /* n is a Python string literal, including the bracketing quote
5637 characters, and r, b, u, &/or f prefixes (if any), and embedded
5638 escape sequences (if any). parsestr parses it, and sets *result to
5639 decoded Python string object. If the string is an f-string, set
5640 *fstr and *fstrlen to the unparsed string object. Return 0 if no
5641 errors occurred.
5642 */
5643 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5644 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5645 PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5646 {
5647 size_t len;
5648 const char *s = STR(n);
5649 int quote = Py_CHARMASK(*s);
5650 int fmode = 0;
5651 *bytesmode = 0;
5652 *rawmode = 0;
5653 *result = NULL;
5654 *fstr = NULL;
5655 if (Py_ISALPHA(quote)) {
5656 while (!*bytesmode || !*rawmode) {
5657 if (quote == 'b' || quote == 'B') {
5658 quote = *++s;
5659 *bytesmode = 1;
5660 }
5661 else if (quote == 'u' || quote == 'U') {
5662 quote = *++s;
5663 }
5664 else if (quote == 'r' || quote == 'R') {
5665 quote = *++s;
5666 *rawmode = 1;
5667 }
5668 else if (quote == 'f' || quote == 'F') {
5669 quote = *++s;
5670 fmode = 1;
5671 }
5672 else {
5673 break;
5674 }
5675 }
5676 }
5677
5678 /* fstrings are only allowed in Python 3.6 and greater */
5679 if (fmode && c->c_feature_version < 6) {
5680 ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5681 return -1;
5682 }
5683
5684 if (fmode && *bytesmode) {
5685 PyErr_BadInternalCall();
5686 return -1;
5687 }
5688 if (quote != '\'' && quote != '\"') {
5689 PyErr_BadInternalCall();
5690 return -1;
5691 }
5692 /* Skip the leading quote char. */
5693 s++;
5694 len = strlen(s);
5695 if (len > INT_MAX) {
5696 PyErr_SetString(PyExc_OverflowError,
5697 "string to parse is too long");
5698 return -1;
5699 }
5700 if (s[--len] != quote) {
5701 /* Last quote char must match the first. */
5702 PyErr_BadInternalCall();
5703 return -1;
5704 }
5705 if (len >= 4 && s[0] == quote && s[1] == quote) {
5706 /* A triple quoted string. We've already skipped one quote at
5707 the start and one at the end of the string. Now skip the
5708 two at the start. */
5709 s += 2;
5710 len -= 2;
5711 /* And check that the last two match. */
5712 if (s[--len] != quote || s[--len] != quote) {
5713 PyErr_BadInternalCall();
5714 return -1;
5715 }
5716 }
5717
5718 if (fmode) {
5719 /* Just return the bytes. The caller will parse the resulting
5720 string. */
5721 *fstr = s;
5722 *fstrlen = len;
5723 return 0;
5724 }
5725
5726 /* Not an f-string. */
5727 /* Avoid invoking escape decoding routines if possible. */
5728 *rawmode = *rawmode || strchr(s, '\\') == NULL;
5729 if (*bytesmode) {
5730 /* Disallow non-ASCII characters. */
5731 const char *ch;
5732 for (ch = s; *ch; ch++) {
5733 if (Py_CHARMASK(*ch) >= 0x80) {
5734 ast_error(c, n,
5735 "bytes can only contain ASCII "
5736 "literal characters.");
5737 return -1;
5738 }
5739 }
5740 if (*rawmode)
5741 *result = PyBytes_FromStringAndSize(s, len);
5742 else
5743 *result = decode_bytes_with_escapes(c, n, s, len);
5744 } else {
5745 if (*rawmode)
5746 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5747 else
5748 *result = decode_unicode_with_escapes(c, n, s, len);
5749 }
5750 return *result == NULL ? -1 : 0;
5751 }
5752
5753 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5754 each STRING atom, and process it as needed. For bytes, just
5755 concatenate them together, and the result will be a Constant node. For
5756 normal strings and f-strings, concatenate them together. The result
5757 will be a Constant node if there were no f-strings; a FormattedValue
5758 node if there's just an f-string (with no leading or trailing
5759 literals), or a JoinedStr node if there are multiple f-strings or
5760 any literals involved. */
5761 static expr_ty
parsestrplus(struct compiling * c,const node * n)5762 parsestrplus(struct compiling *c, const node *n)
5763 {
5764 int bytesmode = 0;
5765 PyObject *bytes_str = NULL;
5766 int i;
5767
5768 FstringParser state;
5769 FstringParser_Init(&state);
5770
5771 for (i = 0; i < NCH(n); i++) {
5772 int this_bytesmode;
5773 int this_rawmode;
5774 PyObject *s;
5775 const char *fstr;
5776 Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
5777
5778 REQ(CHILD(n, i), STRING);
5779 if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5780 &fstr, &fstrlen) != 0)
5781 goto error;
5782
5783 /* Check that we're not mixing bytes with unicode. */
5784 if (i != 0 && bytesmode != this_bytesmode) {
5785 ast_error(c, n, "cannot mix bytes and nonbytes literals");
5786 /* s is NULL if the current string part is an f-string. */
5787 Py_XDECREF(s);
5788 goto error;
5789 }
5790 bytesmode = this_bytesmode;
5791
5792 if (fstr != NULL) {
5793 int result;
5794 assert(s == NULL && !bytesmode);
5795 /* This is an f-string. Parse and concatenate it. */
5796 result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5797 this_rawmode, 0, c, n);
5798 if (result < 0)
5799 goto error;
5800 } else {
5801 /* A string or byte string. */
5802 assert(s != NULL && fstr == NULL);
5803
5804 assert(bytesmode ? PyBytes_CheckExact(s) :
5805 PyUnicode_CheckExact(s));
5806
5807 if (bytesmode) {
5808 /* For bytes, concat as we go. */
5809 if (i == 0) {
5810 /* First time, just remember this value. */
5811 bytes_str = s;
5812 } else {
5813 PyBytes_ConcatAndDel(&bytes_str, s);
5814 if (!bytes_str)
5815 goto error;
5816 }
5817 } else {
5818 /* This is a regular string. Concatenate it. */
5819 if (FstringParser_ConcatAndDel(&state, s) < 0)
5820 goto error;
5821 }
5822 }
5823 }
5824 if (bytesmode) {
5825 /* Just return the bytes object and we're done. */
5826 if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5827 goto error;
5828 return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5829 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5830 }
5831
5832 /* We're not a bytes string, bytes_str should never have been set. */
5833 assert(bytes_str == NULL);
5834
5835 return FstringParser_Finish(&state, c, n);
5836
5837 error:
5838 Py_XDECREF(bytes_str);
5839 FstringParser_Dealloc(&state);
5840 return NULL;
5841 }
5842
5843 PyObject *
_PyAST_GetDocString(asdl_seq * body)5844 _PyAST_GetDocString(asdl_seq *body)
5845 {
5846 if (!asdl_seq_LEN(body)) {
5847 return NULL;
5848 }
5849 stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
5850 if (st->kind != Expr_kind) {
5851 return NULL;
5852 }
5853 expr_ty e = st->v.Expr.value;
5854 if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
5855 return e->v.Constant.value;
5856 }
5857 return NULL;
5858 }
5859