1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12
13 #include <assert.h>
14 #include <stdbool.h>
15
16 #define MAXLEVEL 200 /* Max parentheses level */
17
18 static int validate_stmts(asdl_seq *);
19 static int validate_exprs(asdl_seq *, expr_context_ty, int);
20 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
21 static int validate_stmt(stmt_ty);
22 static int validate_expr(expr_ty, expr_context_ty);
23
24 static int
validate_name(PyObject * name)25 validate_name(PyObject *name)
26 {
27 assert(PyUnicode_Check(name));
28 static const char * const forbidden[] = {
29 "None",
30 "True",
31 "False",
32 NULL
33 };
34 for (int i = 0; forbidden[i] != NULL; i++) {
35 if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
36 PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
37 return 0;
38 }
39 }
40 return 1;
41 }
42
43 static int
validate_comprehension(asdl_seq * gens)44 validate_comprehension(asdl_seq *gens)
45 {
46 Py_ssize_t i;
47 if (!asdl_seq_LEN(gens)) {
48 PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
49 return 0;
50 }
51 for (i = 0; i < asdl_seq_LEN(gens); i++) {
52 comprehension_ty comp = asdl_seq_GET(gens, i);
53 if (!validate_expr(comp->target, Store) ||
54 !validate_expr(comp->iter, Load) ||
55 !validate_exprs(comp->ifs, Load, 0))
56 return 0;
57 }
58 return 1;
59 }
60
61 static int
validate_slice(slice_ty slice)62 validate_slice(slice_ty slice)
63 {
64 switch (slice->kind) {
65 case Slice_kind:
66 return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
67 (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
68 (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
69 case ExtSlice_kind: {
70 Py_ssize_t i;
71 if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
72 return 0;
73 for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
74 if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
75 return 0;
76 return 1;
77 }
78 case Index_kind:
79 return validate_expr(slice->v.Index.value, Load);
80 default:
81 PyErr_SetString(PyExc_SystemError, "unknown slice node");
82 return 0;
83 }
84 }
85
86 static int
validate_keywords(asdl_seq * keywords)87 validate_keywords(asdl_seq *keywords)
88 {
89 Py_ssize_t i;
90 for (i = 0; i < asdl_seq_LEN(keywords); i++)
91 if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
92 return 0;
93 return 1;
94 }
95
96 static int
validate_args(asdl_seq * args)97 validate_args(asdl_seq *args)
98 {
99 Py_ssize_t i;
100 for (i = 0; i < asdl_seq_LEN(args); i++) {
101 arg_ty arg = asdl_seq_GET(args, i);
102 if (arg->annotation && !validate_expr(arg->annotation, Load))
103 return 0;
104 }
105 return 1;
106 }
107
108 static const char *
expr_context_name(expr_context_ty ctx)109 expr_context_name(expr_context_ty ctx)
110 {
111 switch (ctx) {
112 case Load:
113 return "Load";
114 case Store:
115 return "Store";
116 case Del:
117 return "Del";
118 case AugLoad:
119 return "AugLoad";
120 case AugStore:
121 return "AugStore";
122 case Param:
123 return "Param";
124 default:
125 Py_UNREACHABLE();
126 }
127 }
128
129 static int
validate_arguments(arguments_ty args)130 validate_arguments(arguments_ty args)
131 {
132 if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
133 return 0;
134 }
135 if (args->vararg && args->vararg->annotation
136 && !validate_expr(args->vararg->annotation, Load)) {
137 return 0;
138 }
139 if (!validate_args(args->kwonlyargs))
140 return 0;
141 if (args->kwarg && args->kwarg->annotation
142 && !validate_expr(args->kwarg->annotation, Load)) {
143 return 0;
144 }
145 if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
146 PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
147 return 0;
148 }
149 if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
150 PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
151 "kw_defaults on arguments");
152 return 0;
153 }
154 return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
155 }
156
157 static int
validate_constant(PyObject * value)158 validate_constant(PyObject *value)
159 {
160 if (value == Py_None || value == Py_Ellipsis)
161 return 1;
162
163 if (PyLong_CheckExact(value)
164 || PyFloat_CheckExact(value)
165 || PyComplex_CheckExact(value)
166 || PyBool_Check(value)
167 || PyUnicode_CheckExact(value)
168 || PyBytes_CheckExact(value))
169 return 1;
170
171 if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
172 PyObject *it;
173
174 it = PyObject_GetIter(value);
175 if (it == NULL)
176 return 0;
177
178 while (1) {
179 PyObject *item = PyIter_Next(it);
180 if (item == NULL) {
181 if (PyErr_Occurred()) {
182 Py_DECREF(it);
183 return 0;
184 }
185 break;
186 }
187
188 if (!validate_constant(item)) {
189 Py_DECREF(it);
190 Py_DECREF(item);
191 return 0;
192 }
193 Py_DECREF(item);
194 }
195
196 Py_DECREF(it);
197 return 1;
198 }
199
200 return 0;
201 }
202
203 static int
validate_expr(expr_ty exp,expr_context_ty ctx)204 validate_expr(expr_ty exp, expr_context_ty ctx)
205 {
206 int check_ctx = 1;
207 expr_context_ty actual_ctx;
208
209 /* First check expression context. */
210 switch (exp->kind) {
211 case Attribute_kind:
212 actual_ctx = exp->v.Attribute.ctx;
213 break;
214 case Subscript_kind:
215 actual_ctx = exp->v.Subscript.ctx;
216 break;
217 case Starred_kind:
218 actual_ctx = exp->v.Starred.ctx;
219 break;
220 case Name_kind:
221 if (!validate_name(exp->v.Name.id)) {
222 return 0;
223 }
224 actual_ctx = exp->v.Name.ctx;
225 break;
226 case List_kind:
227 actual_ctx = exp->v.List.ctx;
228 break;
229 case Tuple_kind:
230 actual_ctx = exp->v.Tuple.ctx;
231 break;
232 default:
233 if (ctx != Load) {
234 PyErr_Format(PyExc_ValueError, "expression which can't be "
235 "assigned to in %s context", expr_context_name(ctx));
236 return 0;
237 }
238 check_ctx = 0;
239 /* set actual_ctx to prevent gcc warning */
240 actual_ctx = 0;
241 }
242 if (check_ctx && actual_ctx != ctx) {
243 PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
244 expr_context_name(ctx), expr_context_name(actual_ctx));
245 return 0;
246 }
247
248 /* Now validate expression. */
249 switch (exp->kind) {
250 case BoolOp_kind:
251 if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
252 PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
253 return 0;
254 }
255 return validate_exprs(exp->v.BoolOp.values, Load, 0);
256 case BinOp_kind:
257 return validate_expr(exp->v.BinOp.left, Load) &&
258 validate_expr(exp->v.BinOp.right, Load);
259 case UnaryOp_kind:
260 return validate_expr(exp->v.UnaryOp.operand, Load);
261 case Lambda_kind:
262 return validate_arguments(exp->v.Lambda.args) &&
263 validate_expr(exp->v.Lambda.body, Load);
264 case IfExp_kind:
265 return validate_expr(exp->v.IfExp.test, Load) &&
266 validate_expr(exp->v.IfExp.body, Load) &&
267 validate_expr(exp->v.IfExp.orelse, Load);
268 case Dict_kind:
269 if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
270 PyErr_SetString(PyExc_ValueError,
271 "Dict doesn't have the same number of keys as values");
272 return 0;
273 }
274 /* null_ok=1 for keys expressions to allow dict unpacking to work in
275 dict literals, i.e. ``{**{a:b}}`` */
276 return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
277 validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
278 case Set_kind:
279 return validate_exprs(exp->v.Set.elts, Load, 0);
280 #define COMP(NAME) \
281 case NAME ## _kind: \
282 return validate_comprehension(exp->v.NAME.generators) && \
283 validate_expr(exp->v.NAME.elt, Load);
284 COMP(ListComp)
285 COMP(SetComp)
286 COMP(GeneratorExp)
287 #undef COMP
288 case DictComp_kind:
289 return validate_comprehension(exp->v.DictComp.generators) &&
290 validate_expr(exp->v.DictComp.key, Load) &&
291 validate_expr(exp->v.DictComp.value, Load);
292 case Yield_kind:
293 return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
294 case YieldFrom_kind:
295 return validate_expr(exp->v.YieldFrom.value, Load);
296 case Await_kind:
297 return validate_expr(exp->v.Await.value, Load);
298 case Compare_kind:
299 if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
300 PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
301 return 0;
302 }
303 if (asdl_seq_LEN(exp->v.Compare.comparators) !=
304 asdl_seq_LEN(exp->v.Compare.ops)) {
305 PyErr_SetString(PyExc_ValueError, "Compare has a different number "
306 "of comparators and operands");
307 return 0;
308 }
309 return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
310 validate_expr(exp->v.Compare.left, Load);
311 case Call_kind:
312 return validate_expr(exp->v.Call.func, Load) &&
313 validate_exprs(exp->v.Call.args, Load, 0) &&
314 validate_keywords(exp->v.Call.keywords);
315 case Constant_kind:
316 if (!validate_constant(exp->v.Constant.value)) {
317 PyErr_Format(PyExc_TypeError,
318 "got an invalid type in Constant: %s",
319 Py_TYPE(exp->v.Constant.value)->tp_name);
320 return 0;
321 }
322 return 1;
323 case JoinedStr_kind:
324 return validate_exprs(exp->v.JoinedStr.values, Load, 0);
325 case FormattedValue_kind:
326 if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
327 return 0;
328 if (exp->v.FormattedValue.format_spec)
329 return validate_expr(exp->v.FormattedValue.format_spec, Load);
330 return 1;
331 case Attribute_kind:
332 return validate_expr(exp->v.Attribute.value, Load);
333 case Subscript_kind:
334 return validate_slice(exp->v.Subscript.slice) &&
335 validate_expr(exp->v.Subscript.value, Load);
336 case Starred_kind:
337 return validate_expr(exp->v.Starred.value, ctx);
338 case List_kind:
339 return validate_exprs(exp->v.List.elts, ctx, 0);
340 case Tuple_kind:
341 return validate_exprs(exp->v.Tuple.elts, ctx, 0);
342 case NamedExpr_kind:
343 return validate_expr(exp->v.NamedExpr.value, Load);
344 /* This last case doesn't have any checking. */
345 case Name_kind:
346 return 1;
347 }
348 PyErr_SetString(PyExc_SystemError, "unexpected expression");
349 return 0;
350 }
351
352 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)353 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
354 {
355 if (asdl_seq_LEN(seq))
356 return 1;
357 PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
358 return 0;
359 }
360
361 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)362 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
363 {
364 return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
365 validate_exprs(targets, ctx, 0);
366 }
367
368 static int
validate_body(asdl_seq * body,const char * owner)369 validate_body(asdl_seq *body, const char *owner)
370 {
371 return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
372 }
373
374 static int
validate_stmt(stmt_ty stmt)375 validate_stmt(stmt_ty stmt)
376 {
377 Py_ssize_t i;
378 switch (stmt->kind) {
379 case FunctionDef_kind:
380 return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
381 validate_arguments(stmt->v.FunctionDef.args) &&
382 validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
383 (!stmt->v.FunctionDef.returns ||
384 validate_expr(stmt->v.FunctionDef.returns, Load));
385 case ClassDef_kind:
386 return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
387 validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
388 validate_keywords(stmt->v.ClassDef.keywords) &&
389 validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
390 case Return_kind:
391 return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
392 case Delete_kind:
393 return validate_assignlist(stmt->v.Delete.targets, Del);
394 case Assign_kind:
395 return validate_assignlist(stmt->v.Assign.targets, Store) &&
396 validate_expr(stmt->v.Assign.value, Load);
397 case AugAssign_kind:
398 return validate_expr(stmt->v.AugAssign.target, Store) &&
399 validate_expr(stmt->v.AugAssign.value, Load);
400 case AnnAssign_kind:
401 if (stmt->v.AnnAssign.target->kind != Name_kind &&
402 stmt->v.AnnAssign.simple) {
403 PyErr_SetString(PyExc_TypeError,
404 "AnnAssign with simple non-Name target");
405 return 0;
406 }
407 return validate_expr(stmt->v.AnnAssign.target, Store) &&
408 (!stmt->v.AnnAssign.value ||
409 validate_expr(stmt->v.AnnAssign.value, Load)) &&
410 validate_expr(stmt->v.AnnAssign.annotation, Load);
411 case For_kind:
412 return validate_expr(stmt->v.For.target, Store) &&
413 validate_expr(stmt->v.For.iter, Load) &&
414 validate_body(stmt->v.For.body, "For") &&
415 validate_stmts(stmt->v.For.orelse);
416 case AsyncFor_kind:
417 return validate_expr(stmt->v.AsyncFor.target, Store) &&
418 validate_expr(stmt->v.AsyncFor.iter, Load) &&
419 validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
420 validate_stmts(stmt->v.AsyncFor.orelse);
421 case While_kind:
422 return validate_expr(stmt->v.While.test, Load) &&
423 validate_body(stmt->v.While.body, "While") &&
424 validate_stmts(stmt->v.While.orelse);
425 case If_kind:
426 return validate_expr(stmt->v.If.test, Load) &&
427 validate_body(stmt->v.If.body, "If") &&
428 validate_stmts(stmt->v.If.orelse);
429 case With_kind:
430 if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
431 return 0;
432 for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
433 withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
434 if (!validate_expr(item->context_expr, Load) ||
435 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
436 return 0;
437 }
438 return validate_body(stmt->v.With.body, "With");
439 case AsyncWith_kind:
440 if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
441 return 0;
442 for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
443 withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
444 if (!validate_expr(item->context_expr, Load) ||
445 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
446 return 0;
447 }
448 return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
449 case Raise_kind:
450 if (stmt->v.Raise.exc) {
451 return validate_expr(stmt->v.Raise.exc, Load) &&
452 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
453 }
454 if (stmt->v.Raise.cause) {
455 PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
456 return 0;
457 }
458 return 1;
459 case Try_kind:
460 if (!validate_body(stmt->v.Try.body, "Try"))
461 return 0;
462 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
463 !asdl_seq_LEN(stmt->v.Try.finalbody)) {
464 PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
465 return 0;
466 }
467 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
468 asdl_seq_LEN(stmt->v.Try.orelse)) {
469 PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
470 return 0;
471 }
472 for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
473 excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
474 if ((handler->v.ExceptHandler.type &&
475 !validate_expr(handler->v.ExceptHandler.type, Load)) ||
476 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
477 return 0;
478 }
479 return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
480 validate_stmts(stmt->v.Try.finalbody)) &&
481 (!asdl_seq_LEN(stmt->v.Try.orelse) ||
482 validate_stmts(stmt->v.Try.orelse));
483 case Assert_kind:
484 return validate_expr(stmt->v.Assert.test, Load) &&
485 (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
486 case Import_kind:
487 return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
488 case ImportFrom_kind:
489 if (stmt->v.ImportFrom.level < 0) {
490 PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
491 return 0;
492 }
493 return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
494 case Global_kind:
495 return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
496 case Nonlocal_kind:
497 return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
498 case Expr_kind:
499 return validate_expr(stmt->v.Expr.value, Load);
500 case AsyncFunctionDef_kind:
501 return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
502 validate_arguments(stmt->v.AsyncFunctionDef.args) &&
503 validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
504 (!stmt->v.AsyncFunctionDef.returns ||
505 validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
506 case Pass_kind:
507 case Break_kind:
508 case Continue_kind:
509 return 1;
510 default:
511 PyErr_SetString(PyExc_SystemError, "unexpected statement");
512 return 0;
513 }
514 }
515
516 static int
validate_stmts(asdl_seq * seq)517 validate_stmts(asdl_seq *seq)
518 {
519 Py_ssize_t i;
520 for (i = 0; i < asdl_seq_LEN(seq); i++) {
521 stmt_ty stmt = asdl_seq_GET(seq, i);
522 if (stmt) {
523 if (!validate_stmt(stmt))
524 return 0;
525 }
526 else {
527 PyErr_SetString(PyExc_ValueError,
528 "None disallowed in statement list");
529 return 0;
530 }
531 }
532 return 1;
533 }
534
535 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)536 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
537 {
538 Py_ssize_t i;
539 for (i = 0; i < asdl_seq_LEN(exprs); i++) {
540 expr_ty expr = asdl_seq_GET(exprs, i);
541 if (expr) {
542 if (!validate_expr(expr, ctx))
543 return 0;
544 }
545 else if (!null_ok) {
546 PyErr_SetString(PyExc_ValueError,
547 "None disallowed in expression list");
548 return 0;
549 }
550
551 }
552 return 1;
553 }
554
555 int
PyAST_Validate(mod_ty mod)556 PyAST_Validate(mod_ty mod)
557 {
558 int res = 0;
559
560 switch (mod->kind) {
561 case Module_kind:
562 res = validate_stmts(mod->v.Module.body);
563 break;
564 case Interactive_kind:
565 res = validate_stmts(mod->v.Interactive.body);
566 break;
567 case Expression_kind:
568 res = validate_expr(mod->v.Expression.body, Load);
569 break;
570 case Suite_kind:
571 PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
572 break;
573 default:
574 PyErr_SetString(PyExc_SystemError, "impossible module node");
575 res = 0;
576 break;
577 }
578 return res;
579 }
580
581 /* This is done here, so defines like "test" don't interfere with AST use above. */
582 #include "grammar.h"
583 #include "parsetok.h"
584 #include "graminit.h"
585
586 /* Data structure used internally */
587 struct compiling {
588 PyArena *c_arena; /* Arena for allocating memory. */
589 PyObject *c_filename; /* filename */
590 PyObject *c_normalize; /* Normalization function from unicodedata. */
591 int c_feature_version; /* Latest minor version of Python for allowed features */
592 };
593
594 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
595 static expr_ty ast_for_expr(struct compiling *, const node *);
596 static stmt_ty ast_for_stmt(struct compiling *, const node *);
597 static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
598 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
599 expr_context_ty);
600 static expr_ty ast_for_testlist(struct compiling *, const node *);
601 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
602
603 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
604 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
605
606 /* Note different signature for ast_for_call */
607 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
608 const node *, const node *, const node *);
609
610 static PyObject *parsenumber(struct compiling *, const char *);
611 static expr_ty parsestrplus(struct compiling *, const node *n);
612 static void get_last_end_pos(asdl_seq *, int *, int *);
613
614 #define COMP_GENEXP 0
615 #define COMP_LISTCOMP 1
616 #define COMP_SETCOMP 2
617
618 static int
init_normalization(struct compiling * c)619 init_normalization(struct compiling *c)
620 {
621 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
622 if (!m)
623 return 0;
624 c->c_normalize = PyObject_GetAttrString(m, "normalize");
625 Py_DECREF(m);
626 if (!c->c_normalize)
627 return 0;
628 return 1;
629 }
630
631 static identifier
new_identifier(const char * n,struct compiling * c)632 new_identifier(const char *n, struct compiling *c)
633 {
634 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
635 if (!id)
636 return NULL;
637 /* PyUnicode_DecodeUTF8 should always return a ready string. */
638 assert(PyUnicode_IS_READY(id));
639 /* Check whether there are non-ASCII characters in the
640 identifier; if so, normalize to NFKC. */
641 if (!PyUnicode_IS_ASCII(id)) {
642 PyObject *id2;
643 _Py_IDENTIFIER(NFKC);
644 if (!c->c_normalize && !init_normalization(c)) {
645 Py_DECREF(id);
646 return NULL;
647 }
648 PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
649 if (form == NULL) {
650 Py_DECREF(id);
651 return NULL;
652 }
653 PyObject *args[2] = {form, id};
654 id2 = _PyObject_FastCall(c->c_normalize, args, 2);
655 Py_DECREF(id);
656 if (!id2)
657 return NULL;
658 if (!PyUnicode_Check(id2)) {
659 PyErr_Format(PyExc_TypeError,
660 "unicodedata.normalize() must return a string, not "
661 "%.200s",
662 Py_TYPE(id2)->tp_name);
663 Py_DECREF(id2);
664 return NULL;
665 }
666 id = id2;
667 }
668 PyUnicode_InternInPlace(&id);
669 if (PyArena_AddPyObject(c->c_arena, id) < 0) {
670 Py_DECREF(id);
671 return NULL;
672 }
673 return id;
674 }
675
676 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
677
678 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)679 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
680 {
681 PyObject *value, *errstr, *loc, *tmp;
682 va_list va;
683
684 va_start(va, errmsg);
685 errstr = PyUnicode_FromFormatV(errmsg, va);
686 va_end(va);
687 if (!errstr) {
688 return 0;
689 }
690 loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
691 if (!loc) {
692 Py_INCREF(Py_None);
693 loc = Py_None;
694 }
695 tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
696 if (!tmp) {
697 Py_DECREF(errstr);
698 return 0;
699 }
700 value = PyTuple_Pack(2, errstr, tmp);
701 Py_DECREF(errstr);
702 Py_DECREF(tmp);
703 if (value) {
704 PyErr_SetObject(PyExc_SyntaxError, value);
705 Py_DECREF(value);
706 }
707 return 0;
708 }
709
710 /* num_stmts() returns number of contained statements.
711
712 Use this routine to determine how big a sequence is needed for
713 the statements in a parse tree. Its raison d'etre is this bit of
714 grammar:
715
716 stmt: simple_stmt | compound_stmt
717 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
718
719 A simple_stmt can contain multiple small_stmt elements joined
720 by semicolons. If the arg is a simple_stmt, the number of
721 small_stmt elements is returned.
722 */
723
724 static string
new_type_comment(const char * s,struct compiling * c)725 new_type_comment(const char *s, struct compiling *c)
726 {
727 PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
728 if (res == NULL)
729 return NULL;
730 if (PyArena_AddPyObject(c->c_arena, res) < 0) {
731 Py_DECREF(res);
732 return NULL;
733 }
734 return res;
735 }
736 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
737
738 static int
num_stmts(const node * n)739 num_stmts(const node *n)
740 {
741 int i, l;
742 node *ch;
743
744 switch (TYPE(n)) {
745 case single_input:
746 if (TYPE(CHILD(n, 0)) == NEWLINE)
747 return 0;
748 else
749 return num_stmts(CHILD(n, 0));
750 case file_input:
751 l = 0;
752 for (i = 0; i < NCH(n); i++) {
753 ch = CHILD(n, i);
754 if (TYPE(ch) == stmt)
755 l += num_stmts(ch);
756 }
757 return l;
758 case stmt:
759 return num_stmts(CHILD(n, 0));
760 case compound_stmt:
761 return 1;
762 case simple_stmt:
763 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
764 case suite:
765 case func_body_suite:
766 /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
767 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
768 if (NCH(n) == 1)
769 return num_stmts(CHILD(n, 0));
770 else {
771 i = 2;
772 l = 0;
773 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
774 i += 2;
775 for (; i < (NCH(n) - 1); i++)
776 l += num_stmts(CHILD(n, i));
777 return l;
778 }
779 default: {
780 char buf[128];
781
782 sprintf(buf, "Non-statement found: %d %d",
783 TYPE(n), NCH(n));
784 Py_FatalError(buf);
785 }
786 }
787 Py_UNREACHABLE();
788 }
789
790 /* Transform the CST rooted at node * to the appropriate AST
791 */
792
793 mod_ty
PyAST_FromNodeObject(const node * n,PyCompilerFlags * flags,PyObject * filename,PyArena * arena)794 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
795 PyObject *filename, PyArena *arena)
796 {
797 int i, j, k, num;
798 asdl_seq *stmts = NULL;
799 asdl_seq *type_ignores = NULL;
800 stmt_ty s;
801 node *ch;
802 struct compiling c;
803 mod_ty res = NULL;
804 asdl_seq *argtypes = NULL;
805 expr_ty ret, arg;
806
807 c.c_arena = arena;
808 /* borrowed reference */
809 c.c_filename = filename;
810 c.c_normalize = NULL;
811 c.c_feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
812 flags->cf_feature_version : PY_MINOR_VERSION;
813
814 if (TYPE(n) == encoding_decl)
815 n = CHILD(n, 0);
816
817 k = 0;
818 switch (TYPE(n)) {
819 case file_input:
820 stmts = _Py_asdl_seq_new(num_stmts(n), arena);
821 if (!stmts)
822 goto out;
823 for (i = 0; i < NCH(n) - 1; i++) {
824 ch = CHILD(n, i);
825 if (TYPE(ch) == NEWLINE)
826 continue;
827 REQ(ch, stmt);
828 num = num_stmts(ch);
829 if (num == 1) {
830 s = ast_for_stmt(&c, ch);
831 if (!s)
832 goto out;
833 asdl_seq_SET(stmts, k++, s);
834 }
835 else {
836 ch = CHILD(ch, 0);
837 REQ(ch, simple_stmt);
838 for (j = 0; j < num; j++) {
839 s = ast_for_stmt(&c, CHILD(ch, j * 2));
840 if (!s)
841 goto out;
842 asdl_seq_SET(stmts, k++, s);
843 }
844 }
845 }
846
847 /* Type ignores are stored under the ENDMARKER in file_input. */
848 ch = CHILD(n, NCH(n) - 1);
849 REQ(ch, ENDMARKER);
850 num = NCH(ch);
851 type_ignores = _Py_asdl_seq_new(num, arena);
852 if (!type_ignores)
853 goto out;
854
855 for (i = 0; i < num; i++) {
856 string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
857 if (!type_comment)
858 goto out;
859 type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
860 if (!ti)
861 goto out;
862 asdl_seq_SET(type_ignores, i, ti);
863 }
864
865 res = Module(stmts, type_ignores, arena);
866 break;
867 case eval_input: {
868 expr_ty testlist_ast;
869
870 /* XXX Why not comp_for here? */
871 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
872 if (!testlist_ast)
873 goto out;
874 res = Expression(testlist_ast, arena);
875 break;
876 }
877 case single_input:
878 if (TYPE(CHILD(n, 0)) == NEWLINE) {
879 stmts = _Py_asdl_seq_new(1, arena);
880 if (!stmts)
881 goto out;
882 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
883 n->n_end_lineno, n->n_end_col_offset,
884 arena));
885 if (!asdl_seq_GET(stmts, 0))
886 goto out;
887 res = Interactive(stmts, arena);
888 }
889 else {
890 n = CHILD(n, 0);
891 num = num_stmts(n);
892 stmts = _Py_asdl_seq_new(num, arena);
893 if (!stmts)
894 goto out;
895 if (num == 1) {
896 s = ast_for_stmt(&c, n);
897 if (!s)
898 goto out;
899 asdl_seq_SET(stmts, 0, s);
900 }
901 else {
902 /* Only a simple_stmt can contain multiple statements. */
903 REQ(n, simple_stmt);
904 for (i = 0; i < NCH(n); i += 2) {
905 if (TYPE(CHILD(n, i)) == NEWLINE)
906 break;
907 s = ast_for_stmt(&c, CHILD(n, i));
908 if (!s)
909 goto out;
910 asdl_seq_SET(stmts, i / 2, s);
911 }
912 }
913
914 res = Interactive(stmts, arena);
915 }
916 break;
917 case func_type_input:
918 n = CHILD(n, 0);
919 REQ(n, func_type);
920
921 if (TYPE(CHILD(n, 1)) == typelist) {
922 ch = CHILD(n, 1);
923 /* this is overly permissive -- we don't pay any attention to
924 * stars on the args -- just parse them into an ordered list */
925 num = 0;
926 for (i = 0; i < NCH(ch); i++) {
927 if (TYPE(CHILD(ch, i)) == test) {
928 num++;
929 }
930 }
931
932 argtypes = _Py_asdl_seq_new(num, arena);
933 if (!argtypes)
934 goto out;
935
936 j = 0;
937 for (i = 0; i < NCH(ch); i++) {
938 if (TYPE(CHILD(ch, i)) == test) {
939 arg = ast_for_expr(&c, CHILD(ch, i));
940 if (!arg)
941 goto out;
942 asdl_seq_SET(argtypes, j++, arg);
943 }
944 }
945 }
946 else {
947 argtypes = _Py_asdl_seq_new(0, arena);
948 if (!argtypes)
949 goto out;
950 }
951
952 ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
953 if (!ret)
954 goto out;
955 res = FunctionType(argtypes, ret, arena);
956 break;
957 default:
958 PyErr_Format(PyExc_SystemError,
959 "invalid node %d for PyAST_FromNode", TYPE(n));
960 goto out;
961 }
962 out:
963 if (c.c_normalize) {
964 Py_DECREF(c.c_normalize);
965 }
966 return res;
967 }
968
969 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)970 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
971 PyArena *arena)
972 {
973 mod_ty mod;
974 PyObject *filename;
975 filename = PyUnicode_DecodeFSDefault(filename_str);
976 if (filename == NULL)
977 return NULL;
978 mod = PyAST_FromNodeObject(n, flags, filename, arena);
979 Py_DECREF(filename);
980 return mod;
981
982 }
983
984 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
985 */
986
987 static operator_ty
get_operator(struct compiling * c,const node * n)988 get_operator(struct compiling *c, const node *n)
989 {
990 switch (TYPE(n)) {
991 case VBAR:
992 return BitOr;
993 case CIRCUMFLEX:
994 return BitXor;
995 case AMPER:
996 return BitAnd;
997 case LEFTSHIFT:
998 return LShift;
999 case RIGHTSHIFT:
1000 return RShift;
1001 case PLUS:
1002 return Add;
1003 case MINUS:
1004 return Sub;
1005 case STAR:
1006 return Mult;
1007 case AT:
1008 if (c->c_feature_version < 5) {
1009 ast_error(c, n,
1010 "The '@' operator is only supported in Python 3.5 and greater");
1011 return (operator_ty)0;
1012 }
1013 return MatMult;
1014 case SLASH:
1015 return Div;
1016 case DOUBLESLASH:
1017 return FloorDiv;
1018 case PERCENT:
1019 return Mod;
1020 default:
1021 return (operator_ty)0;
1022 }
1023 }
1024
1025 static const char * const FORBIDDEN[] = {
1026 "None",
1027 "True",
1028 "False",
1029 "__debug__",
1030 NULL,
1031 };
1032
1033 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1034 forbidden_name(struct compiling *c, identifier name, const node *n,
1035 int full_checks)
1036 {
1037 assert(PyUnicode_Check(name));
1038 const char * const *p = FORBIDDEN;
1039 if (!full_checks) {
1040 /* In most cases, the parser will protect True, False, and None
1041 from being assign to. */
1042 p += 3;
1043 }
1044 for (; *p; p++) {
1045 if (_PyUnicode_EqualToASCIIString(name, *p)) {
1046 ast_error(c, n, "cannot assign to %U", name);
1047 return 1;
1048 }
1049 }
1050 return 0;
1051 }
1052
1053 static expr_ty
copy_location(expr_ty e,const node * n,const node * end)1054 copy_location(expr_ty e, const node *n, const node *end)
1055 {
1056 if (e) {
1057 e->lineno = LINENO(n);
1058 e->col_offset = n->n_col_offset;
1059 e->end_lineno = end->n_end_lineno;
1060 e->end_col_offset = end->n_end_col_offset;
1061 }
1062 return e;
1063 }
1064
1065 static const char *
get_expr_name(expr_ty e)1066 get_expr_name(expr_ty e)
1067 {
1068 switch (e->kind) {
1069 case Attribute_kind:
1070 return "attribute";
1071 case Subscript_kind:
1072 return "subscript";
1073 case Starred_kind:
1074 return "starred";
1075 case Name_kind:
1076 return "name";
1077 case List_kind:
1078 return "list";
1079 case Tuple_kind:
1080 return "tuple";
1081 case Lambda_kind:
1082 return "lambda";
1083 case Call_kind:
1084 return "function call";
1085 case BoolOp_kind:
1086 case BinOp_kind:
1087 case UnaryOp_kind:
1088 return "operator";
1089 case GeneratorExp_kind:
1090 return "generator expression";
1091 case Yield_kind:
1092 case YieldFrom_kind:
1093 return "yield expression";
1094 case Await_kind:
1095 return "await expression";
1096 case ListComp_kind:
1097 return "list comprehension";
1098 case SetComp_kind:
1099 return "set comprehension";
1100 case DictComp_kind:
1101 return "dict comprehension";
1102 case Dict_kind:
1103 return "dict display";
1104 case Set_kind:
1105 return "set display";
1106 case JoinedStr_kind:
1107 case FormattedValue_kind:
1108 return "f-string expression";
1109 case Constant_kind: {
1110 PyObject *value = e->v.Constant.value;
1111 if (value == Py_None) {
1112 return "None";
1113 }
1114 if (value == Py_False) {
1115 return "False";
1116 }
1117 if (value == Py_True) {
1118 return "True";
1119 }
1120 if (value == Py_Ellipsis) {
1121 return "Ellipsis";
1122 }
1123 return "literal";
1124 }
1125 case Compare_kind:
1126 return "comparison";
1127 case IfExp_kind:
1128 return "conditional expression";
1129 case NamedExpr_kind:
1130 return "named expression";
1131 default:
1132 PyErr_Format(PyExc_SystemError,
1133 "unexpected expression in assignment %d (line %d)",
1134 e->kind, e->lineno);
1135 return NULL;
1136 }
1137 }
1138
1139 /* Set the context ctx for expr_ty e, recursively traversing e.
1140
1141 Only sets context for expr kinds that "can appear in assignment context"
1142 (according to ../Parser/Python.asdl). For other expr kinds, it sets
1143 an appropriate syntax error and returns false.
1144 */
1145
1146 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1147 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1148 {
1149 asdl_seq *s = NULL;
1150
1151 /* The ast defines augmented store and load contexts, but the
1152 implementation here doesn't actually use them. The code may be
1153 a little more complex than necessary as a result. It also means
1154 that expressions in an augmented assignment have a Store context.
1155 Consider restructuring so that augmented assignment uses
1156 set_context(), too.
1157 */
1158 assert(ctx != AugStore && ctx != AugLoad);
1159
1160 switch (e->kind) {
1161 case Attribute_kind:
1162 e->v.Attribute.ctx = ctx;
1163 if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1164 return 0;
1165 break;
1166 case Subscript_kind:
1167 e->v.Subscript.ctx = ctx;
1168 break;
1169 case Starred_kind:
1170 e->v.Starred.ctx = ctx;
1171 if (!set_context(c, e->v.Starred.value, ctx, n))
1172 return 0;
1173 break;
1174 case Name_kind:
1175 if (ctx == Store) {
1176 if (forbidden_name(c, e->v.Name.id, n, 0))
1177 return 0; /* forbidden_name() calls ast_error() */
1178 }
1179 e->v.Name.ctx = ctx;
1180 break;
1181 case List_kind:
1182 e->v.List.ctx = ctx;
1183 s = e->v.List.elts;
1184 break;
1185 case Tuple_kind:
1186 e->v.Tuple.ctx = ctx;
1187 s = e->v.Tuple.elts;
1188 break;
1189 default: {
1190 const char *expr_name = get_expr_name(e);
1191 if (expr_name != NULL) {
1192 ast_error(c, n, "cannot %s %s",
1193 ctx == Store ? "assign to" : "delete",
1194 expr_name);
1195 }
1196 return 0;
1197 }
1198 }
1199
1200 /* If the LHS is a list or tuple, we need to set the assignment
1201 context for all the contained elements.
1202 */
1203 if (s) {
1204 Py_ssize_t i;
1205
1206 for (i = 0; i < asdl_seq_LEN(s); i++) {
1207 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1208 return 0;
1209 }
1210 }
1211 return 1;
1212 }
1213
1214 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1215 ast_for_augassign(struct compiling *c, const node *n)
1216 {
1217 REQ(n, augassign);
1218 n = CHILD(n, 0);
1219 switch (STR(n)[0]) {
1220 case '+':
1221 return Add;
1222 case '-':
1223 return Sub;
1224 case '/':
1225 if (STR(n)[1] == '/')
1226 return FloorDiv;
1227 else
1228 return Div;
1229 case '%':
1230 return Mod;
1231 case '<':
1232 return LShift;
1233 case '>':
1234 return RShift;
1235 case '&':
1236 return BitAnd;
1237 case '^':
1238 return BitXor;
1239 case '|':
1240 return BitOr;
1241 case '*':
1242 if (STR(n)[1] == '*')
1243 return Pow;
1244 else
1245 return Mult;
1246 case '@':
1247 if (c->c_feature_version < 5) {
1248 ast_error(c, n,
1249 "The '@' operator is only supported in Python 3.5 and greater");
1250 return (operator_ty)0;
1251 }
1252 return MatMult;
1253 default:
1254 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1255 return (operator_ty)0;
1256 }
1257 }
1258
1259 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1260 ast_for_comp_op(struct compiling *c, const node *n)
1261 {
1262 /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1263 |'is' 'not'
1264 */
1265 REQ(n, comp_op);
1266 if (NCH(n) == 1) {
1267 n = CHILD(n, 0);
1268 switch (TYPE(n)) {
1269 case LESS:
1270 return Lt;
1271 case GREATER:
1272 return Gt;
1273 case EQEQUAL: /* == */
1274 return Eq;
1275 case LESSEQUAL:
1276 return LtE;
1277 case GREATEREQUAL:
1278 return GtE;
1279 case NOTEQUAL:
1280 return NotEq;
1281 case NAME:
1282 if (strcmp(STR(n), "in") == 0)
1283 return In;
1284 if (strcmp(STR(n), "is") == 0)
1285 return Is;
1286 /* fall through */
1287 default:
1288 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1289 STR(n));
1290 return (cmpop_ty)0;
1291 }
1292 }
1293 else if (NCH(n) == 2) {
1294 /* handle "not in" and "is not" */
1295 switch (TYPE(CHILD(n, 0))) {
1296 case NAME:
1297 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1298 return NotIn;
1299 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1300 return IsNot;
1301 /* fall through */
1302 default:
1303 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1304 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1305 return (cmpop_ty)0;
1306 }
1307 }
1308 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1309 NCH(n));
1310 return (cmpop_ty)0;
1311 }
1312
1313 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1314 seq_for_testlist(struct compiling *c, const node *n)
1315 {
1316 /* testlist: test (',' test)* [',']
1317 testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1318 */
1319 asdl_seq *seq;
1320 expr_ty expression;
1321 int i;
1322 assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1323
1324 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1325 if (!seq)
1326 return NULL;
1327
1328 for (i = 0; i < NCH(n); i += 2) {
1329 const node *ch = CHILD(n, i);
1330 assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1331
1332 expression = ast_for_expr(c, ch);
1333 if (!expression)
1334 return NULL;
1335
1336 assert(i / 2 < seq->size);
1337 asdl_seq_SET(seq, i / 2, expression);
1338 }
1339 return seq;
1340 }
1341
1342 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1343 ast_for_arg(struct compiling *c, const node *n)
1344 {
1345 identifier name;
1346 expr_ty annotation = NULL;
1347 node *ch;
1348 arg_ty ret;
1349
1350 assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1351 ch = CHILD(n, 0);
1352 name = NEW_IDENTIFIER(ch);
1353 if (!name)
1354 return NULL;
1355 if (forbidden_name(c, name, ch, 0))
1356 return NULL;
1357
1358 if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1359 annotation = ast_for_expr(c, CHILD(n, 2));
1360 if (!annotation)
1361 return NULL;
1362 }
1363
1364 ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1365 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1366 if (!ret)
1367 return NULL;
1368 return ret;
1369 }
1370
1371 /* returns -1 if failed to handle keyword only arguments
1372 returns new position to keep processing if successful
1373 (',' tfpdef ['=' test])*
1374 ^^^
1375 start pointing here
1376 */
1377 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1378 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1379 asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1380 {
1381 PyObject *argname;
1382 node *ch;
1383 expr_ty expression, annotation;
1384 arg_ty arg = NULL;
1385 int i = start;
1386 int j = 0; /* index for kwdefaults and kwonlyargs */
1387
1388 if (kwonlyargs == NULL) {
1389 ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1390 return -1;
1391 }
1392 assert(kwdefaults != NULL);
1393 while (i < NCH(n)) {
1394 ch = CHILD(n, i);
1395 switch (TYPE(ch)) {
1396 case vfpdef:
1397 case tfpdef:
1398 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1399 expression = ast_for_expr(c, CHILD(n, i + 2));
1400 if (!expression)
1401 goto error;
1402 asdl_seq_SET(kwdefaults, j, expression);
1403 i += 2; /* '=' and test */
1404 }
1405 else { /* setting NULL if no default value exists */
1406 asdl_seq_SET(kwdefaults, j, NULL);
1407 }
1408 if (NCH(ch) == 3) {
1409 /* ch is NAME ':' test */
1410 annotation = ast_for_expr(c, CHILD(ch, 2));
1411 if (!annotation)
1412 goto error;
1413 }
1414 else {
1415 annotation = NULL;
1416 }
1417 ch = CHILD(ch, 0);
1418 argname = NEW_IDENTIFIER(ch);
1419 if (!argname)
1420 goto error;
1421 if (forbidden_name(c, argname, ch, 0))
1422 goto error;
1423 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1424 ch->n_end_lineno, ch->n_end_col_offset,
1425 c->c_arena);
1426 if (!arg)
1427 goto error;
1428 asdl_seq_SET(kwonlyargs, j++, arg);
1429 i += 1; /* the name */
1430 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1431 i += 1; /* the comma, if present */
1432 break;
1433 case TYPE_COMMENT:
1434 /* arg will be equal to the last argument processed */
1435 arg->type_comment = NEW_TYPE_COMMENT(ch);
1436 if (!arg->type_comment)
1437 goto error;
1438 i += 1;
1439 break;
1440 case DOUBLESTAR:
1441 return i;
1442 default:
1443 ast_error(c, ch, "unexpected node");
1444 goto error;
1445 }
1446 }
1447 return i;
1448 error:
1449 return -1;
1450 }
1451
/* Create AST for argument list.

   Accepts a parameters, typedargslist or varargslist node.  The function
   first walks the children to count positional-only, positional and
   keyword-only arguments and positional defaults, allocates the sequences
   from those counts, and then walks the children again to fill them.

   Returns the arguments_ty, or NULL on failure.
*/

static arguments_ty
ast_for_arguments(struct compiling *c, const node *n)
{
    /* This function handles both typedargslist (function definition)
       and varargslist (lambda definition).

       parameters: '(' [typedargslist] ')'

       The following definition for typedarglist is equivalent to this set of rules:

         arguments = argument (',' [TYPE_COMMENT] argument)*
         argument = tfpdef ['=' test]
         kwargs = '**' tfpdef [','] [TYPE_COMMENT]
         args = '*' [tfpdef]
         kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
                         [TYPE_COMMENT] [kwargs]])
         args_kwonly_kwargs = args kwonly_kwargs | kwargs
         poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
                                         [TYPE_COMMENT] [args_kwonly_kwargs]])
         typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
         typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
                        typedargslist_no_posonly]])|(typedargslist_no_posonly)

       typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
           [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
           [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
           [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
           [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
           '**' tfpdef [','] [TYPE_COMMENT]]] ) | (tfpdef ['=' test] (','
           [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
           [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
           [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
           [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
           (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
           '**' tfpdef [','] [TYPE_COMMENT]))

       tfpdef: NAME [':' test]

       The following definition for varargslist is equivalent to this set of rules:

         arguments = argument (',' argument )*
         argument = vfpdef ['=' test]
         kwargs = '**' vfpdef [',']
         args = '*' [vfpdef]
         kwonly_kwargs = (',' argument )* [',' [kwargs]]
         args_kwonly_kwargs = args kwonly_kwargs | kwargs
         poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
         vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
         varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
                       (vararglist_no_posonly)

       varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
           test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
           ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
           [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
           ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
           | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
           [',']]] | '**' vfpdef [','])

       vfpdef: NAME

    */
    int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
    int nposdefaults = 0, found_default = 0;
    asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
    arg_ty vararg = NULL, kwarg = NULL;
    arg_ty arg = NULL;  /* last positional argument built; target for type comments */
    node *ch;

    if (TYPE(n) == parameters) {
        if (NCH(n) == 2) /* () as argument list */
            return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
        n = CHILD(n, 1);
    }
    assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);

    /* First count the number of positional args & defaults.  The
       variable i is the loop index for this for loop and the next.
       The next loop picks up where the first leaves off.
    */
    for (i = 0; i < NCH(n); i++) {
        ch = CHILD(n, i);
        if (TYPE(ch) == STAR) {
            /* skip star */
            i++;
            if (i < NCH(n) && /* skip argument following star */
                (TYPE(CHILD(n, i)) == tfpdef ||
                 TYPE(CHILD(n, i)) == vfpdef)) {
                i++;
            }
            break;
        }
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
        if (TYPE(ch) == EQUAL) nposdefaults++;
        if (TYPE(ch) == SLASH ) {
            /* Everything counted so far was positional-only; restart the
               count for the ordinary positional arguments. */
            nposonlyargs = nposargs;
            nposargs = 0;
        }
    }
    /* count the number of keyword only args &
       defaults for keyword only args */
    for ( ; i < NCH(n); ++i) {
        ch = CHILD(n, i);
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
    }
    /* Allocate each sequence only when its count is non-zero; a zero count
       leaves the pointer NULL, which is not an error. */
    posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
    if (!posonlyargs && nposonlyargs) {
        return NULL;
    }
    posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
    if (!posargs && nposargs)
        return NULL;
    kwonlyargs = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwonlyargs && nkwonlyargs)
        return NULL;
    posdefaults = (nposdefaults ?
                    _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
    if (!posdefaults && nposdefaults)
        return NULL;
    /* The length of kwonlyargs and kwdefaults are same
       since we set NULL as default for keyword only argument w/o default
       - we have sequence data structure, but no dictionary */
    kwdefaults = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwdefaults && nkwonlyargs)
        return NULL;

    /* tfpdef: NAME [':' test]
       vfpdef: NAME
    */
    i = 0;
    j = 0;  /* index for defaults */
    k = 0;  /* index for args */
    l = 0;  /* index for posonlyargs */
    while (i < NCH(n)) {
        ch = CHILD(n, i);
        switch (TYPE(ch)) {
            case tfpdef:
            case vfpdef:
                /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
                   anything other than EQUAL or a comma? */
                /* XXX Should NCH(n) check be made a separate check? */
                if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
                    expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
                    if (!expression)
                        return NULL;
                    assert(posdefaults != NULL);
                    asdl_seq_SET(posdefaults, j++, expression);
                    i += 2;
                    found_default = 1;
                }
                else if (found_default) {
                    /* A positional argument without a default after one
                       that had a default. */
                    ast_error(c, n,
                              "non-default argument follows default argument");
                    return NULL;
                }
                arg = ast_for_arg(c, ch);
                if (!arg)
                    return NULL;
                /* The first nposonlyargs arguments go to posonlyargs, the
                   remaining ones to posargs. */
                if (l < nposonlyargs) {
                    asdl_seq_SET(posonlyargs, l++, arg);
                } else {
                    asdl_seq_SET(posargs, k++, arg);
                }
                i += 1; /* the name */
                if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                    i += 1; /* the comma, if present */
                break;
             case SLASH:
                /* Advance the slash and the comma. If there are more names
                 * after the slash there will be a comma so we are advancing
                 * the correct number of nodes. If the slash is the last item,
                 * we will be advancing an extra token but then i > NCH(n)
                 * and the enclosing while will finish correctly. */
                i += 2;
                break;
            case STAR:
                /* A '*' that is the last child, or that is followed only by
                   a single comma or type comment, has nothing to apply to. */
                if (i+1 >= NCH(n) ||
                    (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
                                       || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
                    ast_error(c, CHILD(n, i),
                              "named arguments must follow bare *");
                    return NULL;
                }
                ch = CHILD(n, i+1);  /* tfpdef or COMMA */
                if (TYPE(ch) == COMMA) {
                    int res = 0;
                    i += 2; /* now follows keyword only arguments */

                    if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
                        ast_error(c, CHILD(n, i),
                                  "bare * has associated type comment");
                        return NULL;
                    }

                    res = handle_keywordonly_args(c, n, i,
                                                  kwonlyargs, kwdefaults);
                    if (res == -1) return NULL;
                    i = res; /* res has new position to process */
                }
                else {
                    vararg = ast_for_arg(c, ch);
                    if (!vararg)
                        return NULL;

                    i += 2; /* the star and the name */
                    if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                        i += 1; /* the comma, if present */

                    /* A type comment directly after the vararg belongs to it. */
                    if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
                        vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
                        if (!vararg->type_comment)
                            return NULL;
                        i += 1;
                    }

                    if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
                                    || TYPE(CHILD(n, i)) == vfpdef)) {
                        int res = 0;
                        res = handle_keywordonly_args(c, n, i,
                                                      kwonlyargs, kwdefaults);
                        if (res == -1) return NULL;
                        i = res; /* res has new position to process */
                    }
                }
                break;
            case DOUBLESTAR:
                ch = CHILD(n, i+1);  /* tfpdef */
                assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
                kwarg = ast_for_arg(c, ch);
                if (!kwarg)
                    return NULL;
                i += 2; /* the double star and the name */
                if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
                    i += 1; /* the comma, if present */
                break;
            case TYPE_COMMENT:
                assert(i);

                /* Once a '**' argument has been seen, it is the one that
                   receives the type comment. */
                if (kwarg)
                    arg = kwarg;

                /* arg will be equal to the last argument processed */
                arg->type_comment = NEW_TYPE_COMMENT(ch);
                if (!arg->type_comment)
                    return NULL;
                i += 1;
                break;
            default:
                PyErr_Format(PyExc_SystemError,
                             "unexpected node in varargslist: %d @ %d",
                             TYPE(ch), i);
                return NULL;
        }
    }
    return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
}
1716
1717 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1718 ast_for_dotted_name(struct compiling *c, const node *n)
1719 {
1720 expr_ty e;
1721 identifier id;
1722 int lineno, col_offset;
1723 int i;
1724 node *ch;
1725
1726 REQ(n, dotted_name);
1727
1728 lineno = LINENO(n);
1729 col_offset = n->n_col_offset;
1730
1731 ch = CHILD(n, 0);
1732 id = NEW_IDENTIFIER(ch);
1733 if (!id)
1734 return NULL;
1735 e = Name(id, Load, lineno, col_offset,
1736 ch->n_end_lineno, ch->n_end_col_offset, c->c_arena);
1737 if (!e)
1738 return NULL;
1739
1740 for (i = 2; i < NCH(n); i+=2) {
1741 const node *child = CHILD(n, i);
1742 id = NEW_IDENTIFIER(child);
1743 if (!id)
1744 return NULL;
1745 e = Attribute(e, id, Load, lineno, col_offset,
1746 child->n_end_lineno, child->n_end_col_offset, c->c_arena);
1747 if (!e)
1748 return NULL;
1749 }
1750
1751 return e;
1752 }
1753
1754 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1755 ast_for_decorator(struct compiling *c, const node *n)
1756 {
1757 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1758 expr_ty d = NULL;
1759 expr_ty name_expr;
1760
1761 REQ(n, decorator);
1762 REQ(CHILD(n, 0), AT);
1763 REQ(RCHILD(n, -1), NEWLINE);
1764
1765 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1766 if (!name_expr)
1767 return NULL;
1768
1769 if (NCH(n) == 3) { /* No arguments */
1770 d = name_expr;
1771 name_expr = NULL;
1772 }
1773 else if (NCH(n) == 5) { /* Call with no arguments */
1774 d = Call(name_expr, NULL, NULL,
1775 name_expr->lineno, name_expr->col_offset,
1776 CHILD(n, 3)->n_end_lineno, CHILD(n, 3)->n_end_col_offset,
1777 c->c_arena);
1778 if (!d)
1779 return NULL;
1780 name_expr = NULL;
1781 }
1782 else {
1783 d = ast_for_call(c, CHILD(n, 3), name_expr,
1784 CHILD(n, 1), CHILD(n, 2), CHILD(n, 4));
1785 if (!d)
1786 return NULL;
1787 name_expr = NULL;
1788 }
1789
1790 return d;
1791 }
1792
1793 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1794 ast_for_decorators(struct compiling *c, const node *n)
1795 {
1796 asdl_seq* decorator_seq;
1797 expr_ty d;
1798 int i;
1799
1800 REQ(n, decorators);
1801 decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1802 if (!decorator_seq)
1803 return NULL;
1804
1805 for (i = 0; i < NCH(n); i++) {
1806 d = ast_for_decorator(c, CHILD(n, i));
1807 if (!d)
1808 return NULL;
1809 asdl_seq_SET(decorator_seq, i, d);
1810 }
1811 return decorator_seq;
1812 }
1813
1814 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1815 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1816 asdl_seq *decorator_seq, bool is_async)
1817 {
1818 /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1819 const node * const n = is_async ? CHILD(n0, 1) : n0;
1820 identifier name;
1821 arguments_ty args;
1822 asdl_seq *body;
1823 expr_ty returns = NULL;
1824 int name_i = 1;
1825 int end_lineno, end_col_offset;
1826 node *tc;
1827 string type_comment = NULL;
1828
1829 if (is_async && c->c_feature_version < 5) {
1830 ast_error(c, n,
1831 "Async functions are only supported in Python 3.5 and greater");
1832 return NULL;
1833 }
1834
1835 REQ(n, funcdef);
1836
1837 name = NEW_IDENTIFIER(CHILD(n, name_i));
1838 if (!name)
1839 return NULL;
1840 if (forbidden_name(c, name, CHILD(n, name_i), 0))
1841 return NULL;
1842 args = ast_for_arguments(c, CHILD(n, name_i + 1));
1843 if (!args)
1844 return NULL;
1845 if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1846 returns = ast_for_expr(c, CHILD(n, name_i + 3));
1847 if (!returns)
1848 return NULL;
1849 name_i += 2;
1850 }
1851 if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1852 type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1853 if (!type_comment)
1854 return NULL;
1855 name_i += 1;
1856 }
1857 body = ast_for_suite(c, CHILD(n, name_i + 3));
1858 if (!body)
1859 return NULL;
1860 get_last_end_pos(body, &end_lineno, &end_col_offset);
1861
1862 if (NCH(CHILD(n, name_i + 3)) > 1) {
1863 /* Check if the suite has a type comment in it. */
1864 tc = CHILD(CHILD(n, name_i + 3), 1);
1865
1866 if (TYPE(tc) == TYPE_COMMENT) {
1867 if (type_comment != NULL) {
1868 ast_error(c, n, "Cannot have two type comments on def");
1869 return NULL;
1870 }
1871 type_comment = NEW_TYPE_COMMENT(tc);
1872 if (!type_comment)
1873 return NULL;
1874 }
1875 }
1876
1877 if (is_async)
1878 return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1879 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1880 else
1881 return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1882 LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1883 }
1884
1885 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1886 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1887 {
1888 /* async_funcdef: ASYNC funcdef */
1889 REQ(n, async_funcdef);
1890 REQ(CHILD(n, 0), ASYNC);
1891 REQ(CHILD(n, 1), funcdef);
1892
1893 return ast_for_funcdef_impl(c, n, decorator_seq,
1894 true /* is_async */);
1895 }
1896
1897 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1898 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1899 {
1900 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1901 return ast_for_funcdef_impl(c, n, decorator_seq,
1902 false /* is_async */);
1903 }
1904
1905
1906 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1907 ast_for_async_stmt(struct compiling *c, const node *n)
1908 {
1909 /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1910 REQ(n, async_stmt);
1911 REQ(CHILD(n, 0), ASYNC);
1912
1913 switch (TYPE(CHILD(n, 1))) {
1914 case funcdef:
1915 return ast_for_funcdef_impl(c, n, NULL,
1916 true /* is_async */);
1917 case with_stmt:
1918 return ast_for_with_stmt(c, n,
1919 true /* is_async */);
1920
1921 case for_stmt:
1922 return ast_for_for_stmt(c, n,
1923 true /* is_async */);
1924
1925 default:
1926 PyErr_Format(PyExc_SystemError,
1927 "invalid async stament: %s",
1928 STR(CHILD(n, 1)));
1929 return NULL;
1930 }
1931 }
1932
1933 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1934 ast_for_decorated(struct compiling *c, const node *n)
1935 {
1936 /* decorated: decorators (classdef | funcdef | async_funcdef) */
1937 stmt_ty thing = NULL;
1938 asdl_seq *decorator_seq = NULL;
1939
1940 REQ(n, decorated);
1941
1942 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1943 if (!decorator_seq)
1944 return NULL;
1945
1946 assert(TYPE(CHILD(n, 1)) == funcdef ||
1947 TYPE(CHILD(n, 1)) == async_funcdef ||
1948 TYPE(CHILD(n, 1)) == classdef);
1949
1950 if (TYPE(CHILD(n, 1)) == funcdef) {
1951 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1952 } else if (TYPE(CHILD(n, 1)) == classdef) {
1953 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1954 } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1955 thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1956 }
1957 return thing;
1958 }
1959
1960 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1961 ast_for_namedexpr(struct compiling *c, const node *n)
1962 {
1963 /* namedexpr_test: test [':=' test]
1964 argument: ( test [comp_for] |
1965 test ':=' test |
1966 test '=' test |
1967 '**' test |
1968 '*' test )
1969 */
1970 expr_ty target, value;
1971
1972 target = ast_for_expr(c, CHILD(n, 0));
1973 if (!target)
1974 return NULL;
1975
1976 value = ast_for_expr(c, CHILD(n, 2));
1977 if (!value)
1978 return NULL;
1979
1980 if (target->kind != Name_kind) {
1981 const char *expr_name = get_expr_name(target);
1982 if (expr_name != NULL) {
1983 ast_error(c, n, "cannot use assignment expressions with %s", expr_name);
1984 }
1985 return NULL;
1986 }
1987
1988 if (!set_context(c, target, Store, n))
1989 return NULL;
1990
1991 return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1992 n->n_end_col_offset, c->c_arena);
1993 }
1994
1995 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1996 ast_for_lambdef(struct compiling *c, const node *n)
1997 {
1998 /* lambdef: 'lambda' [varargslist] ':' test
1999 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
2000 arguments_ty args;
2001 expr_ty expression;
2002
2003 if (NCH(n) == 3) {
2004 args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
2005 if (!args)
2006 return NULL;
2007 expression = ast_for_expr(c, CHILD(n, 2));
2008 if (!expression)
2009 return NULL;
2010 }
2011 else {
2012 args = ast_for_arguments(c, CHILD(n, 1));
2013 if (!args)
2014 return NULL;
2015 expression = ast_for_expr(c, CHILD(n, 3));
2016 if (!expression)
2017 return NULL;
2018 }
2019
2020 return Lambda(args, expression, LINENO(n), n->n_col_offset,
2021 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2022 }
2023
2024 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)2025 ast_for_ifexpr(struct compiling *c, const node *n)
2026 {
2027 /* test: or_test 'if' or_test 'else' test */
2028 expr_ty expression, body, orelse;
2029
2030 assert(NCH(n) == 5);
2031 body = ast_for_expr(c, CHILD(n, 0));
2032 if (!body)
2033 return NULL;
2034 expression = ast_for_expr(c, CHILD(n, 2));
2035 if (!expression)
2036 return NULL;
2037 orelse = ast_for_expr(c, CHILD(n, 4));
2038 if (!orelse)
2039 return NULL;
2040 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
2041 n->n_end_lineno, n->n_end_col_offset,
2042 c->c_arena);
2043 }
2044
2045 /*
2046 Count the number of 'for' loops in a comprehension.
2047
2048 Helper for ast_for_comprehension().
2049 */
2050
2051 static int
count_comp_fors(struct compiling * c,const node * n)2052 count_comp_fors(struct compiling *c, const node *n)
2053 {
2054 int n_fors = 0;
2055
2056 count_comp_for:
2057 n_fors++;
2058 REQ(n, comp_for);
2059 if (NCH(n) == 2) {
2060 REQ(CHILD(n, 0), ASYNC);
2061 n = CHILD(n, 1);
2062 }
2063 else if (NCH(n) == 1) {
2064 n = CHILD(n, 0);
2065 }
2066 else {
2067 goto error;
2068 }
2069 if (NCH(n) == (5)) {
2070 n = CHILD(n, 4);
2071 }
2072 else {
2073 return n_fors;
2074 }
2075 count_comp_iter:
2076 REQ(n, comp_iter);
2077 n = CHILD(n, 0);
2078 if (TYPE(n) == comp_for)
2079 goto count_comp_for;
2080 else if (TYPE(n) == comp_if) {
2081 if (NCH(n) == 3) {
2082 n = CHILD(n, 2);
2083 goto count_comp_iter;
2084 }
2085 else
2086 return n_fors;
2087 }
2088
2089 error:
2090 /* Should never be reached */
2091 PyErr_SetString(PyExc_SystemError,
2092 "logic error in count_comp_fors");
2093 return -1;
2094 }
2095
2096 /* Count the number of 'if' statements in a comprehension.
2097
2098 Helper for ast_for_comprehension().
2099 */
2100
2101 static int
count_comp_ifs(struct compiling * c,const node * n)2102 count_comp_ifs(struct compiling *c, const node *n)
2103 {
2104 int n_ifs = 0;
2105
2106 while (1) {
2107 REQ(n, comp_iter);
2108 if (TYPE(CHILD(n, 0)) == comp_for)
2109 return n_ifs;
2110 n = CHILD(n, 0);
2111 REQ(n, comp_if);
2112 n_ifs++;
2113 if (NCH(n) == 2)
2114 return n_ifs;
2115 n = CHILD(n, 2);
2116 }
2117 }
2118
2119 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)2120 ast_for_comprehension(struct compiling *c, const node *n)
2121 {
2122 int i, n_fors;
2123 asdl_seq *comps;
2124
2125 n_fors = count_comp_fors(c, n);
2126 if (n_fors == -1)
2127 return NULL;
2128
2129 comps = _Py_asdl_seq_new(n_fors, c->c_arena);
2130 if (!comps)
2131 return NULL;
2132
2133 for (i = 0; i < n_fors; i++) {
2134 comprehension_ty comp;
2135 asdl_seq *t;
2136 expr_ty expression, first;
2137 node *for_ch;
2138 node *sync_n;
2139 int is_async = 0;
2140
2141 REQ(n, comp_for);
2142
2143 if (NCH(n) == 2) {
2144 is_async = 1;
2145 REQ(CHILD(n, 0), ASYNC);
2146 sync_n = CHILD(n, 1);
2147 }
2148 else {
2149 sync_n = CHILD(n, 0);
2150 }
2151 REQ(sync_n, sync_comp_for);
2152
2153 /* Async comprehensions only allowed in Python 3.6 and greater */
2154 if (is_async && c->c_feature_version < 6) {
2155 ast_error(c, n,
2156 "Async comprehensions are only supported in Python 3.6 and greater");
2157 return NULL;
2158 }
2159
2160 for_ch = CHILD(sync_n, 1);
2161 t = ast_for_exprlist(c, for_ch, Store);
2162 if (!t)
2163 return NULL;
2164 expression = ast_for_expr(c, CHILD(sync_n, 3));
2165 if (!expression)
2166 return NULL;
2167
2168 /* Check the # of children rather than the length of t, since
2169 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
2170 first = (expr_ty)asdl_seq_GET(t, 0);
2171 if (NCH(for_ch) == 1)
2172 comp = comprehension(first, expression, NULL,
2173 is_async, c->c_arena);
2174 else
2175 comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
2176 for_ch->n_end_lineno, for_ch->n_end_col_offset,
2177 c->c_arena),
2178 expression, NULL, is_async, c->c_arena);
2179 if (!comp)
2180 return NULL;
2181
2182 if (NCH(sync_n) == 5) {
2183 int j, n_ifs;
2184 asdl_seq *ifs;
2185
2186 n = CHILD(sync_n, 4);
2187 n_ifs = count_comp_ifs(c, n);
2188 if (n_ifs == -1)
2189 return NULL;
2190
2191 ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
2192 if (!ifs)
2193 return NULL;
2194
2195 for (j = 0; j < n_ifs; j++) {
2196 REQ(n, comp_iter);
2197 n = CHILD(n, 0);
2198 REQ(n, comp_if);
2199
2200 expression = ast_for_expr(c, CHILD(n, 1));
2201 if (!expression)
2202 return NULL;
2203 asdl_seq_SET(ifs, j, expression);
2204 if (NCH(n) == 3)
2205 n = CHILD(n, 2);
2206 }
2207 /* on exit, must guarantee that n is a comp_for */
2208 if (TYPE(n) == comp_iter)
2209 n = CHILD(n, 0);
2210 comp->ifs = ifs;
2211 }
2212 asdl_seq_SET(comps, i, comp);
2213 }
2214 return comps;
2215 }
2216
2217 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2218 ast_for_itercomp(struct compiling *c, const node *n, int type)
2219 {
2220 /* testlist_comp: (test|star_expr)
2221 * ( comp_for | (',' (test|star_expr))* [','] ) */
2222 expr_ty elt;
2223 asdl_seq *comps;
2224 node *ch;
2225
2226 assert(NCH(n) > 1);
2227
2228 ch = CHILD(n, 0);
2229 elt = ast_for_expr(c, ch);
2230 if (!elt)
2231 return NULL;
2232 if (elt->kind == Starred_kind) {
2233 ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2234 return NULL;
2235 }
2236
2237 comps = ast_for_comprehension(c, CHILD(n, 1));
2238 if (!comps)
2239 return NULL;
2240
2241 if (type == COMP_GENEXP)
2242 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2243 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2244 else if (type == COMP_LISTCOMP)
2245 return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2246 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2247 else if (type == COMP_SETCOMP)
2248 return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2249 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2250 else
2251 /* Should never happen */
2252 return NULL;
2253 }
2254
2255 /* Fills in the key, value pair corresponding to the dict element. In case
2256 * of an unpacking, key is NULL. *i is advanced by the number of ast
2257 * elements. Iff successful, nonzero is returned.
2258 */
2259 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2260 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2261 expr_ty *key, expr_ty *value)
2262 {
2263 expr_ty expression;
2264 if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2265 assert(NCH(n) - *i >= 2);
2266
2267 expression = ast_for_expr(c, CHILD(n, *i + 1));
2268 if (!expression)
2269 return 0;
2270 *key = NULL;
2271 *value = expression;
2272
2273 *i += 2;
2274 }
2275 else {
2276 assert(NCH(n) - *i >= 3);
2277
2278 expression = ast_for_expr(c, CHILD(n, *i));
2279 if (!expression)
2280 return 0;
2281 *key = expression;
2282
2283 REQ(CHILD(n, *i + 1), COLON);
2284
2285 expression = ast_for_expr(c, CHILD(n, *i + 2));
2286 if (!expression)
2287 return 0;
2288 *value = expression;
2289
2290 *i += 3;
2291 }
2292 return 1;
2293 }
2294
2295 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2296 ast_for_dictcomp(struct compiling *c, const node *n)
2297 {
2298 expr_ty key, value;
2299 asdl_seq *comps;
2300 int i = 0;
2301
2302 if (!ast_for_dictelement(c, n, &i, &key, &value))
2303 return NULL;
2304 assert(key);
2305 assert(NCH(n) - i >= 1);
2306
2307 comps = ast_for_comprehension(c, CHILD(n, i));
2308 if (!comps)
2309 return NULL;
2310
2311 return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2312 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2313 }
2314
2315 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2316 ast_for_dictdisplay(struct compiling *c, const node *n)
2317 {
2318 int i;
2319 int j;
2320 int size;
2321 asdl_seq *keys, *values;
2322
2323 size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2324 keys = _Py_asdl_seq_new(size, c->c_arena);
2325 if (!keys)
2326 return NULL;
2327
2328 values = _Py_asdl_seq_new(size, c->c_arena);
2329 if (!values)
2330 return NULL;
2331
2332 j = 0;
2333 for (i = 0; i < NCH(n); i++) {
2334 expr_ty key, value;
2335
2336 if (!ast_for_dictelement(c, n, &i, &key, &value))
2337 return NULL;
2338 asdl_seq_SET(keys, j, key);
2339 asdl_seq_SET(values, j, value);
2340
2341 j++;
2342 }
2343 keys->size = j;
2344 values->size = j;
2345 return Dict(keys, values, LINENO(n), n->n_col_offset,
2346 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2347 }
2348
2349 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2350 ast_for_genexp(struct compiling *c, const node *n)
2351 {
2352 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2353 return ast_for_itercomp(c, n, COMP_GENEXP);
2354 }
2355
2356 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2357 ast_for_listcomp(struct compiling *c, const node *n)
2358 {
2359 assert(TYPE(n) == (testlist_comp));
2360 return ast_for_itercomp(c, n, COMP_LISTCOMP);
2361 }
2362
2363 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2364 ast_for_setcomp(struct compiling *c, const node *n)
2365 {
2366 assert(TYPE(n) == (dictorsetmaker));
2367 return ast_for_itercomp(c, n, COMP_SETCOMP);
2368 }
2369
2370 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2371 ast_for_setdisplay(struct compiling *c, const node *n)
2372 {
2373 int i;
2374 int size;
2375 asdl_seq *elts;
2376
2377 assert(TYPE(n) == (dictorsetmaker));
2378 size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2379 elts = _Py_asdl_seq_new(size, c->c_arena);
2380 if (!elts)
2381 return NULL;
2382 for (i = 0; i < NCH(n); i += 2) {
2383 expr_ty expression;
2384 expression = ast_for_expr(c, CHILD(n, i));
2385 if (!expression)
2386 return NULL;
2387 asdl_seq_SET(elts, i / 2, expression);
2388 }
2389 return Set(elts, LINENO(n), n->n_col_offset,
2390 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2391 }
2392
2393 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2394 ast_for_atom(struct compiling *c, const node *n)
2395 {
2396 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2397 | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2398 | '...' | 'None' | 'True' | 'False'
2399 */
2400 node *ch = CHILD(n, 0);
2401
2402 switch (TYPE(ch)) {
2403 case NAME: {
2404 PyObject *name;
2405 const char *s = STR(ch);
2406 size_t len = strlen(s);
2407 if (len >= 4 && len <= 5) {
2408 if (!strcmp(s, "None"))
2409 return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
2410 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2411 if (!strcmp(s, "True"))
2412 return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
2413 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2414 if (!strcmp(s, "False"))
2415 return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
2416 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2417 }
2418 name = new_identifier(s, c);
2419 if (!name)
2420 return NULL;
2421 /* All names start in Load context, but may later be changed. */
2422 return Name(name, Load, LINENO(n), n->n_col_offset,
2423 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2424 }
2425 case STRING: {
2426 expr_ty str = parsestrplus(c, n);
2427 if (!str) {
2428 const char *errtype = NULL;
2429 if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2430 errtype = "unicode error";
2431 else if (PyErr_ExceptionMatches(PyExc_ValueError))
2432 errtype = "value error";
2433 if (errtype) {
2434 PyObject *type, *value, *tback, *errstr;
2435 PyErr_Fetch(&type, &value, &tback);
2436 errstr = PyObject_Str(value);
2437 if (errstr) {
2438 ast_error(c, n, "(%s) %U", errtype, errstr);
2439 Py_DECREF(errstr);
2440 }
2441 else {
2442 PyErr_Clear();
2443 ast_error(c, n, "(%s) unknown error", errtype);
2444 }
2445 Py_DECREF(type);
2446 Py_XDECREF(value);
2447 Py_XDECREF(tback);
2448 }
2449 return NULL;
2450 }
2451 return str;
2452 }
2453 case NUMBER: {
2454 PyObject *pynum;
2455 /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
2456 /* Check for underscores here rather than in parse_number so we can report a line number on error */
2457 if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
2458 ast_error(c, ch,
2459 "Underscores in numeric literals are only supported in Python 3.6 and greater");
2460 return NULL;
2461 }
2462 pynum = parsenumber(c, STR(ch));
2463 if (!pynum) {
2464 PyThreadState *tstate = PyThreadState_GET();
2465 // The only way a ValueError should happen in _this_ code is via
2466 // PyLong_FromString hitting a length limit.
2467 if (tstate->curexc_type == PyExc_ValueError &&
2468 tstate->curexc_value != NULL) {
2469 PyObject *type, *value, *tb;
2470 // This acts as PyErr_Clear() as we're replacing curexc.
2471 PyErr_Fetch(&type, &value, &tb);
2472 Py_XDECREF(tb);
2473 Py_DECREF(type);
2474 ast_error(c, ch,
2475 "%S - Consider hexadecimal for huge integer literals "
2476 "to avoid decimal conversion limits.",
2477 value);
2478 Py_DECREF(value);
2479 }
2480 return NULL;
2481 }
2482
2483 if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2484 Py_DECREF(pynum);
2485 return NULL;
2486 }
2487 return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
2488 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2489 }
2490 case ELLIPSIS: /* Ellipsis */
2491 return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
2492 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2493 case LPAR: /* some parenthesized expressions */
2494 ch = CHILD(n, 1);
2495
2496 if (TYPE(ch) == RPAR)
2497 return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
2498 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2499
2500 if (TYPE(ch) == yield_expr)
2501 return ast_for_expr(c, ch);
2502
2503 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2504 if (NCH(ch) == 1) {
2505 return ast_for_testlist(c, ch);
2506 }
2507
2508 if (TYPE(CHILD(ch, 1)) == comp_for) {
2509 return copy_location(ast_for_genexp(c, ch), n, n);
2510 }
2511 else {
2512 return copy_location(ast_for_testlist(c, ch), n, n);
2513 }
2514 case LSQB: /* list (or list comprehension) */
2515 ch = CHILD(n, 1);
2516
2517 if (TYPE(ch) == RSQB)
2518 return List(NULL, Load, LINENO(n), n->n_col_offset,
2519 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2520
2521 REQ(ch, testlist_comp);
2522 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2523 asdl_seq *elts = seq_for_testlist(c, ch);
2524 if (!elts)
2525 return NULL;
2526
2527 return List(elts, Load, LINENO(n), n->n_col_offset,
2528 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2529 }
2530 else {
2531 return copy_location(ast_for_listcomp(c, ch), n, n);
2532 }
2533 case LBRACE: {
2534 /* dictorsetmaker: ( ((test ':' test | '**' test)
2535 * (comp_for | (',' (test ':' test | '**' test))* [','])) |
2536 * ((test | '*' test)
2537 * (comp_for | (',' (test | '*' test))* [','])) ) */
2538 expr_ty res;
2539 ch = CHILD(n, 1);
2540 if (TYPE(ch) == RBRACE) {
2541 /* It's an empty dict. */
2542 return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
2543 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2544 }
2545 else {
2546 int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2547 if (NCH(ch) == 1 ||
2548 (NCH(ch) > 1 &&
2549 TYPE(CHILD(ch, 1)) == COMMA)) {
2550 /* It's a set display. */
2551 res = ast_for_setdisplay(c, ch);
2552 }
2553 else if (NCH(ch) > 1 &&
2554 TYPE(CHILD(ch, 1)) == comp_for) {
2555 /* It's a set comprehension. */
2556 res = ast_for_setcomp(c, ch);
2557 }
2558 else if (NCH(ch) > 3 - is_dict &&
2559 TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2560 /* It's a dictionary comprehension. */
2561 if (is_dict) {
2562 ast_error(c, n,
2563 "dict unpacking cannot be used in dict comprehension");
2564 return NULL;
2565 }
2566 res = ast_for_dictcomp(c, ch);
2567 }
2568 else {
2569 /* It's a dictionary display. */
2570 res = ast_for_dictdisplay(c, ch);
2571 }
2572 return copy_location(res, n, n);
2573 }
2574 }
2575 default:
2576 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2577 return NULL;
2578 }
2579 }
2580
2581 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2582 ast_for_slice(struct compiling *c, const node *n)
2583 {
2584 node *ch;
2585 expr_ty lower = NULL, upper = NULL, step = NULL;
2586
2587 REQ(n, subscript);
2588
2589 /*
2590 subscript: test | [test] ':' [test] [sliceop]
2591 sliceop: ':' [test]
2592 */
2593 ch = CHILD(n, 0);
2594 if (NCH(n) == 1 && TYPE(ch) == test) {
2595 /* 'step' variable hold no significance in terms of being used over
2596 other vars */
2597 step = ast_for_expr(c, ch);
2598 if (!step)
2599 return NULL;
2600
2601 return Index(step, c->c_arena);
2602 }
2603
2604 if (TYPE(ch) == test) {
2605 lower = ast_for_expr(c, ch);
2606 if (!lower)
2607 return NULL;
2608 }
2609
2610 /* If there's an upper bound it's in the second or third position. */
2611 if (TYPE(ch) == COLON) {
2612 if (NCH(n) > 1) {
2613 node *n2 = CHILD(n, 1);
2614
2615 if (TYPE(n2) == test) {
2616 upper = ast_for_expr(c, n2);
2617 if (!upper)
2618 return NULL;
2619 }
2620 }
2621 } else if (NCH(n) > 2) {
2622 node *n2 = CHILD(n, 2);
2623
2624 if (TYPE(n2) == test) {
2625 upper = ast_for_expr(c, n2);
2626 if (!upper)
2627 return NULL;
2628 }
2629 }
2630
2631 ch = CHILD(n, NCH(n) - 1);
2632 if (TYPE(ch) == sliceop) {
2633 if (NCH(ch) != 1) {
2634 ch = CHILD(ch, 1);
2635 if (TYPE(ch) == test) {
2636 step = ast_for_expr(c, ch);
2637 if (!step)
2638 return NULL;
2639 }
2640 }
2641 }
2642
2643 return Slice(lower, upper, step, c->c_arena);
2644 }
2645
2646 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2647 ast_for_binop(struct compiling *c, const node *n)
2648 {
2649 /* Must account for a sequence of expressions.
2650 How should A op B op C by represented?
2651 BinOp(BinOp(A, op, B), op, C).
2652 */
2653
2654 int i, nops;
2655 expr_ty expr1, expr2, result;
2656 operator_ty newoperator;
2657
2658 expr1 = ast_for_expr(c, CHILD(n, 0));
2659 if (!expr1)
2660 return NULL;
2661
2662 expr2 = ast_for_expr(c, CHILD(n, 2));
2663 if (!expr2)
2664 return NULL;
2665
2666 newoperator = get_operator(c, CHILD(n, 1));
2667 if (!newoperator)
2668 return NULL;
2669
2670 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2671 CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2672 c->c_arena);
2673 if (!result)
2674 return NULL;
2675
2676 nops = (NCH(n) - 1) / 2;
2677 for (i = 1; i < nops; i++) {
2678 expr_ty tmp_result, tmp;
2679 const node* next_oper = CHILD(n, i * 2 + 1);
2680
2681 newoperator = get_operator(c, next_oper);
2682 if (!newoperator)
2683 return NULL;
2684
2685 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2686 if (!tmp)
2687 return NULL;
2688
2689 tmp_result = BinOp(result, newoperator, tmp,
2690 LINENO(n), n->n_col_offset,
2691 CHILD(n, i * 2 + 2)->n_end_lineno,
2692 CHILD(n, i * 2 + 2)->n_end_col_offset,
2693 c->c_arena);
2694 if (!tmp_result)
2695 return NULL;
2696 result = tmp_result;
2697 }
2698 return result;
2699 }
2700
2701 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr,const node * start)2702 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start)
2703 {
2704 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2705 subscriptlist: subscript (',' subscript)* [',']
2706 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2707 */
2708 const node *n_copy = n;
2709 REQ(n, trailer);
2710 if (TYPE(CHILD(n, 0)) == LPAR) {
2711 if (NCH(n) == 2)
2712 return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset,
2713 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2714 else
2715 return ast_for_call(c, CHILD(n, 1), left_expr,
2716 start, CHILD(n, 0), CHILD(n, 2));
2717 }
2718 else if (TYPE(CHILD(n, 0)) == DOT) {
2719 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2720 if (!attr_id)
2721 return NULL;
2722 return Attribute(left_expr, attr_id, Load,
2723 LINENO(start), start->n_col_offset,
2724 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2725 }
2726 else {
2727 REQ(CHILD(n, 0), LSQB);
2728 REQ(CHILD(n, 2), RSQB);
2729 n = CHILD(n, 1);
2730 if (NCH(n) == 1) {
2731 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2732 if (!slc)
2733 return NULL;
2734 return Subscript(left_expr, slc, Load, LINENO(start), start->n_col_offset,
2735 n_copy->n_end_lineno, n_copy->n_end_col_offset,
2736 c->c_arena);
2737 }
2738 else {
2739 /* The grammar is ambiguous here. The ambiguity is resolved
2740 by treating the sequence as a tuple literal if there are
2741 no slice features.
2742 */
2743 Py_ssize_t j;
2744 slice_ty slc;
2745 expr_ty e;
2746 int simple = 1;
2747 asdl_seq *slices, *elts;
2748 slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2749 if (!slices)
2750 return NULL;
2751 for (j = 0; j < NCH(n); j += 2) {
2752 slc = ast_for_slice(c, CHILD(n, j));
2753 if (!slc)
2754 return NULL;
2755 if (slc->kind != Index_kind)
2756 simple = 0;
2757 asdl_seq_SET(slices, j / 2, slc);
2758 }
2759 if (!simple) {
2760 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2761 Load, LINENO(start), start->n_col_offset,
2762 n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2763 }
2764 /* extract Index values and put them in a Tuple */
2765 elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2766 if (!elts)
2767 return NULL;
2768 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2769 slc = (slice_ty)asdl_seq_GET(slices, j);
2770 assert(slc->kind == Index_kind && slc->v.Index.value);
2771 asdl_seq_SET(elts, j, slc->v.Index.value);
2772 }
2773 e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2774 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2775 if (!e)
2776 return NULL;
2777 return Subscript(left_expr, Index(e, c->c_arena),
2778 Load, LINENO(start), start->n_col_offset,
2779 n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2780 }
2781 }
2782 }
2783
2784 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2785 ast_for_factor(struct compiling *c, const node *n)
2786 {
2787 expr_ty expression;
2788
2789 expression = ast_for_expr(c, CHILD(n, 1));
2790 if (!expression)
2791 return NULL;
2792
2793 switch (TYPE(CHILD(n, 0))) {
2794 case PLUS:
2795 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2796 n->n_end_lineno, n->n_end_col_offset,
2797 c->c_arena);
2798 case MINUS:
2799 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2800 n->n_end_lineno, n->n_end_col_offset,
2801 c->c_arena);
2802 case TILDE:
2803 return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2804 n->n_end_lineno, n->n_end_col_offset,
2805 c->c_arena);
2806 }
2807 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2808 TYPE(CHILD(n, 0)));
2809 return NULL;
2810 }
2811
2812 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2813 ast_for_atom_expr(struct compiling *c, const node *n)
2814 {
2815 int i, nch, start = 0;
2816 expr_ty e;
2817
2818 REQ(n, atom_expr);
2819 nch = NCH(n);
2820
2821 if (TYPE(CHILD(n, 0)) == AWAIT) {
2822 if (c->c_feature_version < 5) {
2823 ast_error(c, n,
2824 "Await expressions are only supported in Python 3.5 and greater");
2825 return NULL;
2826 }
2827 start = 1;
2828 assert(nch > 1);
2829 }
2830
2831 e = ast_for_atom(c, CHILD(n, start));
2832 if (!e)
2833 return NULL;
2834 if (nch == 1)
2835 return e;
2836 if (start && nch == 2) {
2837 return Await(e, LINENO(n), n->n_col_offset,
2838 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2839 }
2840
2841 for (i = start + 1; i < nch; i++) {
2842 node *ch = CHILD(n, i);
2843 if (TYPE(ch) != trailer)
2844 break;
2845 e = ast_for_trailer(c, ch, e, CHILD(n, start));
2846 if (!e)
2847 return NULL;
2848 }
2849
2850 if (start) {
2851 /* there was an 'await' */
2852 return Await(e, LINENO(n), n->n_col_offset,
2853 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2854 }
2855 else {
2856 return e;
2857 }
2858 }
2859
2860 static expr_ty
ast_for_power(struct compiling * c,const node * n)2861 ast_for_power(struct compiling *c, const node *n)
2862 {
2863 /* power: atom trailer* ('**' factor)*
2864 */
2865 expr_ty e;
2866 REQ(n, power);
2867 e = ast_for_atom_expr(c, CHILD(n, 0));
2868 if (!e)
2869 return NULL;
2870 if (NCH(n) == 1)
2871 return e;
2872 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2873 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2874 if (!f)
2875 return NULL;
2876 e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2877 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2878 }
2879 return e;
2880 }
2881
2882 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2883 ast_for_starred(struct compiling *c, const node *n)
2884 {
2885 expr_ty tmp;
2886 REQ(n, star_expr);
2887
2888 tmp = ast_for_expr(c, CHILD(n, 1));
2889 if (!tmp)
2890 return NULL;
2891
2892 /* The Load context is changed later. */
2893 return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2894 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2895 }
2896
2897
2898 /* Do not name a variable 'expr'! Will cause a compile error.
2899 */
2900
2901 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2902 ast_for_expr(struct compiling *c, const node *n)
2903 {
2904 /* handle the full range of simple expressions
2905 namedexpr_test: test [':=' test]
2906 test: or_test ['if' or_test 'else' test] | lambdef
2907 test_nocond: or_test | lambdef_nocond
2908 or_test: and_test ('or' and_test)*
2909 and_test: not_test ('and' not_test)*
2910 not_test: 'not' not_test | comparison
2911 comparison: expr (comp_op expr)*
2912 expr: xor_expr ('|' xor_expr)*
2913 xor_expr: and_expr ('^' and_expr)*
2914 and_expr: shift_expr ('&' shift_expr)*
2915 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2916 arith_expr: term (('+'|'-') term)*
2917 term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2918 factor: ('+'|'-'|'~') factor | power
2919 power: atom_expr ['**' factor]
2920 atom_expr: [AWAIT] atom trailer*
2921 yield_expr: 'yield' [yield_arg]
2922 */
2923
2924 asdl_seq *seq;
2925 int i;
2926
2927 loop:
2928 switch (TYPE(n)) {
2929 case namedexpr_test:
2930 if (NCH(n) == 3)
2931 return ast_for_namedexpr(c, n);
2932 /* Fallthrough */
2933 case test:
2934 case test_nocond:
2935 if (TYPE(CHILD(n, 0)) == lambdef ||
2936 TYPE(CHILD(n, 0)) == lambdef_nocond)
2937 return ast_for_lambdef(c, CHILD(n, 0));
2938 else if (NCH(n) > 1)
2939 return ast_for_ifexpr(c, n);
2940 /* Fallthrough */
2941 case or_test:
2942 case and_test:
2943 if (NCH(n) == 1) {
2944 n = CHILD(n, 0);
2945 goto loop;
2946 }
2947 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2948 if (!seq)
2949 return NULL;
2950 for (i = 0; i < NCH(n); i += 2) {
2951 expr_ty e = ast_for_expr(c, CHILD(n, i));
2952 if (!e)
2953 return NULL;
2954 asdl_seq_SET(seq, i / 2, e);
2955 }
2956 if (!strcmp(STR(CHILD(n, 1)), "and"))
2957 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2958 n->n_end_lineno, n->n_end_col_offset,
2959 c->c_arena);
2960 assert(!strcmp(STR(CHILD(n, 1)), "or"));
2961 return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
2962 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2963 case not_test:
2964 if (NCH(n) == 1) {
2965 n = CHILD(n, 0);
2966 goto loop;
2967 }
2968 else {
2969 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2970 if (!expression)
2971 return NULL;
2972
2973 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2974 n->n_end_lineno, n->n_end_col_offset,
2975 c->c_arena);
2976 }
2977 case comparison:
2978 if (NCH(n) == 1) {
2979 n = CHILD(n, 0);
2980 goto loop;
2981 }
2982 else {
2983 expr_ty expression;
2984 asdl_int_seq *ops;
2985 asdl_seq *cmps;
2986 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2987 if (!ops)
2988 return NULL;
2989 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2990 if (!cmps) {
2991 return NULL;
2992 }
2993 for (i = 1; i < NCH(n); i += 2) {
2994 cmpop_ty newoperator;
2995
2996 newoperator = ast_for_comp_op(c, CHILD(n, i));
2997 if (!newoperator) {
2998 return NULL;
2999 }
3000
3001 expression = ast_for_expr(c, CHILD(n, i + 1));
3002 if (!expression) {
3003 return NULL;
3004 }
3005
3006 asdl_seq_SET(ops, i / 2, newoperator);
3007 asdl_seq_SET(cmps, i / 2, expression);
3008 }
3009 expression = ast_for_expr(c, CHILD(n, 0));
3010 if (!expression) {
3011 return NULL;
3012 }
3013
3014 return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
3015 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3016 }
3017
3018 case star_expr:
3019 return ast_for_starred(c, n);
3020 /* The next five cases all handle BinOps. The main body of code
3021 is the same in each case, but the switch turned inside out to
3022 reuse the code for each type of operator.
3023 */
3024 case expr:
3025 case xor_expr:
3026 case and_expr:
3027 case shift_expr:
3028 case arith_expr:
3029 case term:
3030 if (NCH(n) == 1) {
3031 n = CHILD(n, 0);
3032 goto loop;
3033 }
3034 return ast_for_binop(c, n);
3035 case yield_expr: {
3036 node *an = NULL;
3037 node *en = NULL;
3038 int is_from = 0;
3039 expr_ty exp = NULL;
3040 if (NCH(n) > 1)
3041 an = CHILD(n, 1); /* yield_arg */
3042 if (an) {
3043 en = CHILD(an, NCH(an) - 1);
3044 if (NCH(an) == 2) {
3045 is_from = 1;
3046 exp = ast_for_expr(c, en);
3047 }
3048 else
3049 exp = ast_for_testlist(c, en);
3050 if (!exp)
3051 return NULL;
3052 }
3053 if (is_from)
3054 return YieldFrom(exp, LINENO(n), n->n_col_offset,
3055 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3056 return Yield(exp, LINENO(n), n->n_col_offset,
3057 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3058 }
3059 case factor:
3060 if (NCH(n) == 1) {
3061 n = CHILD(n, 0);
3062 goto loop;
3063 }
3064 return ast_for_factor(c, n);
3065 case power:
3066 return ast_for_power(c, n);
3067 default:
3068 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
3069 return NULL;
3070 }
3071 /* should never get here unless if error is set */
3072 return NULL;
3073 }
3074
3075 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * start,const node * maybegenbeg,const node * closepar)3076 ast_for_call(struct compiling *c, const node *n, expr_ty func,
3077 const node *start, const node *maybegenbeg, const node *closepar)
3078 {
3079 /*
3080 arglist: argument (',' argument)* [',']
3081 argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
3082 */
3083
3084 int i, nargs, nkeywords;
3085 int ndoublestars;
3086 asdl_seq *args;
3087 asdl_seq *keywords;
3088
3089 REQ(n, arglist);
3090
3091 nargs = 0;
3092 nkeywords = 0;
3093 for (i = 0; i < NCH(n); i++) {
3094 node *ch = CHILD(n, i);
3095 if (TYPE(ch) == argument) {
3096 if (NCH(ch) == 1)
3097 nargs++;
3098 else if (TYPE(CHILD(ch, 1)) == comp_for) {
3099 nargs++;
3100 if (!maybegenbeg) {
3101 ast_error(c, ch, "invalid syntax");
3102 return NULL;
3103 }
3104 if (NCH(n) > 1) {
3105 ast_error(c, ch, "Generator expression must be parenthesized");
3106 return NULL;
3107 }
3108 }
3109 else if (TYPE(CHILD(ch, 0)) == STAR)
3110 nargs++;
3111 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3112 nargs++;
3113 }
3114 else
3115 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
3116 nkeywords++;
3117 }
3118 }
3119
3120 args = _Py_asdl_seq_new(nargs, c->c_arena);
3121 if (!args)
3122 return NULL;
3123 keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
3124 if (!keywords)
3125 return NULL;
3126
3127 nargs = 0; /* positional arguments + iterable argument unpackings */
3128 nkeywords = 0; /* keyword arguments + keyword argument unpackings */
3129 ndoublestars = 0; /* just keyword argument unpackings */
3130 for (i = 0; i < NCH(n); i++) {
3131 node *ch = CHILD(n, i);
3132 if (TYPE(ch) == argument) {
3133 expr_ty e;
3134 node *chch = CHILD(ch, 0);
3135 if (NCH(ch) == 1) {
3136 /* a positional argument */
3137 if (nkeywords) {
3138 if (ndoublestars) {
3139 ast_error(c, chch,
3140 "positional argument follows "
3141 "keyword argument unpacking");
3142 }
3143 else {
3144 ast_error(c, chch,
3145 "positional argument follows "
3146 "keyword argument");
3147 }
3148 return NULL;
3149 }
3150 e = ast_for_expr(c, chch);
3151 if (!e)
3152 return NULL;
3153 asdl_seq_SET(args, nargs++, e);
3154 }
3155 else if (TYPE(chch) == STAR) {
3156 /* an iterable argument unpacking */
3157 expr_ty starred;
3158 if (ndoublestars) {
3159 ast_error(c, chch,
3160 "iterable argument unpacking follows "
3161 "keyword argument unpacking");
3162 return NULL;
3163 }
3164 e = ast_for_expr(c, CHILD(ch, 1));
3165 if (!e)
3166 return NULL;
3167 starred = Starred(e, Load, LINENO(chch),
3168 chch->n_col_offset,
3169 e->end_lineno, e->end_col_offset,
3170 c->c_arena);
3171 if (!starred)
3172 return NULL;
3173 asdl_seq_SET(args, nargs++, starred);
3174
3175 }
3176 else if (TYPE(chch) == DOUBLESTAR) {
3177 /* a keyword argument unpacking */
3178 keyword_ty kw;
3179 i++;
3180 e = ast_for_expr(c, CHILD(ch, 1));
3181 if (!e)
3182 return NULL;
3183 kw = keyword(NULL, e, c->c_arena);
3184 asdl_seq_SET(keywords, nkeywords++, kw);
3185 ndoublestars++;
3186 }
3187 else if (TYPE(CHILD(ch, 1)) == comp_for) {
3188 /* the lone generator expression */
3189 e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar);
3190 if (!e)
3191 return NULL;
3192 asdl_seq_SET(args, nargs++, e);
3193 }
3194 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3195 /* treat colon equal as positional argument */
3196 if (nkeywords) {
3197 if (ndoublestars) {
3198 ast_error(c, chch,
3199 "positional argument follows "
3200 "keyword argument unpacking");
3201 }
3202 else {
3203 ast_error(c, chch,
3204 "positional argument follows "
3205 "keyword argument");
3206 }
3207 return NULL;
3208 }
3209 e = ast_for_namedexpr(c, ch);
3210 if (!e)
3211 return NULL;
3212 asdl_seq_SET(args, nargs++, e);
3213 }
3214 else {
3215 /* a keyword argument */
3216 keyword_ty kw;
3217 identifier key, tmp;
3218 int k;
3219
3220 // To remain LL(1), the grammar accepts any test (basically, any
3221 // expression) in the keyword slot of a call site. So, we need
3222 // to manually enforce that the keyword is a NAME here.
3223 static const int name_tree[] = {
3224 test,
3225 or_test,
3226 and_test,
3227 not_test,
3228 comparison,
3229 expr,
3230 xor_expr,
3231 and_expr,
3232 shift_expr,
3233 arith_expr,
3234 term,
3235 factor,
3236 power,
3237 atom_expr,
3238 atom,
3239 0,
3240 };
3241 node *expr_node = chch;
3242 for (int i = 0; name_tree[i]; i++) {
3243 if (TYPE(expr_node) != name_tree[i])
3244 break;
3245 if (NCH(expr_node) != 1)
3246 break;
3247 expr_node = CHILD(expr_node, 0);
3248 }
3249 if (TYPE(expr_node) != NAME) {
3250 ast_error(c, chch,
3251 "expression cannot contain assignment, "
3252 "perhaps you meant \"==\"?");
3253 return NULL;
3254 }
3255 key = new_identifier(STR(expr_node), c);
3256 if (key == NULL) {
3257 return NULL;
3258 }
3259 if (forbidden_name(c, key, chch, 1)) {
3260 return NULL;
3261 }
3262 for (k = 0; k < nkeywords; k++) {
3263 tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
3264 if (tmp && !PyUnicode_Compare(tmp, key)) {
3265 ast_error(c, chch,
3266 "keyword argument repeated");
3267 return NULL;
3268 }
3269 }
3270 e = ast_for_expr(c, CHILD(ch, 2));
3271 if (!e)
3272 return NULL;
3273 kw = keyword(key, e, c->c_arena);
3274 if (!kw)
3275 return NULL;
3276 asdl_seq_SET(keywords, nkeywords++, kw);
3277 }
3278 }
3279 }
3280
3281 return Call(func, args, keywords, LINENO(start), start->n_col_offset,
3282 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3283 }
3284
3285 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3286 ast_for_testlist(struct compiling *c, const node* n)
3287 {
3288 /* testlist_comp: test (comp_for | (',' test)* [',']) */
3289 /* testlist: test (',' test)* [','] */
3290 assert(NCH(n) > 0);
3291 if (TYPE(n) == testlist_comp) {
3292 if (NCH(n) > 1)
3293 assert(TYPE(CHILD(n, 1)) != comp_for);
3294 }
3295 else {
3296 assert(TYPE(n) == testlist ||
3297 TYPE(n) == testlist_star_expr);
3298 }
3299 if (NCH(n) == 1)
3300 return ast_for_expr(c, CHILD(n, 0));
3301 else {
3302 asdl_seq *tmp = seq_for_testlist(c, n);
3303 if (!tmp)
3304 return NULL;
3305 return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3306 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3307 }
3308 }
3309
3310 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)3311 ast_for_expr_stmt(struct compiling *c, const node *n)
3312 {
3313 REQ(n, expr_stmt);
3314 /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
3315 [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
3316 annassign: ':' test ['=' (yield_expr|testlist)]
3317 testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
3318 augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
3319 '<<=' | '>>=' | '**=' | '//=')
3320 test: ... here starts the operator precedence dance
3321 */
3322 int num = NCH(n);
3323
3324 if (num == 1) {
3325 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
3326 if (!e)
3327 return NULL;
3328
3329 return Expr(e, LINENO(n), n->n_col_offset,
3330 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3331 }
3332 else if (TYPE(CHILD(n, 1)) == augassign) {
3333 expr_ty expr1, expr2;
3334 operator_ty newoperator;
3335 node *ch = CHILD(n, 0);
3336
3337 expr1 = ast_for_testlist(c, ch);
3338 if (!expr1)
3339 return NULL;
3340 if(!set_context(c, expr1, Store, ch))
3341 return NULL;
3342 /* set_context checks that most expressions are not the left side.
3343 Augmented assignments can only have a name, a subscript, or an
3344 attribute on the left, though, so we have to explicitly check for
3345 those. */
3346 switch (expr1->kind) {
3347 case Name_kind:
3348 case Attribute_kind:
3349 case Subscript_kind:
3350 break;
3351 default:
3352 ast_error(c, ch, "illegal expression for augmented assignment");
3353 return NULL;
3354 }
3355
3356 ch = CHILD(n, 2);
3357 if (TYPE(ch) == testlist)
3358 expr2 = ast_for_testlist(c, ch);
3359 else
3360 expr2 = ast_for_expr(c, ch);
3361 if (!expr2)
3362 return NULL;
3363
3364 newoperator = ast_for_augassign(c, CHILD(n, 1));
3365 if (!newoperator)
3366 return NULL;
3367
3368 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
3369 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3370 }
3371 else if (TYPE(CHILD(n, 1)) == annassign) {
3372 expr_ty expr1, expr2, expr3;
3373 node *ch = CHILD(n, 0);
3374 node *deep, *ann = CHILD(n, 1);
3375 int simple = 1;
3376
3377 /* AnnAssigns are only allowed in Python 3.6 or greater */
3378 if (c->c_feature_version < 6) {
3379 ast_error(c, ch,
3380 "Variable annotation syntax is only supported in Python 3.6 and greater");
3381 return NULL;
3382 }
3383
3384 /* we keep track of parens to qualify (x) as expression not name */
3385 deep = ch;
3386 while (NCH(deep) == 1) {
3387 deep = CHILD(deep, 0);
3388 }
3389 if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
3390 simple = 0;
3391 }
3392 expr1 = ast_for_testlist(c, ch);
3393 if (!expr1) {
3394 return NULL;
3395 }
3396 switch (expr1->kind) {
3397 case Name_kind:
3398 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
3399 return NULL;
3400 }
3401 expr1->v.Name.ctx = Store;
3402 break;
3403 case Attribute_kind:
3404 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
3405 return NULL;
3406 }
3407 expr1->v.Attribute.ctx = Store;
3408 break;
3409 case Subscript_kind:
3410 expr1->v.Subscript.ctx = Store;
3411 break;
3412 case List_kind:
3413 ast_error(c, ch,
3414 "only single target (not list) can be annotated");
3415 return NULL;
3416 case Tuple_kind:
3417 ast_error(c, ch,
3418 "only single target (not tuple) can be annotated");
3419 return NULL;
3420 default:
3421 ast_error(c, ch,
3422 "illegal target for annotation");
3423 return NULL;
3424 }
3425
3426 if (expr1->kind != Name_kind) {
3427 simple = 0;
3428 }
3429 ch = CHILD(ann, 1);
3430 expr2 = ast_for_expr(c, ch);
3431 if (!expr2) {
3432 return NULL;
3433 }
3434 if (NCH(ann) == 2) {
3435 return AnnAssign(expr1, expr2, NULL, simple,
3436 LINENO(n), n->n_col_offset,
3437 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3438 }
3439 else {
3440 ch = CHILD(ann, 3);
3441 if (TYPE(ch) == testlist_star_expr) {
3442 expr3 = ast_for_testlist(c, ch);
3443 }
3444 else {
3445 expr3 = ast_for_expr(c, ch);
3446 }
3447 if (!expr3) {
3448 return NULL;
3449 }
3450 return AnnAssign(expr1, expr2, expr3, simple,
3451 LINENO(n), n->n_col_offset,
3452 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3453 }
3454 }
3455 else {
3456 int i, nch_minus_type, has_type_comment;
3457 asdl_seq *targets;
3458 node *value;
3459 expr_ty expression;
3460 string type_comment;
3461
3462 /* a normal assignment */
3463 REQ(CHILD(n, 1), EQUAL);
3464
3465 has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
3466 nch_minus_type = num - has_type_comment;
3467
3468 targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
3469 if (!targets)
3470 return NULL;
3471 for (i = 0; i < nch_minus_type - 2; i += 2) {
3472 expr_ty e;
3473 node *ch = CHILD(n, i);
3474 if (TYPE(ch) == yield_expr) {
3475 ast_error(c, ch, "assignment to yield expression not possible");
3476 return NULL;
3477 }
3478 e = ast_for_testlist(c, ch);
3479 if (!e)
3480 return NULL;
3481
3482 /* set context to assign */
3483 if (!set_context(c, e, Store, CHILD(n, i)))
3484 return NULL;
3485
3486 asdl_seq_SET(targets, i / 2, e);
3487 }
3488 value = CHILD(n, nch_minus_type - 1);
3489 if (TYPE(value) == testlist_star_expr)
3490 expression = ast_for_testlist(c, value);
3491 else
3492 expression = ast_for_expr(c, value);
3493 if (!expression)
3494 return NULL;
3495 if (has_type_comment) {
3496 type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
3497 if (!type_comment)
3498 return NULL;
3499 }
3500 else
3501 type_comment = NULL;
3502 return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
3503 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3504 }
3505 }
3506
3507
3508 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3509 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3510 {
3511 asdl_seq *seq;
3512 int i;
3513 expr_ty e;
3514
3515 REQ(n, exprlist);
3516
3517 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3518 if (!seq)
3519 return NULL;
3520 for (i = 0; i < NCH(n); i += 2) {
3521 e = ast_for_expr(c, CHILD(n, i));
3522 if (!e)
3523 return NULL;
3524 asdl_seq_SET(seq, i / 2, e);
3525 if (context && !set_context(c, e, context, CHILD(n, i)))
3526 return NULL;
3527 }
3528 return seq;
3529 }
3530
3531 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3532 ast_for_del_stmt(struct compiling *c, const node *n)
3533 {
3534 asdl_seq *expr_list;
3535
3536 /* del_stmt: 'del' exprlist */
3537 REQ(n, del_stmt);
3538
3539 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3540 if (!expr_list)
3541 return NULL;
3542 return Delete(expr_list, LINENO(n), n->n_col_offset,
3543 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3544 }
3545
3546 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3547 ast_for_flow_stmt(struct compiling *c, const node *n)
3548 {
3549 /*
3550 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3551 | yield_stmt
3552 break_stmt: 'break'
3553 continue_stmt: 'continue'
3554 return_stmt: 'return' [testlist]
3555 yield_stmt: yield_expr
3556 yield_expr: 'yield' testlist | 'yield' 'from' test
3557 raise_stmt: 'raise' [test [',' test [',' test]]]
3558 */
3559 node *ch;
3560
3561 REQ(n, flow_stmt);
3562 ch = CHILD(n, 0);
3563 switch (TYPE(ch)) {
3564 case break_stmt:
3565 return Break(LINENO(n), n->n_col_offset,
3566 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3567 case continue_stmt:
3568 return Continue(LINENO(n), n->n_col_offset,
3569 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3570 case yield_stmt: { /* will reduce to yield_expr */
3571 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3572 if (!exp)
3573 return NULL;
3574 return Expr(exp, LINENO(n), n->n_col_offset,
3575 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3576 }
3577 case return_stmt:
3578 if (NCH(ch) == 1)
3579 return Return(NULL, LINENO(n), n->n_col_offset,
3580 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3581 else {
3582 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3583 if (!expression)
3584 return NULL;
3585 return Return(expression, LINENO(n), n->n_col_offset,
3586 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3587 }
3588 case raise_stmt:
3589 if (NCH(ch) == 1)
3590 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3591 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3592 else if (NCH(ch) >= 2) {
3593 expr_ty cause = NULL;
3594 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3595 if (!expression)
3596 return NULL;
3597 if (NCH(ch) == 4) {
3598 cause = ast_for_expr(c, CHILD(ch, 3));
3599 if (!cause)
3600 return NULL;
3601 }
3602 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3603 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3604 }
3605 /* fall through */
3606 default:
3607 PyErr_Format(PyExc_SystemError,
3608 "unexpected flow_stmt: %d", TYPE(ch));
3609 return NULL;
3610 }
3611 }
3612
3613 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3614 alias_for_import_name(struct compiling *c, const node *n, int store)
3615 {
3616 /*
3617 import_as_name: NAME ['as' NAME]
3618 dotted_as_name: dotted_name ['as' NAME]
3619 dotted_name: NAME ('.' NAME)*
3620 */
3621 identifier str, name;
3622
3623 loop:
3624 switch (TYPE(n)) {
3625 case import_as_name: {
3626 node *name_node = CHILD(n, 0);
3627 str = NULL;
3628 name = NEW_IDENTIFIER(name_node);
3629 if (!name)
3630 return NULL;
3631 if (NCH(n) == 3) {
3632 node *str_node = CHILD(n, 2);
3633 str = NEW_IDENTIFIER(str_node);
3634 if (!str)
3635 return NULL;
3636 if (store && forbidden_name(c, str, str_node, 0))
3637 return NULL;
3638 }
3639 else {
3640 if (forbidden_name(c, name, name_node, 0))
3641 return NULL;
3642 }
3643 return alias(name, str, c->c_arena);
3644 }
3645 case dotted_as_name:
3646 if (NCH(n) == 1) {
3647 n = CHILD(n, 0);
3648 goto loop;
3649 }
3650 else {
3651 node *asname_node = CHILD(n, 2);
3652 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3653 if (!a)
3654 return NULL;
3655 assert(!a->asname);
3656 a->asname = NEW_IDENTIFIER(asname_node);
3657 if (!a->asname)
3658 return NULL;
3659 if (forbidden_name(c, a->asname, asname_node, 0))
3660 return NULL;
3661 return a;
3662 }
3663 case dotted_name:
3664 if (NCH(n) == 1) {
3665 node *name_node = CHILD(n, 0);
3666 name = NEW_IDENTIFIER(name_node);
3667 if (!name)
3668 return NULL;
3669 if (store && forbidden_name(c, name, name_node, 0))
3670 return NULL;
3671 return alias(name, NULL, c->c_arena);
3672 }
3673 else {
3674 /* Create a string of the form "a.b.c" */
3675 int i;
3676 size_t len;
3677 char *s;
3678 PyObject *uni;
3679
3680 len = 0;
3681 for (i = 0; i < NCH(n); i += 2)
3682 /* length of string plus one for the dot */
3683 len += strlen(STR(CHILD(n, i))) + 1;
3684 len--; /* the last name doesn't have a dot */
3685 str = PyBytes_FromStringAndSize(NULL, len);
3686 if (!str)
3687 return NULL;
3688 s = PyBytes_AS_STRING(str);
3689 if (!s)
3690 return NULL;
3691 for (i = 0; i < NCH(n); i += 2) {
3692 char *sch = STR(CHILD(n, i));
3693 strcpy(s, STR(CHILD(n, i)));
3694 s += strlen(sch);
3695 *s++ = '.';
3696 }
3697 --s;
3698 *s = '\0';
3699 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3700 PyBytes_GET_SIZE(str),
3701 NULL);
3702 Py_DECREF(str);
3703 if (!uni)
3704 return NULL;
3705 str = uni;
3706 PyUnicode_InternInPlace(&str);
3707 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3708 Py_DECREF(str);
3709 return NULL;
3710 }
3711 return alias(str, NULL, c->c_arena);
3712 }
3713 case STAR:
3714 str = PyUnicode_InternFromString("*");
3715 if (!str)
3716 return NULL;
3717 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3718 Py_DECREF(str);
3719 return NULL;
3720 }
3721 return alias(str, NULL, c->c_arena);
3722 default:
3723 PyErr_Format(PyExc_SystemError,
3724 "unexpected import name: %d", TYPE(n));
3725 return NULL;
3726 }
3727
3728 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3729 return NULL;
3730 }
3731
3732 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3733 ast_for_import_stmt(struct compiling *c, const node *n)
3734 {
3735 /*
3736 import_stmt: import_name | import_from
3737 import_name: 'import' dotted_as_names
3738 import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3739 'import' ('*' | '(' import_as_names ')' | import_as_names)
3740 */
3741 int lineno;
3742 int col_offset;
3743 int i;
3744 asdl_seq *aliases;
3745
3746 REQ(n, import_stmt);
3747 lineno = LINENO(n);
3748 col_offset = n->n_col_offset;
3749 n = CHILD(n, 0);
3750 if (TYPE(n) == import_name) {
3751 n = CHILD(n, 1);
3752 REQ(n, dotted_as_names);
3753 aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3754 if (!aliases)
3755 return NULL;
3756 for (i = 0; i < NCH(n); i += 2) {
3757 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3758 if (!import_alias)
3759 return NULL;
3760 asdl_seq_SET(aliases, i / 2, import_alias);
3761 }
3762 // Even though n is modified above, the end position is not changed
3763 return Import(aliases, lineno, col_offset,
3764 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3765 }
3766 else if (TYPE(n) == import_from) {
3767 int n_children;
3768 int idx, ndots = 0;
3769 const node *n_copy = n;
3770 alias_ty mod = NULL;
3771 identifier modname = NULL;
3772
3773 /* Count the number of dots (for relative imports) and check for the
3774 optional module name */
3775 for (idx = 1; idx < NCH(n); idx++) {
3776 if (TYPE(CHILD(n, idx)) == dotted_name) {
3777 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3778 if (!mod)
3779 return NULL;
3780 idx++;
3781 break;
3782 } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3783 /* three consecutive dots are tokenized as one ELLIPSIS */
3784 ndots += 3;
3785 continue;
3786 } else if (TYPE(CHILD(n, idx)) != DOT) {
3787 break;
3788 }
3789 ndots++;
3790 }
3791 idx++; /* skip over the 'import' keyword */
3792 switch (TYPE(CHILD(n, idx))) {
3793 case STAR:
3794 /* from ... import * */
3795 n = CHILD(n, idx);
3796 n_children = 1;
3797 break;
3798 case LPAR:
3799 /* from ... import (x, y, z) */
3800 n = CHILD(n, idx + 1);
3801 n_children = NCH(n);
3802 break;
3803 case import_as_names:
3804 /* from ... import x, y, z */
3805 n = CHILD(n, idx);
3806 n_children = NCH(n);
3807 if (n_children % 2 == 0) {
3808 ast_error(c, n,
3809 "trailing comma not allowed without"
3810 " surrounding parentheses");
3811 return NULL;
3812 }
3813 break;
3814 default:
3815 ast_error(c, n, "Unexpected node-type in from-import");
3816 return NULL;
3817 }
3818
3819 aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3820 if (!aliases)
3821 return NULL;
3822
3823 /* handle "from ... import *" special b/c there's no children */
3824 if (TYPE(n) == STAR) {
3825 alias_ty import_alias = alias_for_import_name(c, n, 1);
3826 if (!import_alias)
3827 return NULL;
3828 asdl_seq_SET(aliases, 0, import_alias);
3829 }
3830 else {
3831 for (i = 0; i < NCH(n); i += 2) {
3832 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3833 if (!import_alias)
3834 return NULL;
3835 asdl_seq_SET(aliases, i / 2, import_alias);
3836 }
3837 }
3838 if (mod != NULL)
3839 modname = mod->name;
3840 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3841 n_copy->n_end_lineno, n_copy->n_end_col_offset,
3842 c->c_arena);
3843 }
3844 PyErr_Format(PyExc_SystemError,
3845 "unknown import statement: starts with command '%s'",
3846 STR(CHILD(n, 0)));
3847 return NULL;
3848 }
3849
3850 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3851 ast_for_global_stmt(struct compiling *c, const node *n)
3852 {
3853 /* global_stmt: 'global' NAME (',' NAME)* */
3854 identifier name;
3855 asdl_seq *s;
3856 int i;
3857
3858 REQ(n, global_stmt);
3859 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3860 if (!s)
3861 return NULL;
3862 for (i = 1; i < NCH(n); i += 2) {
3863 name = NEW_IDENTIFIER(CHILD(n, i));
3864 if (!name)
3865 return NULL;
3866 asdl_seq_SET(s, i / 2, name);
3867 }
3868 return Global(s, LINENO(n), n->n_col_offset,
3869 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3870 }
3871
3872 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3873 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3874 {
3875 /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3876 identifier name;
3877 asdl_seq *s;
3878 int i;
3879
3880 REQ(n, nonlocal_stmt);
3881 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3882 if (!s)
3883 return NULL;
3884 for (i = 1; i < NCH(n); i += 2) {
3885 name = NEW_IDENTIFIER(CHILD(n, i));
3886 if (!name)
3887 return NULL;
3888 asdl_seq_SET(s, i / 2, name);
3889 }
3890 return Nonlocal(s, LINENO(n), n->n_col_offset,
3891 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3892 }
3893
3894 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3895 ast_for_assert_stmt(struct compiling *c, const node *n)
3896 {
3897 /* assert_stmt: 'assert' test [',' test] */
3898 REQ(n, assert_stmt);
3899 if (NCH(n) == 2) {
3900 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3901 if (!expression)
3902 return NULL;
3903 return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3904 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3905 }
3906 else if (NCH(n) == 4) {
3907 expr_ty expr1, expr2;
3908
3909 expr1 = ast_for_expr(c, CHILD(n, 1));
3910 if (!expr1)
3911 return NULL;
3912 expr2 = ast_for_expr(c, CHILD(n, 3));
3913 if (!expr2)
3914 return NULL;
3915
3916 return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3917 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3918 }
3919 PyErr_Format(PyExc_SystemError,
3920 "improper number of parts to 'assert' statement: %d",
3921 NCH(n));
3922 return NULL;
3923 }
3924
3925 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3926 ast_for_suite(struct compiling *c, const node *n)
3927 {
3928 /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
3929 asdl_seq *seq;
3930 stmt_ty s;
3931 int i, total, num, end, pos = 0;
3932 node *ch;
3933
3934 if (TYPE(n) != func_body_suite) {
3935 REQ(n, suite);
3936 }
3937
3938 total = num_stmts(n);
3939 seq = _Py_asdl_seq_new(total, c->c_arena);
3940 if (!seq)
3941 return NULL;
3942 if (TYPE(CHILD(n, 0)) == simple_stmt) {
3943 n = CHILD(n, 0);
3944 /* simple_stmt always ends with a NEWLINE,
3945 and may have a trailing SEMI
3946 */
3947 end = NCH(n) - 1;
3948 if (TYPE(CHILD(n, end - 1)) == SEMI)
3949 end--;
3950 /* loop by 2 to skip semi-colons */
3951 for (i = 0; i < end; i += 2) {
3952 ch = CHILD(n, i);
3953 s = ast_for_stmt(c, ch);
3954 if (!s)
3955 return NULL;
3956 asdl_seq_SET(seq, pos++, s);
3957 }
3958 }
3959 else {
3960 i = 2;
3961 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
3962 i += 2;
3963 REQ(CHILD(n, 2), NEWLINE);
3964 }
3965
3966 for (; i < (NCH(n) - 1); i++) {
3967 ch = CHILD(n, i);
3968 REQ(ch, stmt);
3969 num = num_stmts(ch);
3970 if (num == 1) {
3971 /* small_stmt or compound_stmt with only one child */
3972 s = ast_for_stmt(c, ch);
3973 if (!s)
3974 return NULL;
3975 asdl_seq_SET(seq, pos++, s);
3976 }
3977 else {
3978 int j;
3979 ch = CHILD(ch, 0);
3980 REQ(ch, simple_stmt);
3981 for (j = 0; j < NCH(ch); j += 2) {
3982 /* statement terminates with a semi-colon ';' */
3983 if (NCH(CHILD(ch, j)) == 0) {
3984 assert((j + 1) == NCH(ch));
3985 break;
3986 }
3987 s = ast_for_stmt(c, CHILD(ch, j));
3988 if (!s)
3989 return NULL;
3990 asdl_seq_SET(seq, pos++, s);
3991 }
3992 }
3993 }
3994 }
3995 assert(pos == seq->size);
3996 return seq;
3997 }
3998
3999 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)4000 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
4001 {
4002 Py_ssize_t tot = asdl_seq_LEN(s);
4003 // There must be no empty suites.
4004 assert(tot > 0);
4005 stmt_ty last = asdl_seq_GET(s, tot - 1);
4006 *end_lineno = last->end_lineno;
4007 *end_col_offset = last->end_col_offset;
4008 }
4009
4010 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)4011 ast_for_if_stmt(struct compiling *c, const node *n)
4012 {
4013 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
4014 ['else' ':' suite]
4015 */
4016 char *s;
4017 int end_lineno, end_col_offset;
4018
4019 REQ(n, if_stmt);
4020
4021 if (NCH(n) == 4) {
4022 expr_ty expression;
4023 asdl_seq *suite_seq;
4024
4025 expression = ast_for_expr(c, CHILD(n, 1));
4026 if (!expression)
4027 return NULL;
4028 suite_seq = ast_for_suite(c, CHILD(n, 3));
4029 if (!suite_seq)
4030 return NULL;
4031 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4032
4033 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4034 end_lineno, end_col_offset, c->c_arena);
4035 }
4036
4037 s = STR(CHILD(n, 4));
4038 /* s[2], the third character in the string, will be
4039 's' for el_s_e, or
4040 'i' for el_i_f
4041 */
4042 if (s[2] == 's') {
4043 expr_ty expression;
4044 asdl_seq *seq1, *seq2;
4045
4046 expression = ast_for_expr(c, CHILD(n, 1));
4047 if (!expression)
4048 return NULL;
4049 seq1 = ast_for_suite(c, CHILD(n, 3));
4050 if (!seq1)
4051 return NULL;
4052 seq2 = ast_for_suite(c, CHILD(n, 6));
4053 if (!seq2)
4054 return NULL;
4055 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4056
4057 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4058 end_lineno, end_col_offset, c->c_arena);
4059 }
4060 else if (s[2] == 'i') {
4061 int i, n_elif, has_else = 0;
4062 expr_ty expression;
4063 asdl_seq *suite_seq;
4064 asdl_seq *orelse = NULL;
4065 n_elif = NCH(n) - 4;
4066 /* must reference the child n_elif+1 since 'else' token is third,
4067 not fourth, child from the end. */
4068 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
4069 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
4070 has_else = 1;
4071 n_elif -= 3;
4072 }
4073 n_elif /= 4;
4074
4075 if (has_else) {
4076 asdl_seq *suite_seq2;
4077
4078 orelse = _Py_asdl_seq_new(1, c->c_arena);
4079 if (!orelse)
4080 return NULL;
4081 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
4082 if (!expression)
4083 return NULL;
4084 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
4085 if (!suite_seq)
4086 return NULL;
4087 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4088 if (!suite_seq2)
4089 return NULL;
4090 get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
4091
4092 asdl_seq_SET(orelse, 0,
4093 If(expression, suite_seq, suite_seq2,
4094 LINENO(CHILD(n, NCH(n) - 7)),
4095 CHILD(n, NCH(n) - 7)->n_col_offset,
4096 end_lineno, end_col_offset, c->c_arena));
4097 /* the just-created orelse handled the last elif */
4098 n_elif--;
4099 }
4100
4101 for (i = 0; i < n_elif; i++) {
4102 int off = 5 + (n_elif - i - 1) * 4;
4103 asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
4104 if (!newobj)
4105 return NULL;
4106 expression = ast_for_expr(c, CHILD(n, off));
4107 if (!expression)
4108 return NULL;
4109 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
4110 if (!suite_seq)
4111 return NULL;
4112
4113 if (orelse != NULL) {
4114 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4115 } else {
4116 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4117 }
4118 asdl_seq_SET(newobj, 0,
4119 If(expression, suite_seq, orelse,
4120 LINENO(CHILD(n, off - 1)),
4121 CHILD(n, off - 1)->n_col_offset,
4122 end_lineno, end_col_offset, c->c_arena));
4123 orelse = newobj;
4124 }
4125 expression = ast_for_expr(c, CHILD(n, 1));
4126 if (!expression)
4127 return NULL;
4128 suite_seq = ast_for_suite(c, CHILD(n, 3));
4129 if (!suite_seq)
4130 return NULL;
4131 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4132 return If(expression, suite_seq, orelse,
4133 LINENO(n), n->n_col_offset,
4134 end_lineno, end_col_offset, c->c_arena);
4135 }
4136
4137 PyErr_Format(PyExc_SystemError,
4138 "unexpected token in 'if' statement: %s", s);
4139 return NULL;
4140 }
4141
4142 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)4143 ast_for_while_stmt(struct compiling *c, const node *n)
4144 {
4145 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
4146 REQ(n, while_stmt);
4147 int end_lineno, end_col_offset;
4148
4149 if (NCH(n) == 4) {
4150 expr_ty expression;
4151 asdl_seq *suite_seq;
4152
4153 expression = ast_for_expr(c, CHILD(n, 1));
4154 if (!expression)
4155 return NULL;
4156 suite_seq = ast_for_suite(c, CHILD(n, 3));
4157 if (!suite_seq)
4158 return NULL;
4159 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4160 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4161 end_lineno, end_col_offset, c->c_arena);
4162 }
4163 else if (NCH(n) == 7) {
4164 expr_ty expression;
4165 asdl_seq *seq1, *seq2;
4166
4167 expression = ast_for_expr(c, CHILD(n, 1));
4168 if (!expression)
4169 return NULL;
4170 seq1 = ast_for_suite(c, CHILD(n, 3));
4171 if (!seq1)
4172 return NULL;
4173 seq2 = ast_for_suite(c, CHILD(n, 6));
4174 if (!seq2)
4175 return NULL;
4176 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4177
4178 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4179 end_lineno, end_col_offset, c->c_arena);
4180 }
4181
4182 PyErr_Format(PyExc_SystemError,
4183 "wrong number of tokens for 'while' statement: %d",
4184 NCH(n));
4185 return NULL;
4186 }
4187
4188 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4189 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4190 {
4191 const node * const n = is_async ? CHILD(n0, 1) : n0;
4192 asdl_seq *_target, *seq = NULL, *suite_seq;
4193 expr_ty expression;
4194 expr_ty target, first;
4195 const node *node_target;
4196 int end_lineno, end_col_offset;
4197 int has_type_comment;
4198 string type_comment;
4199
4200 if (is_async && c->c_feature_version < 5) {
4201 ast_error(c, n,
4202 "Async for loops are only supported in Python 3.5 and greater");
4203 return NULL;
4204 }
4205
4206 /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4207 REQ(n, for_stmt);
4208
4209 has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4210
4211 if (NCH(n) == 9 + has_type_comment) {
4212 seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4213 if (!seq)
4214 return NULL;
4215 }
4216
4217 node_target = CHILD(n, 1);
4218 _target = ast_for_exprlist(c, node_target, Store);
4219 if (!_target)
4220 return NULL;
4221 /* Check the # of children rather than the length of _target, since
4222 for x, in ... has 1 element in _target, but still requires a Tuple. */
4223 first = (expr_ty)asdl_seq_GET(_target, 0);
4224 if (NCH(node_target) == 1)
4225 target = first;
4226 else
4227 target = Tuple(_target, Store, first->lineno, first->col_offset,
4228 node_target->n_end_lineno, node_target->n_end_col_offset,
4229 c->c_arena);
4230
4231 expression = ast_for_testlist(c, CHILD(n, 3));
4232 if (!expression)
4233 return NULL;
4234 suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4235 if (!suite_seq)
4236 return NULL;
4237
4238 if (seq != NULL) {
4239 get_last_end_pos(seq, &end_lineno, &end_col_offset);
4240 } else {
4241 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4242 }
4243
4244 if (has_type_comment) {
4245 type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4246 if (!type_comment)
4247 return NULL;
4248 }
4249 else
4250 type_comment = NULL;
4251
4252 if (is_async)
4253 return AsyncFor(target, expression, suite_seq, seq, type_comment,
4254 LINENO(n0), n0->n_col_offset,
4255 end_lineno, end_col_offset, c->c_arena);
4256 else
4257 return For(target, expression, suite_seq, seq, type_comment,
4258 LINENO(n), n->n_col_offset,
4259 end_lineno, end_col_offset, c->c_arena);
4260 }
4261
4262 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4263 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4264 {
4265 /* except_clause: 'except' [test ['as' test]] */
4266 int end_lineno, end_col_offset;
4267 REQ(exc, except_clause);
4268 REQ(body, suite);
4269
4270 if (NCH(exc) == 1) {
4271 asdl_seq *suite_seq = ast_for_suite(c, body);
4272 if (!suite_seq)
4273 return NULL;
4274 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4275
4276 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4277 exc->n_col_offset,
4278 end_lineno, end_col_offset, c->c_arena);
4279 }
4280 else if (NCH(exc) == 2) {
4281 expr_ty expression;
4282 asdl_seq *suite_seq;
4283
4284 expression = ast_for_expr(c, CHILD(exc, 1));
4285 if (!expression)
4286 return NULL;
4287 suite_seq = ast_for_suite(c, body);
4288 if (!suite_seq)
4289 return NULL;
4290 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4291
4292 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4293 exc->n_col_offset,
4294 end_lineno, end_col_offset, c->c_arena);
4295 }
4296 else if (NCH(exc) == 4) {
4297 asdl_seq *suite_seq;
4298 expr_ty expression;
4299 identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4300 if (!e)
4301 return NULL;
4302 if (forbidden_name(c, e, CHILD(exc, 3), 0))
4303 return NULL;
4304 expression = ast_for_expr(c, CHILD(exc, 1));
4305 if (!expression)
4306 return NULL;
4307 suite_seq = ast_for_suite(c, body);
4308 if (!suite_seq)
4309 return NULL;
4310 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4311
4312 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4313 exc->n_col_offset,
4314 end_lineno, end_col_offset, c->c_arena);
4315 }
4316
4317 PyErr_Format(PyExc_SystemError,
4318 "wrong number of children for 'except' clause: %d",
4319 NCH(exc));
4320 return NULL;
4321 }
4322
4323 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)4324 ast_for_try_stmt(struct compiling *c, const node *n)
4325 {
4326 const int nch = NCH(n);
4327 int end_lineno, end_col_offset, n_except = (nch - 3)/3;
4328 asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
4329 excepthandler_ty last_handler;
4330
4331 REQ(n, try_stmt);
4332
4333 body = ast_for_suite(c, CHILD(n, 2));
4334 if (body == NULL)
4335 return NULL;
4336
4337 if (TYPE(CHILD(n, nch - 3)) == NAME) {
4338 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
4339 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
4340 /* we can assume it's an "else",
4341 because nch >= 9 for try-else-finally and
4342 it would otherwise have a type of except_clause */
4343 orelse = ast_for_suite(c, CHILD(n, nch - 4));
4344 if (orelse == NULL)
4345 return NULL;
4346 n_except--;
4347 }
4348
4349 finally = ast_for_suite(c, CHILD(n, nch - 1));
4350 if (finally == NULL)
4351 return NULL;
4352 n_except--;
4353 }
4354 else {
4355 /* we can assume it's an "else",
4356 otherwise it would have a type of except_clause */
4357 orelse = ast_for_suite(c, CHILD(n, nch - 1));
4358 if (orelse == NULL)
4359 return NULL;
4360 n_except--;
4361 }
4362 }
4363 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
4364 ast_error(c, n, "malformed 'try' statement");
4365 return NULL;
4366 }
4367
4368 if (n_except > 0) {
4369 int i;
4370 /* process except statements to create a try ... except */
4371 handlers = _Py_asdl_seq_new(n_except, c->c_arena);
4372 if (handlers == NULL)
4373 return NULL;
4374
4375 for (i = 0; i < n_except; i++) {
4376 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
4377 CHILD(n, 5 + i * 3));
4378 if (!e)
4379 return NULL;
4380 asdl_seq_SET(handlers, i, e);
4381 }
4382 }
4383
4384 assert(finally != NULL || asdl_seq_LEN(handlers));
4385 if (finally != NULL) {
4386 // finally is always last
4387 get_last_end_pos(finally, &end_lineno, &end_col_offset);
4388 } else if (orelse != NULL) {
4389 // otherwise else is last
4390 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4391 } else {
4392 // inline the get_last_end_pos logic due to layout mismatch
4393 last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
4394 end_lineno = last_handler->end_lineno;
4395 end_col_offset = last_handler->end_col_offset;
4396 }
4397 return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
4398 end_lineno, end_col_offset, c->c_arena);
4399 }
4400
4401 /* with_item: test ['as' expr] */
4402 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4403 ast_for_with_item(struct compiling *c, const node *n)
4404 {
4405 expr_ty context_expr, optional_vars = NULL;
4406
4407 REQ(n, with_item);
4408 context_expr = ast_for_expr(c, CHILD(n, 0));
4409 if (!context_expr)
4410 return NULL;
4411 if (NCH(n) == 3) {
4412 optional_vars = ast_for_expr(c, CHILD(n, 2));
4413
4414 if (!optional_vars) {
4415 return NULL;
4416 }
4417 if (!set_context(c, optional_vars, Store, n)) {
4418 return NULL;
4419 }
4420 }
4421
4422 return withitem(context_expr, optional_vars, c->c_arena);
4423 }
4424
4425 /* with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite */
4426 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4427 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4428 {
4429 const node * const n = is_async ? CHILD(n0, 1) : n0;
4430 int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4431 asdl_seq *items, *body;
4432 string type_comment;
4433
4434 if (is_async && c->c_feature_version < 5) {
4435 ast_error(c, n,
4436 "Async with statements are only supported in Python 3.5 and greater");
4437 return NULL;
4438 }
4439
4440 REQ(n, with_stmt);
4441
4442 has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4443 nch_minus_type = NCH(n) - has_type_comment;
4444
4445 n_items = (nch_minus_type - 2) / 2;
4446 items = _Py_asdl_seq_new(n_items, c->c_arena);
4447 if (!items)
4448 return NULL;
4449 for (i = 1; i < nch_minus_type - 2; i += 2) {
4450 withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4451 if (!item)
4452 return NULL;
4453 asdl_seq_SET(items, (i - 1) / 2, item);
4454 }
4455
4456 body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4457 if (!body)
4458 return NULL;
4459 get_last_end_pos(body, &end_lineno, &end_col_offset);
4460
4461 if (has_type_comment) {
4462 type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4463 if (!type_comment)
4464 return NULL;
4465 }
4466 else
4467 type_comment = NULL;
4468
4469 if (is_async)
4470 return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4471 end_lineno, end_col_offset, c->c_arena);
4472 else
4473 return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4474 end_lineno, end_col_offset, c->c_arena);
4475 }
4476
4477 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4478 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4479 {
4480 /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4481 PyObject *classname;
4482 asdl_seq *s;
4483 expr_ty call;
4484 int end_lineno, end_col_offset;
4485
4486 REQ(n, classdef);
4487
4488 if (NCH(n) == 4) { /* class NAME ':' suite */
4489 s = ast_for_suite(c, CHILD(n, 3));
4490 if (!s)
4491 return NULL;
4492 get_last_end_pos(s, &end_lineno, &end_col_offset);
4493
4494 classname = NEW_IDENTIFIER(CHILD(n, 1));
4495 if (!classname)
4496 return NULL;
4497 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4498 return NULL;
4499 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4500 LINENO(n), n->n_col_offset,
4501 end_lineno, end_col_offset, c->c_arena);
4502 }
4503
4504 if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4505 s = ast_for_suite(c, CHILD(n, 5));
4506 if (!s)
4507 return NULL;
4508 get_last_end_pos(s, &end_lineno, &end_col_offset);
4509
4510 classname = NEW_IDENTIFIER(CHILD(n, 1));
4511 if (!classname)
4512 return NULL;
4513 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4514 return NULL;
4515 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4516 LINENO(n), n->n_col_offset,
4517 end_lineno, end_col_offset, c->c_arena);
4518 }
4519
4520 /* class NAME '(' arglist ')' ':' suite */
4521 /* build up a fake Call node so we can extract its pieces */
4522 {
4523 PyObject *dummy_name;
4524 expr_ty dummy;
4525 dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4526 if (!dummy_name)
4527 return NULL;
4528 dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4529 CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4530 c->c_arena);
4531 call = ast_for_call(c, CHILD(n, 3), dummy,
4532 CHILD(n, 1), NULL, CHILD(n, 4));
4533 if (!call)
4534 return NULL;
4535 }
4536 s = ast_for_suite(c, CHILD(n, 6));
4537 if (!s)
4538 return NULL;
4539 get_last_end_pos(s, &end_lineno, &end_col_offset);
4540
4541 classname = NEW_IDENTIFIER(CHILD(n, 1));
4542 if (!classname)
4543 return NULL;
4544 if (forbidden_name(c, classname, CHILD(n, 1), 0))
4545 return NULL;
4546
4547 return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4548 decorator_seq, LINENO(n), n->n_col_offset,
4549 end_lineno, end_col_offset, c->c_arena);
4550 }
4551
4552 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4553 ast_for_stmt(struct compiling *c, const node *n)
4554 {
4555 if (TYPE(n) == stmt) {
4556 assert(NCH(n) == 1);
4557 n = CHILD(n, 0);
4558 }
4559 if (TYPE(n) == simple_stmt) {
4560 assert(num_stmts(n) == 1);
4561 n = CHILD(n, 0);
4562 }
4563 if (TYPE(n) == small_stmt) {
4564 n = CHILD(n, 0);
4565 /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4566 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4567 */
4568 switch (TYPE(n)) {
4569 case expr_stmt:
4570 return ast_for_expr_stmt(c, n);
4571 case del_stmt:
4572 return ast_for_del_stmt(c, n);
4573 case pass_stmt:
4574 return Pass(LINENO(n), n->n_col_offset,
4575 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4576 case flow_stmt:
4577 return ast_for_flow_stmt(c, n);
4578 case import_stmt:
4579 return ast_for_import_stmt(c, n);
4580 case global_stmt:
4581 return ast_for_global_stmt(c, n);
4582 case nonlocal_stmt:
4583 return ast_for_nonlocal_stmt(c, n);
4584 case assert_stmt:
4585 return ast_for_assert_stmt(c, n);
4586 default:
4587 PyErr_Format(PyExc_SystemError,
4588 "unhandled small_stmt: TYPE=%d NCH=%d\n",
4589 TYPE(n), NCH(n));
4590 return NULL;
4591 }
4592 }
4593 else {
4594 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4595 | funcdef | classdef | decorated | async_stmt
4596 */
4597 node *ch = CHILD(n, 0);
4598 REQ(n, compound_stmt);
4599 switch (TYPE(ch)) {
4600 case if_stmt:
4601 return ast_for_if_stmt(c, ch);
4602 case while_stmt:
4603 return ast_for_while_stmt(c, ch);
4604 case for_stmt:
4605 return ast_for_for_stmt(c, ch, 0);
4606 case try_stmt:
4607 return ast_for_try_stmt(c, ch);
4608 case with_stmt:
4609 return ast_for_with_stmt(c, ch, 0);
4610 case funcdef:
4611 return ast_for_funcdef(c, ch, NULL);
4612 case classdef:
4613 return ast_for_classdef(c, ch, NULL);
4614 case decorated:
4615 return ast_for_decorated(c, ch);
4616 case async_stmt:
4617 return ast_for_async_stmt(c, ch);
4618 default:
4619 PyErr_Format(PyExc_SystemError,
4620 "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4621 TYPE(n), NCH(n));
4622 return NULL;
4623 }
4624 }
4625 }
4626
4627 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4628 parsenumber_raw(struct compiling *c, const char *s)
4629 {
4630 const char *end;
4631 long x;
4632 double dx;
4633 Py_complex compl;
4634 int imflag;
4635
4636 assert(s != NULL);
4637 errno = 0;
4638 end = s + strlen(s) - 1;
4639 imflag = *end == 'j' || *end == 'J';
4640 if (s[0] == '0') {
4641 x = (long) PyOS_strtoul(s, (char **)&end, 0);
4642 if (x < 0 && errno == 0) {
4643 return PyLong_FromString(s, (char **)0, 0);
4644 }
4645 }
4646 else
4647 x = PyOS_strtol(s, (char **)&end, 0);
4648 if (*end == '\0') {
4649 if (errno != 0)
4650 return PyLong_FromString(s, (char **)0, 0);
4651 return PyLong_FromLong(x);
4652 }
4653 /* XXX Huge floats may silently fail */
4654 if (imflag) {
4655 compl.real = 0.;
4656 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4657 if (compl.imag == -1.0 && PyErr_Occurred())
4658 return NULL;
4659 return PyComplex_FromCComplex(compl);
4660 }
4661 else
4662 {
4663 dx = PyOS_string_to_double(s, NULL, NULL);
4664 if (dx == -1.0 && PyErr_Occurred())
4665 return NULL;
4666 return PyFloat_FromDouble(dx);
4667 }
4668 }
4669
4670 static PyObject *
parsenumber(struct compiling * c,const char * s)4671 parsenumber(struct compiling *c, const char *s)
4672 {
4673 char *dup, *end;
4674 PyObject *res = NULL;
4675
4676 assert(s != NULL);
4677
4678 if (strchr(s, '_') == NULL) {
4679 return parsenumber_raw(c, s);
4680 }
4681 /* Create a duplicate without underscores. */
4682 dup = PyMem_Malloc(strlen(s) + 1);
4683 if (dup == NULL) {
4684 return PyErr_NoMemory();
4685 }
4686 end = dup;
4687 for (; *s; s++) {
4688 if (*s != '_') {
4689 *end++ = *s;
4690 }
4691 }
4692 *end = '\0';
4693 res = parsenumber_raw(c, dup);
4694 PyMem_Free(dup);
4695 return res;
4696 }
4697
4698 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4699 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4700 {
4701 const char *s, *t;
4702 t = s = *sPtr;
4703 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4704 while (s < end && (*s & 0x80)) s++;
4705 *sPtr = s;
4706 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4707 }
4708
4709 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4710 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4711 unsigned char first_invalid_escape_char)
4712 {
4713 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4714 first_invalid_escape_char);
4715 if (msg == NULL) {
4716 return -1;
4717 }
4718 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4719 c->c_filename, LINENO(n),
4720 NULL, NULL) < 0)
4721 {
4722 if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4723 /* Replace the DeprecationWarning exception with a SyntaxError
4724 to get a more accurate error report */
4725 PyErr_Clear();
4726 ast_error(c, n, "%U", msg);
4727 }
4728 Py_DECREF(msg);
4729 return -1;
4730 }
4731 Py_DECREF(msg);
4732 return 0;
4733 }
4734
4735 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4736 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4737 size_t len)
4738 {
4739 PyObject *v, *u;
4740 char *buf;
4741 char *p;
4742 const char *end;
4743
4744 /* check for integer overflow */
4745 if (len > SIZE_MAX / 6)
4746 return NULL;
4747 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4748 "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4749 u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4750 if (u == NULL)
4751 return NULL;
4752 p = buf = PyBytes_AsString(u);
4753 end = s + len;
4754 while (s < end) {
4755 if (*s == '\\') {
4756 *p++ = *s++;
4757 if (s >= end || *s & 0x80) {
4758 strcpy(p, "u005c");
4759 p += 5;
4760 if (s >= end)
4761 break;
4762 }
4763 }
4764 if (*s & 0x80) { /* XXX inefficient */
4765 PyObject *w;
4766 int kind;
4767 void *data;
4768 Py_ssize_t len, i;
4769 w = decode_utf8(c, &s, end);
4770 if (w == NULL) {
4771 Py_DECREF(u);
4772 return NULL;
4773 }
4774 kind = PyUnicode_KIND(w);
4775 data = PyUnicode_DATA(w);
4776 len = PyUnicode_GET_LENGTH(w);
4777 for (i = 0; i < len; i++) {
4778 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4779 sprintf(p, "\\U%08x", chr);
4780 p += 10;
4781 }
4782 /* Should be impossible to overflow */
4783 assert(p - buf <= PyBytes_GET_SIZE(u));
4784 Py_DECREF(w);
4785 } else {
4786 *p++ = *s++;
4787 }
4788 }
4789 len = p - buf;
4790 s = buf;
4791
4792 const char *first_invalid_escape;
4793 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4794
4795 if (v != NULL && first_invalid_escape != NULL) {
4796 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4797 /* We have not decref u before because first_invalid_escape points
4798 inside u. */
4799 Py_XDECREF(u);
4800 Py_DECREF(v);
4801 return NULL;
4802 }
4803 }
4804 Py_XDECREF(u);
4805 return v;
4806 }
4807
4808 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4809 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4810 size_t len)
4811 {
4812 const char *first_invalid_escape;
4813 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4814 &first_invalid_escape);
4815 if (result == NULL)
4816 return NULL;
4817
4818 if (first_invalid_escape != NULL) {
4819 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4820 Py_DECREF(result);
4821 return NULL;
4822 }
4823 }
4824 return result;
4825 }
4826
4827 /* Shift locations for the given node and all its children by adding `lineno`
4828 and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4829 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4830 {
4831 n->n_col_offset = n->n_col_offset + col_offset;
4832 n->n_end_col_offset = n->n_end_col_offset + col_offset;
4833 for (int i = 0; i < NCH(n); ++i) {
4834 if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4835 /* Shifting column offsets unnecessary if there's been newlines. */
4836 col_offset = 0;
4837 }
4838 fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4839 }
4840 n->n_lineno = n->n_lineno + lineno;
4841 n->n_end_lineno = n->n_end_lineno + lineno;
4842 }
4843
4844 /* Fix locations for the given node and its children.
4845
4846 `parent` is the enclosing node.
4847 `n` is the node which locations are going to be fixed relative to parent.
4848 `expr_str` is the child node's string representation, including braces.
4849 */
4850 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4851 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4852 {
4853 char *substr = NULL;
4854 char *start;
4855 int lines = LINENO(parent) - 1;
4856 int cols = parent->n_col_offset;
4857 /* Find the full fstring to fix location information in `n`. */
4858 while (parent && parent->n_type != STRING)
4859 parent = parent->n_child;
4860 if (parent && parent->n_str) {
4861 substr = strstr(parent->n_str, expr_str);
4862 if (substr) {
4863 start = substr;
4864 while (start > parent->n_str) {
4865 if (start[0] == '\n')
4866 break;
4867 start--;
4868 }
4869 cols += (int)(substr - start);
4870 /* adjust the start based on the number of newlines encountered
4871 before the f-string expression */
4872 for (char* p = parent->n_str; p < substr; p++) {
4873 if (*p == '\n') {
4874 lines++;
4875 }
4876 }
4877 }
4878 }
4879 fstring_shift_node_locations(n, lines, cols);
4880 }
4881
4882 /* Compile this expression in to an expr_ty. Add parens around the
4883 expression, in order to allow leading spaces in the expression. */
4884 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4885 fstring_compile_expr(const char *expr_start, const char *expr_end,
4886 struct compiling *c, const node *n)
4887
4888 {
4889 node *mod_n;
4890 mod_ty mod;
4891 char *str;
4892 Py_ssize_t len;
4893 const char *s;
4894
4895 assert(expr_end >= expr_start);
4896 assert(*(expr_start-1) == '{');
4897 assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4898 *expr_end == '=');
4899
4900 /* If the substring is all whitespace, it's an error. We need to catch this
4901 here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4902 because turning the expression '' in to '()' would go from being invalid
4903 to valid. */
4904 for (s = expr_start; s != expr_end; s++) {
4905 char c = *s;
4906 /* The Python parser ignores only the following whitespace
4907 characters (\r already is converted to \n). */
4908 if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4909 break;
4910 }
4911 }
4912 if (s == expr_end) {
4913 ast_error(c, n, "f-string: empty expression not allowed");
4914 return NULL;
4915 }
4916
4917 len = expr_end - expr_start;
4918 /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4919 str = PyMem_Malloc(len + 3);
4920 if (str == NULL) {
4921 PyErr_NoMemory();
4922 return NULL;
4923 }
4924
4925 str[0] = '(';
4926 memcpy(str+1, expr_start, len);
4927 str[len+1] = ')';
4928 str[len+2] = 0;
4929
4930 PyCompilerFlags cf = _PyCompilerFlags_INIT;
4931 cf.cf_flags = PyCF_ONLY_AST;
4932 mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4933 Py_eval_input, 0);
4934 if (!mod_n) {
4935 PyMem_Free(str);
4936 return NULL;
4937 }
4938 /* Reuse str to find the correct column offset. */
4939 str[0] = '{';
4940 str[len+1] = '}';
4941 fstring_fix_node_location(n, mod_n, str);
4942 mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4943 PyMem_Free(str);
4944 PyNode_Free(mod_n);
4945 if (!mod)
4946 return NULL;
4947 return mod->v.Expression.body;
4948 }
4949
/* Scan the literal (non-expression) text at the start of *str.

   Return -1 on error.

   Return 0 if we reached the end of the literal.

   Return 1 if we haven't reached the end of the literal, but we want
   the caller to process the literal up to this point. Used for
   doubled braces.

   On return *str points at where scanning should resume.  If the scanned
   literal is non-empty it is decoded and stored in *literal (which must be
   NULL on entry): with plain UTF-8 decoding when `raw` is true, otherwise
   with decode_unicode_with_escapes().
*/
static int
fstring_find_literal(const char **str, const char *end, int raw,
                     PyObject **literal, int recurse_lvl,
                     struct compiling *c, const node *n)
{
    /* Get any literal string. It ends when we hit an un-doubled left
       brace (which isn't part of a unicode name escape such as
       "\N{EULER CONSTANT}"), or the end of the string. */

    const char *s = *str;
    const char *literal_start = s;
    int result = 0;

    assert(*literal == NULL);
    while (s < end) {
        char ch = *s++;
        if (!raw && ch == '\\' && s < end) {
            /* Look at the character following the backslash. */
            ch = *s++;
            if (ch == 'N') {
                if (s < end && *s++ == '{') {
                    /* Skip the name up to and including the closing brace
                       so its braces are not taken for an expression. */
                    while (s < end && *s++ != '}') {
                    }
                    continue;
                }
                /* "\N" not followed by '{': stop scanning here. */
                break;
            }
            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
                return -1;
            }
        }
        if (ch == '{' || ch == '}') {
            /* Check for doubled braces, but only at the top level. If
               we checked at every level, then f'{0:{3}}' would fail
               with the two closing braces. */
            if (recurse_lvl == 0) {
                if (s < end && *s == ch) {
                    /* We're going to tell the caller that the literal ends
                       here, but that they should continue scanning. But also
                       skip over the second brace when we resume scanning. */
                    *str = s + 1;
                    result = 1;
                    goto done;
                }

                /* Where a single '{' is the start of a new expression, a
                   single '}' is not allowed. */
                if (ch == '}') {
                    *str = s - 1;
                    ast_error(c, n, "f-string: single '}' is not allowed");
                    return -1;
                }
            }
            /* We're either at a '{', which means we're starting another
               expression; or a '}', which means we're at the end of this
               f-string (for a nested format_spec). */
            s--;
            break;
        }
    }
    *str = s;
    assert(s <= end);
    assert(s == end || *s == '{' || *s == '}');
done:
    /* The literal is the text from literal_start up to (not including) s.
       For a doubled brace, s points at the second brace, so exactly one
       brace ends up in the literal. */
    if (literal_start != s) {
        if (raw)
            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                    s - literal_start,
                                                    NULL, NULL);
        else
            *literal = decode_unicode_with_escapes(c, n, literal_start,
                                                   s - literal_start);
        if (!*literal)
            return -1;
    }
    return result;
}
5034
/* Forward declaration because parsing is recursive: fstring_find_expr()
   calls fstring_parse() to handle a format spec. */
static expr_ty
fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
              struct compiling *c, const node *n);
5039
5040 /* Parse the f-string at *str, ending at end. We know *str starts an
5041 expression (so it must be a '{'). Returns the FormattedValue node, which
5042 includes the expression, conversion character, format_spec expression, and
5043 optionally the text of the expression (if = is used).
5044
5045 Note that I don't do a perfect job here: I don't make sure that a
5046 closing brace doesn't match an opening paren, for example. It
5047 doesn't need to error on all invalid expressions, just correctly
5048 find the end of all valid ones. Any errors inside the expression
5049 will be caught when we parse it later.
5050
5051 *expression is set to the expression. For an '=' "debug" expression,
5052 *expr_text is set to the debug text (the original text of the expression,
5053 including the '=' and any whitespace around it, as a string object). If
5054 not a debug expression, *expr_text set to NULL. */
5055 static int
fstring_find_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5056 fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
5057 PyObject **expr_text, expr_ty *expression,
5058 struct compiling *c, const node *n)
5059 {
5060 /* Return -1 on error, else 0. */
5061
5062 const char *expr_start;
5063 const char *expr_end;
5064 expr_ty simple_expression;
5065 expr_ty format_spec = NULL; /* Optional format specifier. */
5066 int conversion = -1; /* The conversion char. Use default if not
5067 specified, or !r if using = and no format
5068 spec. */
5069
5070 /* 0 if we're not in a string, else the quote char we're trying to
5071 match (single or double quote). */
5072 char quote_char = 0;
5073
5074 /* If we're inside a string, 1=normal, 3=triple-quoted. */
5075 int string_type = 0;
5076
5077 /* Keep track of nesting level for braces/parens/brackets in
5078 expressions. */
5079 Py_ssize_t nested_depth = 0;
5080 char parenstack[MAXLEVEL];
5081
5082 *expr_text = NULL;
5083
5084 /* Can only nest one level deep. */
5085 if (recurse_lvl >= 2) {
5086 ast_error(c, n, "f-string: expressions nested too deeply");
5087 goto error;
5088 }
5089
5090 /* The first char must be a left brace, or we wouldn't have gotten
5091 here. Skip over it. */
5092 assert(**str == '{');
5093 *str += 1;
5094
5095 expr_start = *str;
5096 for (; *str < end; (*str)++) {
5097 char ch;
5098
5099 /* Loop invariants. */
5100 assert(nested_depth >= 0);
5101 assert(*str >= expr_start && *str < end);
5102 if (quote_char)
5103 assert(string_type == 1 || string_type == 3);
5104 else
5105 assert(string_type == 0);
5106
5107 ch = **str;
5108 /* Nowhere inside an expression is a backslash allowed. */
5109 if (ch == '\\') {
5110 /* Error: can't include a backslash character, inside
5111 parens or strings or not. */
5112 ast_error(c, n,
5113 "f-string expression part "
5114 "cannot include a backslash");
5115 goto error;
5116 }
5117 if (quote_char) {
5118 /* We're inside a string. See if we're at the end. */
5119 /* This code needs to implement the same non-error logic
5120 as tok_get from tokenizer.c, at the letter_quote
5121 label. To actually share that code would be a
5122 nightmare. But, it's unlikely to change and is small,
5123 so duplicate it here. Note we don't need to catch all
5124 of the errors, since they'll be caught when parsing the
5125 expression. We just need to match the non-error
5126 cases. Thus we can ignore \n in single-quoted strings,
5127 for example. Or non-terminated strings. */
5128 if (ch == quote_char) {
5129 /* Does this match the string_type (single or triple
5130 quoted)? */
5131 if (string_type == 3) {
5132 if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5133 /* We're at the end of a triple quoted string. */
5134 *str += 2;
5135 string_type = 0;
5136 quote_char = 0;
5137 continue;
5138 }
5139 } else {
5140 /* We're at the end of a normal string. */
5141 quote_char = 0;
5142 string_type = 0;
5143 continue;
5144 }
5145 }
5146 } else if (ch == '\'' || ch == '"') {
5147 /* Is this a triple quoted string? */
5148 if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5149 string_type = 3;
5150 *str += 2;
5151 } else {
5152 /* Start of a normal string. */
5153 string_type = 1;
5154 }
5155 /* Start looking for the end of the string. */
5156 quote_char = ch;
5157 } else if (ch == '[' || ch == '{' || ch == '(') {
5158 if (nested_depth >= MAXLEVEL) {
5159 ast_error(c, n, "f-string: too many nested parenthesis");
5160 goto error;
5161 }
5162 parenstack[nested_depth] = ch;
5163 nested_depth++;
5164 } else if (ch == '#') {
5165 /* Error: can't include a comment character, inside parens
5166 or not. */
5167 ast_error(c, n, "f-string expression part cannot include '#'");
5168 goto error;
5169 } else if (nested_depth == 0 &&
5170 (ch == '!' || ch == ':' || ch == '}' ||
5171 ch == '=' || ch == '>' || ch == '<')) {
5172 /* See if there's a next character. */
5173 if (*str+1 < end) {
5174 char next = *(*str+1);
5175
5176 /* For "!=". since '=' is not an allowed conversion character,
5177 nothing is lost in this test. */
5178 if ((ch == '!' && next == '=') || /* != */
5179 (ch == '=' && next == '=') || /* == */
5180 (ch == '<' && next == '=') || /* <= */
5181 (ch == '>' && next == '=') /* >= */
5182 ) {
5183 *str += 1;
5184 continue;
5185 }
5186 /* Don't get out of the loop for these, if they're single
5187 chars (not part of 2-char tokens). If by themselves, they
5188 don't end an expression (unlike say '!'). */
5189 if (ch == '>' || ch == '<') {
5190 continue;
5191 }
5192 }
5193
5194 /* Normal way out of this loop. */
5195 break;
5196 } else if (ch == ']' || ch == '}' || ch == ')') {
5197 if (!nested_depth) {
5198 ast_error(c, n, "f-string: unmatched '%c'", ch);
5199 goto error;
5200 }
5201 nested_depth--;
5202 int opening = parenstack[nested_depth];
5203 if (!((opening == '(' && ch == ')') ||
5204 (opening == '[' && ch == ']') ||
5205 (opening == '{' && ch == '}')))
5206 {
5207 ast_error(c, n,
5208 "f-string: closing parenthesis '%c' "
5209 "does not match opening parenthesis '%c'",
5210 ch, opening);
5211 goto error;
5212 }
5213 } else {
5214 /* Just consume this char and loop around. */
5215 }
5216 }
5217 expr_end = *str;
5218 /* If we leave this loop in a string or with mismatched parens, we
5219 don't care. We'll get a syntax error when compiling the
5220 expression. But, we can produce a better error message, so
5221 let's just do that.*/
5222 if (quote_char) {
5223 ast_error(c, n, "f-string: unterminated string");
5224 goto error;
5225 }
5226 if (nested_depth) {
5227 int opening = parenstack[nested_depth - 1];
5228 ast_error(c, n, "f-string: unmatched '%c'", opening);
5229 goto error;
5230 }
5231
5232 if (*str >= end)
5233 goto unexpected_end_of_string;
5234
5235 /* Compile the expression as soon as possible, so we show errors
5236 related to the expression before errors related to the
5237 conversion or format_spec. */
5238 simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
5239 if (!simple_expression)
5240 goto error;
5241
5242 /* Check for =, which puts the text value of the expression in
5243 expr_text. */
5244 if (**str == '=') {
5245 if (c->c_feature_version < 8) {
5246 ast_error(c, n,
5247 "f-string: self documenting expressions are "
5248 "only supported in Python 3.8 and greater");
5249 goto error;
5250 }
5251 *str += 1;
5252
5253 /* Skip over ASCII whitespace. No need to test for end of string
5254 here, since we know there's at least a trailing quote somewhere
5255 ahead. */
5256 while (Py_ISSPACE(**str)) {
5257 *str += 1;
5258 }
5259
5260 /* Set *expr_text to the text of the expression. */
5261 *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
5262 if (!*expr_text) {
5263 goto error;
5264 }
5265 }
5266
5267 /* Check for a conversion char, if present. */
5268 if (**str == '!') {
5269 *str += 1;
5270 if (*str >= end)
5271 goto unexpected_end_of_string;
5272
5273 conversion = **str;
5274 *str += 1;
5275
5276 /* Validate the conversion. */
5277 if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
5278 ast_error(c, n,
5279 "f-string: invalid conversion character: "
5280 "expected 's', 'r', or 'a'");
5281 goto error;
5282 }
5283
5284 }
5285
5286 /* Check for the format spec, if present. */
5287 if (*str >= end)
5288 goto unexpected_end_of_string;
5289 if (**str == ':') {
5290 *str += 1;
5291 if (*str >= end)
5292 goto unexpected_end_of_string;
5293
5294 /* Parse the format spec. */
5295 format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
5296 if (!format_spec)
5297 goto error;
5298 }
5299
5300 if (*str >= end || **str != '}')
5301 goto unexpected_end_of_string;
5302
5303 /* We're at a right brace. Consume it. */
5304 assert(*str < end);
5305 assert(**str == '}');
5306 *str += 1;
5307
5308 /* If we're in = mode (detected by non-NULL expr_text), and have no format
5309 spec and no explict conversion, set the conversion to 'r'. */
5310 if (*expr_text && format_spec == NULL && conversion == -1) {
5311 conversion = 'r';
5312 }
5313
5314 /* And now create the FormattedValue node that represents this
5315 entire expression with the conversion and format spec. */
5316 *expression = FormattedValue(simple_expression, conversion,
5317 format_spec, LINENO(n),
5318 n->n_col_offset, n->n_end_lineno,
5319 n->n_end_col_offset, c->c_arena);
5320 if (!*expression)
5321 goto error;
5322
5323 return 0;
5324
5325 unexpected_end_of_string:
5326 ast_error(c, n, "f-string: expecting '}'");
5327 /* Falls through to error. */
5328
5329 error:
5330 Py_XDECREF(*expr_text);
5331 return -1;
5332
5333 }
5334
5335 /* Return -1 on error.
5336
5337 Return 0 if we have a literal (possible zero length) and an
5338 expression (zero length if at the end of the string.
5339
5340 Return 1 if we have a literal, but no expression, and we want the
5341 caller to call us again. This is used to deal with doubled
5342 braces.
5343
5344 When called multiple times on the string 'a{{b{0}c', this function
5345 will return:
5346
5347 1. the literal 'a{' with no expression, and a return value
5348 of 1. Despite the fact that there's no expression, the return
5349 value of 1 means we're not finished yet.
5350
5351 2. the literal 'b' and the expression '0', with a return value of
5352 0. The fact that there's an expression means we're not finished.
5353
5354 3. literal 'c' with no expression and a return value of 0. The
5355 combination of the return value of 0 with no expression means
5356 we're finished.
5357 */
5358 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5359 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5360 int recurse_lvl, PyObject **literal,
5361 PyObject **expr_text, expr_ty *expression,
5362 struct compiling *c, const node *n)
5363 {
5364 int result;
5365
5366 assert(*literal == NULL && *expression == NULL);
5367
5368 /* Get any literal string. */
5369 result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5370 if (result < 0)
5371 goto error;
5372
5373 assert(result == 0 || result == 1);
5374
5375 if (result == 1)
5376 /* We have a literal, but don't look at the expression. */
5377 return 1;
5378
5379 if (*str >= end || **str == '}')
5380 /* We're at the end of the string or the end of a nested
5381 f-string: no expression. The top-level error case where we
5382 expect to be at the end of the string but we're at a '}' is
5383 handled later. */
5384 return 0;
5385
5386 /* We must now be the start of an expression, on a '{'. */
5387 assert(**str == '{');
5388
5389 if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5390 expression, c, n) < 0)
5391 goto error;
5392
5393 return 0;
5394
5395 error:
5396 Py_CLEAR(*literal);
5397 return -1;
5398 }
5399
/* Number of expr_ty slots embedded directly in an ExprList. */
#define EXPRLIST_N_CACHED 64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* The embedded (cached) slots. */
} ExprList;
5418
5419 #ifdef NDEBUG
5420 #define ExprList_check_invariants(l)
5421 #else
5422 static void
ExprList_check_invariants(ExprList * l)5423 ExprList_check_invariants(ExprList *l)
5424 {
5425 /* Check our invariants. Make sure this object is "live", and
5426 hasn't been deallocated. */
5427 assert(l->size >= 0);
5428 assert(l->p != NULL);
5429 if (l->size <= EXPRLIST_N_CACHED)
5430 assert(l->data == l->p);
5431 }
5432 #endif
5433
5434 static void
ExprList_Init(ExprList * l)5435 ExprList_Init(ExprList *l)
5436 {
5437 l->allocated = EXPRLIST_N_CACHED;
5438 l->size = 0;
5439
5440 /* Until we start allocating dynamically, p points to data. */
5441 l->p = l->data;
5442
5443 ExprList_check_invariants(l);
5444 }
5445
5446 static int
ExprList_Append(ExprList * l,expr_ty exp)5447 ExprList_Append(ExprList *l, expr_ty exp)
5448 {
5449 ExprList_check_invariants(l);
5450 if (l->size >= l->allocated) {
5451 /* We need to alloc (or realloc) the memory. */
5452 Py_ssize_t new_size = l->allocated * 2;
5453
5454 /* See if we've ever allocated anything dynamically. */
5455 if (l->p == l->data) {
5456 Py_ssize_t i;
5457 /* We're still using the cached data. Switch to
5458 alloc-ing. */
5459 l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
5460 if (!l->p)
5461 return -1;
5462 /* Copy the cached data into the new buffer. */
5463 for (i = 0; i < l->size; i++)
5464 l->p[i] = l->data[i];
5465 } else {
5466 /* Just realloc. */
5467 expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
5468 if (!tmp) {
5469 PyMem_Free(l->p);
5470 l->p = NULL;
5471 return -1;
5472 }
5473 l->p = tmp;
5474 }
5475
5476 l->allocated = new_size;
5477 assert(l->allocated == 2 * l->size);
5478 }
5479
5480 l->p[l->size++] = exp;
5481
5482 ExprList_check_invariants(l);
5483 return 0;
5484 }
5485
5486 static void
ExprList_Dealloc(ExprList * l)5487 ExprList_Dealloc(ExprList *l)
5488 {
5489 ExprList_check_invariants(l);
5490
5491 /* If there's been an error, or we've never dynamically allocated,
5492 do nothing. */
5493 if (!l->p || l->p == l->data) {
5494 /* Do nothing. */
5495 } else {
5496 /* We have dynamically allocated. Free the memory. */
5497 PyMem_Free(l->p);
5498 }
5499 l->p = NULL;
5500 l->size = -1;
5501 }
5502
5503 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5504 ExprList_Finish(ExprList *l, PyArena *arena)
5505 {
5506 asdl_seq *seq;
5507
5508 ExprList_check_invariants(l);
5509
5510 /* Allocate the asdl_seq and copy the expressions in to it. */
5511 seq = _Py_asdl_seq_new(l->size, arena);
5512 if (seq) {
5513 Py_ssize_t i;
5514 for (i = 0; i < l->size; i++)
5515 asdl_seq_SET(seq, i, l->p[i]);
5516 }
5517 ExprList_Dealloc(l);
5518 return seq;
5519 }
5520
/* The FstringParser is designed to add a mix of strings and
   f-strings, and concat them together as needed. Ultimately, it
   generates an expr_ty. */
typedef struct {
    PyObject *last_str;   /* Literal text accumulated so far that has not
                             yet been turned into a node; NULL if there
                             is none. */
    ExprList expr_list;   /* Nodes produced so far, in order. */
    int fmode;            /* Set to 1 by FstringParser_ConcatFstring().
                             FstringParser_Finish() builds a JoinedStr
                             when it is set and a plain string Constant
                             otherwise. */
} FstringParser;
5529
5530 #ifdef NDEBUG
5531 #define FstringParser_check_invariants(state)
5532 #else
5533 static void
FstringParser_check_invariants(FstringParser * state)5534 FstringParser_check_invariants(FstringParser *state)
5535 {
5536 if (state->last_str)
5537 assert(PyUnicode_CheckExact(state->last_str));
5538 ExprList_check_invariants(&state->expr_list);
5539 }
5540 #endif
5541
5542 static void
FstringParser_Init(FstringParser * state)5543 FstringParser_Init(FstringParser *state)
5544 {
5545 state->last_str = NULL;
5546 state->fmode = 0;
5547 ExprList_Init(&state->expr_list);
5548 FstringParser_check_invariants(state);
5549 }
5550
5551 static void
FstringParser_Dealloc(FstringParser * state)5552 FstringParser_Dealloc(FstringParser *state)
5553 {
5554 FstringParser_check_invariants(state);
5555
5556 Py_XDECREF(state->last_str);
5557 ExprList_Dealloc(&state->expr_list);
5558 }
5559
5560 /* Constants for the following */
5561 static PyObject *u_kind;
5562
5563 /* Compute 'kind' field for string Constant (either 'u' or None) */
5564 static PyObject *
make_kind(struct compiling * c,const node * n)5565 make_kind(struct compiling *c, const node *n)
5566 {
5567 char *s = NULL;
5568 PyObject *kind = NULL;
5569
5570 /* Find the first string literal, if any */
5571 while (TYPE(n) != STRING) {
5572 if (NCH(n) == 0)
5573 return NULL;
5574 n = CHILD(n, 0);
5575 }
5576 REQ(n, STRING);
5577
5578 /* If it starts with 'u', return a PyUnicode "u" string */
5579 s = STR(n);
5580 if (s && *s == 'u') {
5581 if (!u_kind) {
5582 u_kind = PyUnicode_InternFromString("u");
5583 if (!u_kind)
5584 return NULL;
5585 }
5586 kind = u_kind;
5587 if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5588 return NULL;
5589 }
5590 Py_INCREF(kind);
5591 }
5592 return kind;
5593 }
5594
5595 /* Make a Constant node, but decref the PyUnicode object being added. */
5596 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5597 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5598 {
5599 PyObject *s = *str;
5600 PyObject *kind = NULL;
5601 *str = NULL;
5602 assert(PyUnicode_CheckExact(s));
5603 if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5604 Py_DECREF(s);
5605 return NULL;
5606 }
5607 kind = make_kind(c, n);
5608 if (kind == NULL && PyErr_Occurred())
5609 return NULL;
5610 return Constant(s, kind, LINENO(n), n->n_col_offset,
5611 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5612 }
5613
5614 /* Add a non-f-string (that is, a regular literal string). str is
5615 decref'd. */
5616 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5617 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5618 {
5619 FstringParser_check_invariants(state);
5620
5621 assert(PyUnicode_CheckExact(str));
5622
5623 if (PyUnicode_GET_LENGTH(str) == 0) {
5624 Py_DECREF(str);
5625 return 0;
5626 }
5627
5628 if (!state->last_str) {
5629 /* We didn't have a string before, so just remember this one. */
5630 state->last_str = str;
5631 } else {
5632 /* Concatenate this with the previous string. */
5633 PyUnicode_AppendAndDel(&state->last_str, str);
5634 if (!state->last_str)
5635 return -1;
5636 }
5637 FstringParser_check_invariants(state);
5638 return 0;
5639 }
5640
5641 /* Parse an f-string. The f-string is in *str to end, with no
5642 'f' or quotes. */
5643 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5644 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5645 const char *end, int raw, int recurse_lvl,
5646 struct compiling *c, const node *n)
5647 {
5648 FstringParser_check_invariants(state);
5649 state->fmode = 1;
5650
5651 /* Parse the f-string. */
5652 while (1) {
5653 PyObject *literal = NULL;
5654 PyObject *expr_text = NULL;
5655 expr_ty expression = NULL;
5656
5657 /* If there's a zero length literal in front of the
5658 expression, literal will be NULL. If we're at the end of
5659 the f-string, expression will be NULL (unless result == 1,
5660 see below). */
5661 int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5662 &literal, &expr_text,
5663 &expression, c, n);
5664 if (result < 0)
5665 return -1;
5666
5667 /* Add the literal, if any. */
5668 if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5669 Py_XDECREF(expr_text);
5670 return -1;
5671 }
5672 /* Add the expr_text, if any. */
5673 if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5674 return -1;
5675 }
5676
5677 /* We've dealt with the literal and expr_text, their ownership has
5678 been transferred to the state object. Don't look at them again. */
5679
5680 /* See if we should just loop around to get the next literal
5681 and expression, while ignoring the expression this
5682 time. This is used for un-doubling braces, as an
5683 optimization. */
5684 if (result == 1)
5685 continue;
5686
5687 if (!expression)
5688 /* We're done with this f-string. */
5689 break;
5690
5691 /* We know we have an expression. Convert any existing string
5692 to a Constant node. */
5693 if (!state->last_str) {
5694 /* Do nothing. No previous literal. */
5695 } else {
5696 /* Convert the existing last_str literal to a Constant node. */
5697 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5698 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5699 return -1;
5700 }
5701
5702 if (ExprList_Append(&state->expr_list, expression) < 0)
5703 return -1;
5704 }
5705
5706 /* If recurse_lvl is zero, then we must be at the end of the
5707 string. Otherwise, we must be at a right brace. */
5708
5709 if (recurse_lvl == 0 && *str < end-1) {
5710 ast_error(c, n, "f-string: unexpected end of string");
5711 return -1;
5712 }
5713 if (recurse_lvl != 0 && **str != '}') {
5714 ast_error(c, n, "f-string: expecting '}'");
5715 return -1;
5716 }
5717
5718 FstringParser_check_invariants(state);
5719 return 0;
5720 }
5721
5722 /* Convert the partial state reflected in last_str and expr_list to an
5723 expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5724 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5725 FstringParser_Finish(FstringParser *state, struct compiling *c,
5726 const node *n)
5727 {
5728 asdl_seq *seq;
5729
5730 FstringParser_check_invariants(state);
5731
5732 /* If we're just a constant string with no expressions, return
5733 that. */
5734 if (!state->fmode) {
5735 assert(!state->expr_list.size);
5736 if (!state->last_str) {
5737 /* Create a zero length string. */
5738 state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5739 if (!state->last_str)
5740 goto error;
5741 }
5742 return make_str_node_and_del(&state->last_str, c, n);
5743 }
5744
5745 /* Create a Constant node out of last_str, if needed. It will be the
5746 last node in our expression list. */
5747 if (state->last_str) {
5748 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5749 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5750 goto error;
5751 }
5752 /* This has already been freed. */
5753 assert(state->last_str == NULL);
5754
5755 seq = ExprList_Finish(&state->expr_list, c->c_arena);
5756 if (!seq)
5757 goto error;
5758
5759 return JoinedStr(seq, LINENO(n), n->n_col_offset,
5760 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5761
5762 error:
5763 FstringParser_Dealloc(state);
5764 return NULL;
5765 }
5766
5767 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5768 at end, parse it into an expr_ty. Return NULL on error. Adjust
5769 str to point past the parsed portion. */
5770 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5771 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5772 struct compiling *c, const node *n)
5773 {
5774 FstringParser state;
5775
5776 FstringParser_Init(&state);
5777 if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5778 c, n) < 0) {
5779 FstringParser_Dealloc(&state);
5780 return NULL;
5781 }
5782
5783 return FstringParser_Finish(&state, c, n);
5784 }
5785
5786 /* n is a Python string literal, including the bracketing quote
5787 characters, and r, b, u, &/or f prefixes (if any), and embedded
5788 escape sequences (if any). parsestr parses it, and sets *result to
5789 decoded Python string object. If the string is an f-string, set
5790 *fstr and *fstrlen to the unparsed string object. Return 0 if no
5791 errors occurred.
5792 */
5793 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5794 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5795 PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5796 {
5797 size_t len;
5798 const char *s = STR(n);
5799 int quote = Py_CHARMASK(*s);
5800 int fmode = 0;
5801 *bytesmode = 0;
5802 *rawmode = 0;
5803 *result = NULL;
5804 *fstr = NULL;
5805 if (Py_ISALPHA(quote)) {
5806 while (!*bytesmode || !*rawmode) {
5807 if (quote == 'b' || quote == 'B') {
5808 quote = *++s;
5809 *bytesmode = 1;
5810 }
5811 else if (quote == 'u' || quote == 'U') {
5812 quote = *++s;
5813 }
5814 else if (quote == 'r' || quote == 'R') {
5815 quote = *++s;
5816 *rawmode = 1;
5817 }
5818 else if (quote == 'f' || quote == 'F') {
5819 quote = *++s;
5820 fmode = 1;
5821 }
5822 else {
5823 break;
5824 }
5825 }
5826 }
5827
5828 /* fstrings are only allowed in Python 3.6 and greater */
5829 if (fmode && c->c_feature_version < 6) {
5830 ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5831 return -1;
5832 }
5833
5834 if (fmode && *bytesmode) {
5835 PyErr_BadInternalCall();
5836 return -1;
5837 }
5838 if (quote != '\'' && quote != '\"') {
5839 PyErr_BadInternalCall();
5840 return -1;
5841 }
5842 /* Skip the leading quote char. */
5843 s++;
5844 len = strlen(s);
5845 if (len > INT_MAX) {
5846 PyErr_SetString(PyExc_OverflowError,
5847 "string to parse is too long");
5848 return -1;
5849 }
5850 if (s[--len] != quote) {
5851 /* Last quote char must match the first. */
5852 PyErr_BadInternalCall();
5853 return -1;
5854 }
5855 if (len >= 4 && s[0] == quote && s[1] == quote) {
5856 /* A triple quoted string. We've already skipped one quote at
5857 the start and one at the end of the string. Now skip the
5858 two at the start. */
5859 s += 2;
5860 len -= 2;
5861 /* And check that the last two match. */
5862 if (s[--len] != quote || s[--len] != quote) {
5863 PyErr_BadInternalCall();
5864 return -1;
5865 }
5866 }
5867
5868 if (fmode) {
5869 /* Just return the bytes. The caller will parse the resulting
5870 string. */
5871 *fstr = s;
5872 *fstrlen = len;
5873 return 0;
5874 }
5875
5876 /* Not an f-string. */
5877 /* Avoid invoking escape decoding routines if possible. */
5878 *rawmode = *rawmode || strchr(s, '\\') == NULL;
5879 if (*bytesmode) {
5880 /* Disallow non-ASCII characters. */
5881 const char *ch;
5882 for (ch = s; *ch; ch++) {
5883 if (Py_CHARMASK(*ch) >= 0x80) {
5884 ast_error(c, n,
5885 "bytes can only contain ASCII "
5886 "literal characters.");
5887 return -1;
5888 }
5889 }
5890 if (*rawmode)
5891 *result = PyBytes_FromStringAndSize(s, len);
5892 else
5893 *result = decode_bytes_with_escapes(c, n, s, len);
5894 } else {
5895 if (*rawmode)
5896 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5897 else
5898 *result = decode_unicode_with_escapes(c, n, s, len);
5899 }
5900 return *result == NULL ? -1 : 0;
5901 }
5902
5903 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5904 each STRING atom, and process it as needed. For bytes, just
5905 concatenate them together, and the result will be a Constant node. For
5906 normal strings and f-strings, concatenate them together. The result
5907 will be a Constant node if there were no f-strings; a FormattedValue
5908 node if there's just an f-string (with no leading or trailing
5909 literals), or a JoinedStr node if there are multiple f-strings or
5910 any literals involved. */
5911 static expr_ty
parsestrplus(struct compiling * c,const node * n)5912 parsestrplus(struct compiling *c, const node *n)
5913 {
5914 int bytesmode = 0;
5915 PyObject *bytes_str = NULL;
5916 int i;
5917
5918 FstringParser state;
5919 FstringParser_Init(&state);
5920
5921 for (i = 0; i < NCH(n); i++) {
5922 int this_bytesmode;
5923 int this_rawmode;
5924 PyObject *s;
5925 const char *fstr;
5926 Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
5927
5928 REQ(CHILD(n, i), STRING);
5929 if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5930 &fstr, &fstrlen) != 0)
5931 goto error;
5932
5933 /* Check that we're not mixing bytes with unicode. */
5934 if (i != 0 && bytesmode != this_bytesmode) {
5935 ast_error(c, n, "cannot mix bytes and nonbytes literals");
5936 /* s is NULL if the current string part is an f-string. */
5937 Py_XDECREF(s);
5938 goto error;
5939 }
5940 bytesmode = this_bytesmode;
5941
5942 if (fstr != NULL) {
5943 int result;
5944 assert(s == NULL && !bytesmode);
5945 /* This is an f-string. Parse and concatenate it. */
5946 result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5947 this_rawmode, 0, c, n);
5948 if (result < 0)
5949 goto error;
5950 } else {
5951 /* A string or byte string. */
5952 assert(s != NULL && fstr == NULL);
5953
5954 assert(bytesmode ? PyBytes_CheckExact(s) :
5955 PyUnicode_CheckExact(s));
5956
5957 if (bytesmode) {
5958 /* For bytes, concat as we go. */
5959 if (i == 0) {
5960 /* First time, just remember this value. */
5961 bytes_str = s;
5962 } else {
5963 PyBytes_ConcatAndDel(&bytes_str, s);
5964 if (!bytes_str)
5965 goto error;
5966 }
5967 } else {
5968 /* This is a regular string. Concatenate it. */
5969 if (FstringParser_ConcatAndDel(&state, s) < 0)
5970 goto error;
5971 }
5972 }
5973 }
5974 if (bytesmode) {
5975 /* Just return the bytes object and we're done. */
5976 if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5977 goto error;
5978 return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5979 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5980 }
5981
5982 /* We're not a bytes string, bytes_str should never have been set. */
5983 assert(bytes_str == NULL);
5984
5985 return FstringParser_Finish(&state, c, n);
5986
5987 error:
5988 Py_XDECREF(bytes_str);
5989 FstringParser_Dealloc(&state);
5990 return NULL;
5991 }
5992
5993 PyObject *
_PyAST_GetDocString(asdl_seq * body)5994 _PyAST_GetDocString(asdl_seq *body)
5995 {
5996 if (!asdl_seq_LEN(body)) {
5997 return NULL;
5998 }
5999 stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
6000 if (st->kind != Expr_kind) {
6001 return NULL;
6002 }
6003 expr_ty e = st->v.Expr.value;
6004 if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
6005 return e->v.Constant.value;
6006 }
6007 return NULL;
6008 }
6009