1 #include <Python.h>
2
3 #include "pegen.h"
4 #include "string_parser.h"
5 #include "pycore_runtime.h" // _PyRuntime
6 #include "pycore_pystate.h" // _PyInterpreterState_GET()
7
8 void *
_PyPegen_dummy_name(Parser * p,...)9 _PyPegen_dummy_name(Parser *p, ...)
10 {
11 return &_PyRuntime.parser.dummy_name;
12 }
13
14 /* Creates a single-element asdl_seq* that contains a */
15 asdl_seq *
_PyPegen_singleton_seq(Parser * p,void * a)16 _PyPegen_singleton_seq(Parser *p, void *a)
17 {
18 assert(a != NULL);
19 asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
20 if (!seq) {
21 return NULL;
22 }
23 asdl_seq_SET_UNTYPED(seq, 0, a);
24 return seq;
25 }
26
27 /* Creates a copy of seq and prepends a to it */
28 asdl_seq *
_PyPegen_seq_insert_in_front(Parser * p,void * a,asdl_seq * seq)29 _PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
30 {
31 assert(a != NULL);
32 if (!seq) {
33 return _PyPegen_singleton_seq(p, a);
34 }
35
36 asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
37 if (!new_seq) {
38 return NULL;
39 }
40
41 asdl_seq_SET_UNTYPED(new_seq, 0, a);
42 for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
43 asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1));
44 }
45 return new_seq;
46 }
47
48 /* Creates a copy of seq and appends a to it */
49 asdl_seq *
_PyPegen_seq_append_to_end(Parser * p,asdl_seq * seq,void * a)50 _PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
51 {
52 assert(a != NULL);
53 if (!seq) {
54 return _PyPegen_singleton_seq(p, a);
55 }
56
57 asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
58 if (!new_seq) {
59 return NULL;
60 }
61
62 for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
63 asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
64 }
65 asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a);
66 return new_seq;
67 }
68
69 static Py_ssize_t
_get_flattened_seq_size(asdl_seq * seqs)70 _get_flattened_seq_size(asdl_seq *seqs)
71 {
72 Py_ssize_t size = 0;
73 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
74 asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
75 size += asdl_seq_LEN(inner_seq);
76 }
77 return size;
78 }
79
80 /* Flattens an asdl_seq* of asdl_seq*s */
81 asdl_seq *
_PyPegen_seq_flatten(Parser * p,asdl_seq * seqs)82 _PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
83 {
84 Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
85 assert(flattened_seq_size > 0);
86
87 asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
88 if (!flattened_seq) {
89 return NULL;
90 }
91
92 int flattened_seq_idx = 0;
93 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
94 asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
95 for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
96 asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
97 }
98 }
99 assert(flattened_seq_idx == flattened_seq_size);
100
101 return flattened_seq;
102 }
103
104 void *
_PyPegen_seq_last_item(asdl_seq * seq)105 _PyPegen_seq_last_item(asdl_seq *seq)
106 {
107 Py_ssize_t len = asdl_seq_LEN(seq);
108 return asdl_seq_GET_UNTYPED(seq, len - 1);
109 }
110
111 void *
_PyPegen_seq_first_item(asdl_seq * seq)112 _PyPegen_seq_first_item(asdl_seq *seq)
113 {
114 return asdl_seq_GET_UNTYPED(seq, 0);
115 }
116
117 /* Creates a new name of the form <first_name>.<second_name> */
118 expr_ty
_PyPegen_join_names_with_dot(Parser * p,expr_ty first_name,expr_ty second_name)119 _PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
120 {
121 assert(first_name != NULL && second_name != NULL);
122 PyObject *uni = PyUnicode_FromFormat("%U.%U",
123 first_name->v.Name.id, second_name->v.Name.id);
124 if (!uni) {
125 return NULL;
126 }
127 PyInterpreterState *interp = _PyInterpreterState_GET();
128 _PyUnicode_InternImmortal(interp, &uni);
129 if (_PyArena_AddPyObject(p->arena, uni) < 0) {
130 Py_DECREF(uni);
131 return NULL;
132 }
133
134 return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
135 }
136
137 /* Counts the total number of dots in seq's tokens */
138 int
_PyPegen_seq_count_dots(asdl_seq * seq)139 _PyPegen_seq_count_dots(asdl_seq *seq)
140 {
141 int number_of_dots = 0;
142 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
143 Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
144 switch (current_expr->type) {
145 case ELLIPSIS:
146 number_of_dots += 3;
147 break;
148 case DOT:
149 number_of_dots += 1;
150 break;
151 default:
152 Py_UNREACHABLE();
153 }
154 }
155
156 return number_of_dots;
157 }
158
159 /* Creates an alias with '*' as the identifier name */
160 alias_ty
_PyPegen_alias_for_star(Parser * p,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)161 _PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
162 int end_col_offset, PyArena *arena) {
163 PyObject *str = PyUnicode_InternFromString("*");
164 if (!str) {
165 return NULL;
166 }
167 if (_PyArena_AddPyObject(p->arena, str) < 0) {
168 Py_DECREF(str);
169 return NULL;
170 }
171 return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
172 }
173
174 /* Creates a new asdl_seq* with the identifiers of all the names in seq */
175 asdl_identifier_seq *
_PyPegen_map_names_to_ids(Parser * p,asdl_expr_seq * seq)176 _PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
177 {
178 Py_ssize_t len = asdl_seq_LEN(seq);
179 assert(len > 0);
180
181 asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
182 if (!new_seq) {
183 return NULL;
184 }
185 for (Py_ssize_t i = 0; i < len; i++) {
186 expr_ty e = asdl_seq_GET(seq, i);
187 asdl_seq_SET(new_seq, i, e->v.Name.id);
188 }
189 return new_seq;
190 }
191
192 /* Constructs a CmpopExprPair */
193 CmpopExprPair *
_PyPegen_cmpop_expr_pair(Parser * p,cmpop_ty cmpop,expr_ty expr)194 _PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
195 {
196 assert(expr != NULL);
197 CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
198 if (!a) {
199 return NULL;
200 }
201 a->cmpop = cmpop;
202 a->expr = expr;
203 return a;
204 }
205
206 asdl_int_seq *
_PyPegen_get_cmpops(Parser * p,asdl_seq * seq)207 _PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
208 {
209 Py_ssize_t len = asdl_seq_LEN(seq);
210 assert(len > 0);
211
212 asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
213 if (!new_seq) {
214 return NULL;
215 }
216 for (Py_ssize_t i = 0; i < len; i++) {
217 CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
218 asdl_seq_SET(new_seq, i, pair->cmpop);
219 }
220 return new_seq;
221 }
222
223 asdl_expr_seq *
_PyPegen_get_exprs(Parser * p,asdl_seq * seq)224 _PyPegen_get_exprs(Parser *p, asdl_seq *seq)
225 {
226 Py_ssize_t len = asdl_seq_LEN(seq);
227 assert(len > 0);
228
229 asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
230 if (!new_seq) {
231 return NULL;
232 }
233 for (Py_ssize_t i = 0; i < len; i++) {
234 CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
235 asdl_seq_SET(new_seq, i, pair->expr);
236 }
237 return new_seq;
238 }
239
240 /* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
241 static asdl_expr_seq *
_set_seq_context(Parser * p,asdl_expr_seq * seq,expr_context_ty ctx)242 _set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
243 {
244 Py_ssize_t len = asdl_seq_LEN(seq);
245 if (len == 0) {
246 return NULL;
247 }
248
249 asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
250 if (!new_seq) {
251 return NULL;
252 }
253 for (Py_ssize_t i = 0; i < len; i++) {
254 expr_ty e = asdl_seq_GET(seq, i);
255 asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
256 }
257 return new_seq;
258 }
259
260 static expr_ty
_set_name_context(Parser * p,expr_ty e,expr_context_ty ctx)261 _set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
262 {
263 return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
264 }
265
266 static expr_ty
_set_tuple_context(Parser * p,expr_ty e,expr_context_ty ctx)267 _set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
268 {
269 return _PyAST_Tuple(
270 _set_seq_context(p, e->v.Tuple.elts, ctx),
271 ctx,
272 EXTRA_EXPR(e, e));
273 }
274
275 static expr_ty
_set_list_context(Parser * p,expr_ty e,expr_context_ty ctx)276 _set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
277 {
278 return _PyAST_List(
279 _set_seq_context(p, e->v.List.elts, ctx),
280 ctx,
281 EXTRA_EXPR(e, e));
282 }
283
284 static expr_ty
_set_subscript_context(Parser * p,expr_ty e,expr_context_ty ctx)285 _set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
286 {
287 return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
288 ctx, EXTRA_EXPR(e, e));
289 }
290
291 static expr_ty
_set_attribute_context(Parser * p,expr_ty e,expr_context_ty ctx)292 _set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
293 {
294 return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
295 ctx, EXTRA_EXPR(e, e));
296 }
297
298 static expr_ty
_set_starred_context(Parser * p,expr_ty e,expr_context_ty ctx)299 _set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
300 {
301 return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
302 ctx, EXTRA_EXPR(e, e));
303 }
304
305 /* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
306 expr_ty
_PyPegen_set_expr_context(Parser * p,expr_ty expr,expr_context_ty ctx)307 _PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
308 {
309 assert(expr != NULL);
310
311 expr_ty new = NULL;
312 switch (expr->kind) {
313 case Name_kind:
314 new = _set_name_context(p, expr, ctx);
315 break;
316 case Tuple_kind:
317 new = _set_tuple_context(p, expr, ctx);
318 break;
319 case List_kind:
320 new = _set_list_context(p, expr, ctx);
321 break;
322 case Subscript_kind:
323 new = _set_subscript_context(p, expr, ctx);
324 break;
325 case Attribute_kind:
326 new = _set_attribute_context(p, expr, ctx);
327 break;
328 case Starred_kind:
329 new = _set_starred_context(p, expr, ctx);
330 break;
331 default:
332 new = expr;
333 }
334 return new;
335 }
336
337 /* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
338 KeyValuePair *
_PyPegen_key_value_pair(Parser * p,expr_ty key,expr_ty value)339 _PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
340 {
341 KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
342 if (!a) {
343 return NULL;
344 }
345 a->key = key;
346 a->value = value;
347 return a;
348 }
349
350 /* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
351 asdl_expr_seq *
_PyPegen_get_keys(Parser * p,asdl_seq * seq)352 _PyPegen_get_keys(Parser *p, asdl_seq *seq)
353 {
354 Py_ssize_t len = asdl_seq_LEN(seq);
355 asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
356 if (!new_seq) {
357 return NULL;
358 }
359 for (Py_ssize_t i = 0; i < len; i++) {
360 KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
361 asdl_seq_SET(new_seq, i, pair->key);
362 }
363 return new_seq;
364 }
365
366 /* Extracts all values from an asdl_seq* of KeyValuePair*'s */
367 asdl_expr_seq *
_PyPegen_get_values(Parser * p,asdl_seq * seq)368 _PyPegen_get_values(Parser *p, asdl_seq *seq)
369 {
370 Py_ssize_t len = asdl_seq_LEN(seq);
371 asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
372 if (!new_seq) {
373 return NULL;
374 }
375 for (Py_ssize_t i = 0; i < len; i++) {
376 KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
377 asdl_seq_SET(new_seq, i, pair->value);
378 }
379 return new_seq;
380 }
381
382 /* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */
383 KeyPatternPair *
_PyPegen_key_pattern_pair(Parser * p,expr_ty key,pattern_ty pattern)384 _PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
385 {
386 KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
387 if (!a) {
388 return NULL;
389 }
390 a->key = key;
391 a->pattern = pattern;
392 return a;
393 }
394
395 /* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */
396 asdl_expr_seq *
_PyPegen_get_pattern_keys(Parser * p,asdl_seq * seq)397 _PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
398 {
399 Py_ssize_t len = asdl_seq_LEN(seq);
400 asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
401 if (!new_seq) {
402 return NULL;
403 }
404 for (Py_ssize_t i = 0; i < len; i++) {
405 KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
406 asdl_seq_SET(new_seq, i, pair->key);
407 }
408 return new_seq;
409 }
410
411 /* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */
412 asdl_pattern_seq *
_PyPegen_get_patterns(Parser * p,asdl_seq * seq)413 _PyPegen_get_patterns(Parser *p, asdl_seq *seq)
414 {
415 Py_ssize_t len = asdl_seq_LEN(seq);
416 asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
417 if (!new_seq) {
418 return NULL;
419 }
420 for (Py_ssize_t i = 0; i < len; i++) {
421 KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
422 asdl_seq_SET(new_seq, i, pair->pattern);
423 }
424 return new_seq;
425 }
426
427 /* Constructs a NameDefaultPair */
428 NameDefaultPair *
_PyPegen_name_default_pair(Parser * p,arg_ty arg,expr_ty value,Token * tc)429 _PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
430 {
431 NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
432 if (!a) {
433 return NULL;
434 }
435 a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
436 a->value = value;
437 return a;
438 }
439
440 /* Constructs a SlashWithDefault */
441 SlashWithDefault *
_PyPegen_slash_with_default(Parser * p,asdl_arg_seq * plain_names,asdl_seq * names_with_defaults)442 _PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
443 {
444 SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
445 if (!a) {
446 return NULL;
447 }
448 a->plain_names = plain_names;
449 a->names_with_defaults = names_with_defaults;
450 return a;
451 }
452
453 /* Constructs a StarEtc */
454 StarEtc *
_PyPegen_star_etc(Parser * p,arg_ty vararg,asdl_seq * kwonlyargs,arg_ty kwarg)455 _PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
456 {
457 StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc));
458 if (!a) {
459 return NULL;
460 }
461 a->vararg = vararg;
462 a->kwonlyargs = kwonlyargs;
463 a->kwarg = kwarg;
464 return a;
465 }
466
467 asdl_seq *
_PyPegen_join_sequences(Parser * p,asdl_seq * a,asdl_seq * b)468 _PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
469 {
470 Py_ssize_t first_len = asdl_seq_LEN(a);
471 Py_ssize_t second_len = asdl_seq_LEN(b);
472 asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
473 if (!new_seq) {
474 return NULL;
475 }
476
477 int k = 0;
478 for (Py_ssize_t i = 0; i < first_len; i++) {
479 asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
480 }
481 for (Py_ssize_t i = 0; i < second_len; i++) {
482 asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
483 }
484
485 return new_seq;
486 }
487
488 static asdl_arg_seq*
_get_names(Parser * p,asdl_seq * names_with_defaults)489 _get_names(Parser *p, asdl_seq *names_with_defaults)
490 {
491 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
492 asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
493 if (!seq) {
494 return NULL;
495 }
496 for (Py_ssize_t i = 0; i < len; i++) {
497 NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
498 asdl_seq_SET(seq, i, pair->arg);
499 }
500 return seq;
501 }
502
503 static asdl_expr_seq *
_get_defaults(Parser * p,asdl_seq * names_with_defaults)504 _get_defaults(Parser *p, asdl_seq *names_with_defaults)
505 {
506 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
507 asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
508 if (!seq) {
509 return NULL;
510 }
511 for (Py_ssize_t i = 0; i < len; i++) {
512 NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
513 asdl_seq_SET(seq, i, pair->value);
514 }
515 return seq;
516 }
517
518 static int
_make_posonlyargs(Parser * p,asdl_arg_seq * slash_without_default,SlashWithDefault * slash_with_default,asdl_arg_seq ** posonlyargs)519 _make_posonlyargs(Parser *p,
520 asdl_arg_seq *slash_without_default,
521 SlashWithDefault *slash_with_default,
522 asdl_arg_seq **posonlyargs) {
523 if (slash_without_default != NULL) {
524 *posonlyargs = slash_without_default;
525 }
526 else if (slash_with_default != NULL) {
527 asdl_arg_seq *slash_with_default_names =
528 _get_names(p, slash_with_default->names_with_defaults);
529 if (!slash_with_default_names) {
530 return -1;
531 }
532 *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
533 p,
534 (asdl_seq*)slash_with_default->plain_names,
535 (asdl_seq*)slash_with_default_names);
536 }
537 else {
538 *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
539 }
540 return *posonlyargs == NULL ? -1 : 0;
541 }
542
543 static int
_make_posargs(Parser * p,asdl_arg_seq * plain_names,asdl_seq * names_with_default,asdl_arg_seq ** posargs)544 _make_posargs(Parser *p,
545 asdl_arg_seq *plain_names,
546 asdl_seq *names_with_default,
547 asdl_arg_seq **posargs) {
548 if (plain_names != NULL && names_with_default != NULL) {
549 asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
550 if (!names_with_default_names) {
551 return -1;
552 }
553 *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
554 p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names);
555 }
556 else if (plain_names == NULL && names_with_default != NULL) {
557 *posargs = _get_names(p, names_with_default);
558 }
559 else if (plain_names != NULL && names_with_default == NULL) {
560 *posargs = plain_names;
561 }
562 else {
563 *posargs = _Py_asdl_arg_seq_new(0, p->arena);
564 }
565 return *posargs == NULL ? -1 : 0;
566 }
567
568 static int
_make_posdefaults(Parser * p,SlashWithDefault * slash_with_default,asdl_seq * names_with_default,asdl_expr_seq ** posdefaults)569 _make_posdefaults(Parser *p,
570 SlashWithDefault *slash_with_default,
571 asdl_seq *names_with_default,
572 asdl_expr_seq **posdefaults) {
573 if (slash_with_default != NULL && names_with_default != NULL) {
574 asdl_expr_seq *slash_with_default_values =
575 _get_defaults(p, slash_with_default->names_with_defaults);
576 if (!slash_with_default_values) {
577 return -1;
578 }
579 asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
580 if (!names_with_default_values) {
581 return -1;
582 }
583 *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
584 p,
585 (asdl_seq*)slash_with_default_values,
586 (asdl_seq*)names_with_default_values);
587 }
588 else if (slash_with_default == NULL && names_with_default != NULL) {
589 *posdefaults = _get_defaults(p, names_with_default);
590 }
591 else if (slash_with_default != NULL && names_with_default == NULL) {
592 *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
593 }
594 else {
595 *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
596 }
597 return *posdefaults == NULL ? -1 : 0;
598 }
599
600 static int
_make_kwargs(Parser * p,StarEtc * star_etc,asdl_arg_seq ** kwonlyargs,asdl_expr_seq ** kwdefaults)601 _make_kwargs(Parser *p, StarEtc *star_etc,
602 asdl_arg_seq **kwonlyargs,
603 asdl_expr_seq **kwdefaults) {
604 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
605 *kwonlyargs = _get_names(p, star_etc->kwonlyargs);
606 }
607 else {
608 *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
609 }
610
611 if (*kwonlyargs == NULL) {
612 return -1;
613 }
614
615 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
616 *kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
617 }
618 else {
619 *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
620 }
621
622 if (*kwdefaults == NULL) {
623 return -1;
624 }
625
626 return 0;
627 }
628
629 /* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
630 arguments_ty
_PyPegen_make_arguments(Parser * p,asdl_arg_seq * slash_without_default,SlashWithDefault * slash_with_default,asdl_arg_seq * plain_names,asdl_seq * names_with_default,StarEtc * star_etc)631 _PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
632 SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
633 asdl_seq *names_with_default, StarEtc *star_etc)
634 {
635 asdl_arg_seq *posonlyargs;
636 if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
637 return NULL;
638 }
639
640 asdl_arg_seq *posargs;
641 if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
642 return NULL;
643 }
644
645 asdl_expr_seq *posdefaults;
646 if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) {
647 return NULL;
648 }
649
650 arg_ty vararg = NULL;
651 if (star_etc != NULL && star_etc->vararg != NULL) {
652 vararg = star_etc->vararg;
653 }
654
655 asdl_arg_seq *kwonlyargs;
656 asdl_expr_seq *kwdefaults;
657 if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
658 return NULL;
659 }
660
661 arg_ty kwarg = NULL;
662 if (star_etc != NULL && star_etc->kwarg != NULL) {
663 kwarg = star_etc->kwarg;
664 }
665
666 return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
667 kwdefaults, kwarg, posdefaults, p->arena);
668 }
669
670
671 /* Constructs an empty arguments_ty object, that gets used when a function accepts no
672 * arguments. */
673 arguments_ty
_PyPegen_empty_arguments(Parser * p)674 _PyPegen_empty_arguments(Parser *p)
675 {
676 asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
677 if (!posonlyargs) {
678 return NULL;
679 }
680 asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena);
681 if (!posargs) {
682 return NULL;
683 }
684 asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
685 if (!posdefaults) {
686 return NULL;
687 }
688 asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
689 if (!kwonlyargs) {
690 return NULL;
691 }
692 asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
693 if (!kwdefaults) {
694 return NULL;
695 }
696
697 return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
698 kwdefaults, NULL, posdefaults, p->arena);
699 }
700
701 /* Encapsulates the value of an operator_ty into an AugOperator struct */
702 AugOperator *
_PyPegen_augoperator(Parser * p,operator_ty kind)703 _PyPegen_augoperator(Parser *p, operator_ty kind)
704 {
705 AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator));
706 if (!a) {
707 return NULL;
708 }
709 a->kind = kind;
710 return a;
711 }
712
713 /* Construct a FunctionDef equivalent to function_def, but with decorators */
714 stmt_ty
_PyPegen_function_def_decorators(Parser * p,asdl_expr_seq * decorators,stmt_ty function_def)715 _PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
716 {
717 assert(function_def != NULL);
718 if (function_def->kind == AsyncFunctionDef_kind) {
719 return _PyAST_AsyncFunctionDef(
720 function_def->v.AsyncFunctionDef.name,
721 function_def->v.AsyncFunctionDef.args,
722 function_def->v.AsyncFunctionDef.body, decorators,
723 function_def->v.AsyncFunctionDef.returns,
724 function_def->v.AsyncFunctionDef.type_comment,
725 function_def->v.AsyncFunctionDef.type_params,
726 function_def->lineno, function_def->col_offset,
727 function_def->end_lineno, function_def->end_col_offset, p->arena);
728 }
729
730 return _PyAST_FunctionDef(
731 function_def->v.FunctionDef.name,
732 function_def->v.FunctionDef.args,
733 function_def->v.FunctionDef.body, decorators,
734 function_def->v.FunctionDef.returns,
735 function_def->v.FunctionDef.type_comment,
736 function_def->v.FunctionDef.type_params,
737 function_def->lineno, function_def->col_offset,
738 function_def->end_lineno, function_def->end_col_offset, p->arena);
739 }
740
741 /* Construct a ClassDef equivalent to class_def, but with decorators */
742 stmt_ty
_PyPegen_class_def_decorators(Parser * p,asdl_expr_seq * decorators,stmt_ty class_def)743 _PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
744 {
745 assert(class_def != NULL);
746 return _PyAST_ClassDef(
747 class_def->v.ClassDef.name,
748 class_def->v.ClassDef.bases, class_def->v.ClassDef.keywords,
749 class_def->v.ClassDef.body, decorators,
750 class_def->v.ClassDef.type_params,
751 class_def->lineno, class_def->col_offset, class_def->end_lineno,
752 class_def->end_col_offset, p->arena);
753 }
754
755 /* Construct a KeywordOrStarred */
756 KeywordOrStarred *
_PyPegen_keyword_or_starred(Parser * p,void * element,int is_keyword)757 _PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
758 {
759 KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
760 if (!a) {
761 return NULL;
762 }
763 a->element = element;
764 a->is_keyword = is_keyword;
765 return a;
766 }
767
768 /* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
769 static int
_seq_number_of_starred_exprs(asdl_seq * seq)770 _seq_number_of_starred_exprs(asdl_seq *seq)
771 {
772 int n = 0;
773 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
774 KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
775 if (!k->is_keyword) {
776 n++;
777 }
778 }
779 return n;
780 }
781
782 /* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
783 asdl_expr_seq *
_PyPegen_seq_extract_starred_exprs(Parser * p,asdl_seq * kwargs)784 _PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
785 {
786 int new_len = _seq_number_of_starred_exprs(kwargs);
787 if (new_len == 0) {
788 return NULL;
789 }
790 asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
791 if (!new_seq) {
792 return NULL;
793 }
794
795 int idx = 0;
796 for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
797 KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
798 if (!k->is_keyword) {
799 asdl_seq_SET(new_seq, idx++, k->element);
800 }
801 }
802 return new_seq;
803 }
804
805 /* Return a new asdl_seq* with only the keywords in kwargs */
806 asdl_keyword_seq*
_PyPegen_seq_delete_starred_exprs(Parser * p,asdl_seq * kwargs)807 _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
808 {
809 Py_ssize_t len = asdl_seq_LEN(kwargs);
810 Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
811 if (new_len == 0) {
812 return NULL;
813 }
814 asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
815 if (!new_seq) {
816 return NULL;
817 }
818
819 int idx = 0;
820 for (Py_ssize_t i = 0; i < len; i++) {
821 KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
822 if (k->is_keyword) {
823 asdl_seq_SET(new_seq, idx++, k->element);
824 }
825 }
826 return new_seq;
827 }
828
829 expr_ty
_PyPegen_ensure_imaginary(Parser * p,expr_ty exp)830 _PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
831 {
832 if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) {
833 RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
834 return NULL;
835 }
836 return exp;
837 }
838
839 expr_ty
_PyPegen_ensure_real(Parser * p,expr_ty exp)840 _PyPegen_ensure_real(Parser *p, expr_ty exp)
841 {
842 if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) {
843 RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
844 return NULL;
845 }
846 return exp;
847 }
848
849 mod_ty
_PyPegen_make_module(Parser * p,asdl_stmt_seq * a)850 _PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
851 asdl_type_ignore_seq *type_ignores = NULL;
852 Py_ssize_t num = p->type_ignore_comments.num_items;
853 if (num > 0) {
854 // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
855 type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
856 if (type_ignores == NULL) {
857 return NULL;
858 }
859 for (int i = 0; i < num; i++) {
860 PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
861 if (tag == NULL) {
862 return NULL;
863 }
864 type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
865 tag, p->arena);
866 if (ti == NULL) {
867 return NULL;
868 }
869 asdl_seq_SET(type_ignores, i, ti);
870 }
871 }
872 return _PyAST_Module(a, type_ignores, p->arena);
873 }
874
875 PyObject *
_PyPegen_new_type_comment(Parser * p,const char * s)876 _PyPegen_new_type_comment(Parser *p, const char *s)
877 {
878 PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
879 if (res == NULL) {
880 return NULL;
881 }
882 if (_PyArena_AddPyObject(p->arena, res) < 0) {
883 Py_DECREF(res);
884 return NULL;
885 }
886 return res;
887 }
888
889 arg_ty
_PyPegen_add_type_comment_to_arg(Parser * p,arg_ty a,Token * tc)890 _PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
891 {
892 if (tc == NULL) {
893 return a;
894 }
895 const char *bytes = PyBytes_AsString(tc->bytes);
896 if (bytes == NULL) {
897 return NULL;
898 }
899 PyObject *tco = _PyPegen_new_type_comment(p, bytes);
900 if (tco == NULL) {
901 return NULL;
902 }
903 return _PyAST_arg(a->arg, a->annotation, tco,
904 a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
905 p->arena);
906 }
907
908 /* Checks if the NOTEQUAL token is valid given the current parser flags
909 0 indicates success and nonzero indicates failure (an exception may be set) */
910 int
_PyPegen_check_barry_as_flufl(Parser * p,Token * t)911 _PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
912 assert(t->bytes != NULL);
913 assert(t->type == NOTEQUAL);
914
915 const char* tok_str = PyBytes_AS_STRING(t->bytes);
916 if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) {
917 RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
918 return -1;
919 }
920 if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
921 return strcmp(tok_str, "!=");
922 }
923 return 0;
924 }
925
926 int
_PyPegen_check_legacy_stmt(Parser * p,expr_ty name)927 _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
928 if (name->kind != Name_kind) {
929 return 0;
930 }
931 const char* candidates[2] = {"print", "exec"};
932 for (int i=0; i<2; i++) {
933 if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) {
934 return 1;
935 }
936 }
937 return 0;
938 }
939
940 static ResultTokenWithMetadata *
result_token_with_metadata(Parser * p,void * result,PyObject * metadata)941 result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
942 {
943 ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
944 if (res == NULL) {
945 return NULL;
946 }
947 res->metadata = metadata;
948 res->result = result;
949 return res;
950 }
951
952 ResultTokenWithMetadata *
_PyPegen_check_fstring_conversion(Parser * p,Token * conv_token,expr_ty conv)953 _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
954 {
955 if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
956 return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
957 conv_token, conv,
958 "f-string: conversion type must come right after the exclamanation mark"
959 );
960 }
961 return result_token_with_metadata(p, conv, conv_token->metadata);
962 }
963
964 static asdl_expr_seq *
965 unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions);
966 ResultTokenWithMetadata *
_PyPegen_setup_full_format_spec(Parser * p,Token * colon,asdl_expr_seq * spec,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)967 _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
968 int end_lineno, int end_col_offset, PyArena *arena)
969 {
970 if (!spec) {
971 return NULL;
972 }
973
974 // This is needed to keep compatibility with 3.11, where an empty format
975 // spec is parsed as an *empty* JoinedStr node, instead of having an empty
976 // constant in it.
977 Py_ssize_t n_items = asdl_seq_LEN(spec);
978 Py_ssize_t non_empty_count = 0;
979 for (Py_ssize_t i = 0; i < n_items; i++) {
980 expr_ty item = asdl_seq_GET(spec, i);
981 non_empty_count += !(item->kind == Constant_kind &&
982 PyUnicode_CheckExact(item->v.Constant.value) &&
983 PyUnicode_GET_LENGTH(item->v.Constant.value) == 0);
984 }
985 if (non_empty_count != n_items) {
986 asdl_expr_seq *resized_spec =
987 _Py_asdl_expr_seq_new(non_empty_count, p->arena);
988 if (resized_spec == NULL) {
989 return NULL;
990 }
991 Py_ssize_t j = 0;
992 for (Py_ssize_t i = 0; i < n_items; i++) {
993 expr_ty item = asdl_seq_GET(spec, i);
994 if (item->kind == Constant_kind &&
995 PyUnicode_CheckExact(item->v.Constant.value) &&
996 PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
997 continue;
998 }
999 asdl_seq_SET(resized_spec, j++, item);
1000 }
1001 assert(j == non_empty_count);
1002 spec = resized_spec;
1003 }
1004 expr_ty res;
1005 Py_ssize_t n = asdl_seq_LEN(spec);
1006 if (n == 0 || (n == 1 && asdl_seq_GET(spec, 0)->kind == Constant_kind)) {
1007 res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
1008 end_col_offset, p->arena);
1009 } else {
1010 res = _PyPegen_concatenate_strings(p, spec,
1011 lineno, col_offset, end_lineno,
1012 end_col_offset, arena);
1013 }
1014 if (!res) {
1015 return NULL;
1016 }
1017 return result_token_with_metadata(p, res, colon->metadata);
1018 }
1019
1020 const char *
_PyPegen_get_expr_name(expr_ty e)1021 _PyPegen_get_expr_name(expr_ty e)
1022 {
1023 assert(e != NULL);
1024 switch (e->kind) {
1025 case Attribute_kind:
1026 return "attribute";
1027 case Subscript_kind:
1028 return "subscript";
1029 case Starred_kind:
1030 return "starred";
1031 case Name_kind:
1032 return "name";
1033 case List_kind:
1034 return "list";
1035 case Tuple_kind:
1036 return "tuple";
1037 case Lambda_kind:
1038 return "lambda";
1039 case Call_kind:
1040 return "function call";
1041 case BoolOp_kind:
1042 case BinOp_kind:
1043 case UnaryOp_kind:
1044 return "expression";
1045 case GeneratorExp_kind:
1046 return "generator expression";
1047 case Yield_kind:
1048 case YieldFrom_kind:
1049 return "yield expression";
1050 case Await_kind:
1051 return "await expression";
1052 case ListComp_kind:
1053 return "list comprehension";
1054 case SetComp_kind:
1055 return "set comprehension";
1056 case DictComp_kind:
1057 return "dict comprehension";
1058 case Dict_kind:
1059 return "dict literal";
1060 case Set_kind:
1061 return "set display";
1062 case JoinedStr_kind:
1063 case FormattedValue_kind:
1064 return "f-string expression";
1065 case Constant_kind: {
1066 PyObject *value = e->v.Constant.value;
1067 if (value == Py_None) {
1068 return "None";
1069 }
1070 if (value == Py_False) {
1071 return "False";
1072 }
1073 if (value == Py_True) {
1074 return "True";
1075 }
1076 if (value == Py_Ellipsis) {
1077 return "ellipsis";
1078 }
1079 return "literal";
1080 }
1081 case Compare_kind:
1082 return "comparison";
1083 case IfExp_kind:
1084 return "conditional expression";
1085 case NamedExpr_kind:
1086 return "named expression";
1087 default:
1088 PyErr_Format(PyExc_SystemError,
1089 "unexpected expression in assignment %d (line %d)",
1090 e->kind, e->lineno);
1091 return NULL;
1092 }
1093 }
1094
1095 expr_ty
_PyPegen_get_last_comprehension_item(comprehension_ty comprehension)1096 _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
1097 if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) {
1098 return comprehension->iter;
1099 }
1100 return PyPegen_last_item(comprehension->ifs, expr_ty);
1101 }
1102
_PyPegen_collect_call_seqs(Parser * p,asdl_expr_seq * a,asdl_seq * b,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)1103 expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
1104 int lineno, int col_offset, int end_lineno,
1105 int end_col_offset, PyArena *arena) {
1106 Py_ssize_t args_len = asdl_seq_LEN(a);
1107 Py_ssize_t total_len = args_len;
1108
1109 if (b == NULL) {
1110 return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
1111 end_lineno, end_col_offset, arena);
1112
1113 }
1114
1115 asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
1116 asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);
1117
1118 if (starreds) {
1119 total_len += asdl_seq_LEN(starreds);
1120 }
1121
1122 asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena);
1123 if (args == NULL) {
1124 return NULL;
1125 }
1126
1127 Py_ssize_t i = 0;
1128 for (i = 0; i < args_len; i++) {
1129 asdl_seq_SET(args, i, asdl_seq_GET(a, i));
1130 }
1131 for (; i < total_len; i++) {
1132 asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
1133 }
1134
1135 return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
1136 col_offset, end_lineno, end_col_offset, arena);
1137 }
1138
1139 // AST Error reporting helpers
1140
1141 expr_ty
_PyPegen_get_invalid_target(expr_ty e,TARGETS_TYPE targets_type)1142 _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
1143 {
1144 if (e == NULL) {
1145 return NULL;
1146 }
1147
1148 #define VISIT_CONTAINER(CONTAINER, TYPE) do { \
1149 Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
1150 for (Py_ssize_t i = 0; i < len; i++) {\
1151 expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
1152 expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
1153 if (child != NULL) {\
1154 return child;\
1155 }\
1156 }\
1157 } while (0)
1158
1159 // We only need to visit List and Tuple nodes recursively as those
1160 // are the only ones that can contain valid names in targets when
1161 // they are parsed as expressions. Any other kind of expression
1162 // that is a container (like Sets or Dicts) is directly invalid and
1163 // we don't need to visit it recursively.
1164
1165 switch (e->kind) {
1166 case List_kind:
1167 VISIT_CONTAINER(e, List);
1168 return NULL;
1169 case Tuple_kind:
1170 VISIT_CONTAINER(e, Tuple);
1171 return NULL;
1172 case Starred_kind:
1173 if (targets_type == DEL_TARGETS) {
1174 return e;
1175 }
1176 return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
1177 case Compare_kind:
1178 // This is needed, because the `a in b` in `for a in b` gets parsed
1179 // as a comparison, and so we need to search the left side of the comparison
1180 // for invalid targets.
1181 if (targets_type == FOR_TARGETS) {
1182 cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
1183 if (cmpop == In) {
1184 return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
1185 }
1186 return NULL;
1187 }
1188 return e;
1189 case Name_kind:
1190 case Subscript_kind:
1191 case Attribute_kind:
1192 return NULL;
1193 default:
1194 return e;
1195 }
1196 }
1197
/* Raise the SyntaxError for a positional argument placed after keyword
 * arguments in the Call node `e`.
 *
 * The message mentions "keyword argument unpacking" when any keyword of the
 * call has no name (its `arg` is NULL).  Returns whatever
 * RAISE_SYNTAX_ERROR yields. */
void *
_PyPegen_arguments_parsing_error(Parser *p, expr_ty e)
{
    asdl_keyword_seq *kwds = e->v.Call.keywords;
    Py_ssize_t n_kwds = asdl_seq_LEN(kwds);

    int saw_unpacking = 0;
    for (Py_ssize_t idx = 0; idx < n_kwds; idx++) {
        keyword_ty kw = asdl_seq_GET(kwds, idx);
        if (kw->arg == NULL) {
            saw_unpacking = 1;
        }
    }

    const char *msg = saw_unpacking
        ? "positional argument follows keyword argument unpacking"
        : "positional argument follows keyword argument";

    return RAISE_SYNTAX_ERROR(msg);
}
1216
1217 void *
_PyPegen_nonparen_genexp_in_call(Parser * p,expr_ty args,asdl_comprehension_seq * comprehensions)1218 _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions)
1219 {
1220 /* The rule that calls this function is 'args for_if_clauses'.
1221 For the input f(L, x for x in y), L and x are in args and
1222 the for is parsed as a for_if_clause. We have to check if
1223 len <= 1, so that input like dict((a, b) for a, b in x)
1224 gets successfully parsed and then we pass the last
1225 argument (x in the above example) as the location of the
1226 error */
1227 Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
1228 if (len <= 1) {
1229 return NULL;
1230 }
1231
1232 comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty);
1233
1234 return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
1235 (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
1236 _PyPegen_get_last_comprehension_item(last_comprehension),
1237 "Generator expression must be parenthesized"
1238 );
1239 }
1240
1241 // Fstring stuff
1242
1243 static expr_ty
_PyPegen_decode_fstring_part(Parser * p,int is_raw,expr_ty constant,Token * token)1244 _PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* token) {
1245 assert(PyUnicode_CheckExact(constant->v.Constant.value));
1246
1247 const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value);
1248 if (bstr == NULL) {
1249 return NULL;
1250 }
1251
1252 size_t len;
1253 if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) {
1254 len = 1;
1255 } else {
1256 len = strlen(bstr);
1257 }
1258
1259 is_raw = is_raw || strchr(bstr, '\\') == NULL;
1260 PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token);
1261 if (str == NULL) {
1262 _Pypegen_raise_decode_error(p);
1263 return NULL;
1264 }
1265 if (_PyArena_AddPyObject(p->arena, str) < 0) {
1266 Py_DECREF(str);
1267 return NULL;
1268 }
1269 return _PyAST_Constant(str, NULL, constant->lineno, constant->col_offset,
1270 constant->end_lineno, constant->end_col_offset,
1271 p->arena);
1272 }
1273
1274 static asdl_expr_seq *
unpack_top_level_joined_strs(Parser * p,asdl_expr_seq * raw_expressions)1275 unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
1276 {
1277 /* The parser might put multiple f-string values into an individual
1278 * JoinedStr node at the top level due to stuff like f-string debugging
1279 * expressions. This function flattens those and promotes them to the
1280 * upper level. Only simplifies AST, but the compiler already takes care
1281 * of the regular output, so this is not necessary if you are not going
1282 * to expose the output AST to Python level. */
1283
1284 Py_ssize_t i, req_size, raw_size;
1285
1286 req_size = raw_size = asdl_seq_LEN(raw_expressions);
1287 expr_ty expr;
1288 for (i = 0; i < raw_size; i++) {
1289 expr = asdl_seq_GET(raw_expressions, i);
1290 if (expr->kind == JoinedStr_kind) {
1291 req_size += asdl_seq_LEN(expr->v.JoinedStr.values) - 1;
1292 }
1293 }
1294
1295 asdl_expr_seq *expressions = _Py_asdl_expr_seq_new(req_size, p->arena);
1296 if (expressions == NULL) {
1297 return NULL;
1298 }
1299
1300 Py_ssize_t raw_index, req_index = 0;
1301 for (raw_index = 0; raw_index < raw_size; raw_index++) {
1302 expr = asdl_seq_GET(raw_expressions, raw_index);
1303 if (expr->kind == JoinedStr_kind) {
1304 asdl_expr_seq *values = expr->v.JoinedStr.values;
1305 for (Py_ssize_t n = 0; n < asdl_seq_LEN(values); n++) {
1306 asdl_seq_SET(expressions, req_index, asdl_seq_GET(values, n));
1307 req_index++;
1308 }
1309 } else {
1310 asdl_seq_SET(expressions, req_index, expr);
1311 req_index++;
1312 }
1313 }
1314 return expressions;
1315 }
1316
1317 expr_ty
_PyPegen_joined_str(Parser * p,Token * a,asdl_expr_seq * raw_expressions,Token * b)1318 _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
1319
1320 asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
1321 Py_ssize_t n_items = asdl_seq_LEN(expr);
1322
1323 const char* quote_str = PyBytes_AsString(a->bytes);
1324 if (quote_str == NULL) {
1325 return NULL;
1326 }
1327 int is_raw = strpbrk(quote_str, "rR") != NULL;
1328
1329 asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena);
1330 if (seq == NULL) {
1331 return NULL;
1332 }
1333
1334 Py_ssize_t index = 0;
1335 for (Py_ssize_t i = 0; i < n_items; i++) {
1336 expr_ty item = asdl_seq_GET(expr, i);
1337 if (item->kind == Constant_kind) {
1338 item = _PyPegen_decode_fstring_part(p, is_raw, item, b);
1339 if (item == NULL) {
1340 return NULL;
1341 }
1342
1343 /* Tokenizer emits string parts even when the underlying string
1344 might become an empty value (e.g. FSTRING_MIDDLE with the value \\n)
1345 so we need to check for them and simplify it here. */
1346 if (PyUnicode_CheckExact(item->v.Constant.value)
1347 && PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
1348 continue;
1349 }
1350 }
1351 asdl_seq_SET(seq, index++, item);
1352 }
1353
1354 asdl_expr_seq *resized_exprs;
1355 if (index != n_items) {
1356 resized_exprs = _Py_asdl_expr_seq_new(index, p->arena);
1357 if (resized_exprs == NULL) {
1358 return NULL;
1359 }
1360 for (Py_ssize_t i = 0; i < index; i++) {
1361 asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i));
1362 }
1363 }
1364 else {
1365 resized_exprs = seq;
1366 }
1367
1368 return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
1369 b->end_lineno, b->end_col_offset,
1370 p->arena);
1371 }
1372
_PyPegen_decoded_constant_from_token(Parser * p,Token * tok)1373 expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
1374 Py_ssize_t bsize;
1375 char* bstr;
1376 if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
1377 return NULL;
1378 }
1379 PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);
1380 if (str == NULL) {
1381 return NULL;
1382 }
1383 if (_PyArena_AddPyObject(p->arena, str) < 0) {
1384 Py_DECREF(str);
1385 return NULL;
1386 }
1387 return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset,
1388 tok->end_lineno, tok->end_col_offset,
1389 p->arena);
1390 }
1391
_PyPegen_constant_from_token(Parser * p,Token * tok)1392 expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok) {
1393 char* bstr = PyBytes_AsString(tok->bytes);
1394 if (bstr == NULL) {
1395 return NULL;
1396 }
1397 PyObject* str = PyUnicode_FromString(bstr);
1398 if (str == NULL) {
1399 return NULL;
1400 }
1401 if (_PyArena_AddPyObject(p->arena, str) < 0) {
1402 Py_DECREF(str);
1403 return NULL;
1404 }
1405 return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset,
1406 tok->end_lineno, tok->end_col_offset,
1407 p->arena);
1408 }
1409
_PyPegen_constant_from_string(Parser * p,Token * tok)1410 expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
1411 char* the_str = PyBytes_AsString(tok->bytes);
1412 if (the_str == NULL) {
1413 return NULL;
1414 }
1415 PyObject *s = _PyPegen_parse_string(p, tok);
1416 if (s == NULL) {
1417 _Pypegen_raise_decode_error(p);
1418 return NULL;
1419 }
1420 if (_PyArena_AddPyObject(p->arena, s) < 0) {
1421 Py_DECREF(s);
1422 return NULL;
1423 }
1424 PyObject *kind = NULL;
1425 if (the_str && the_str[0] == 'u') {
1426 kind = _PyPegen_new_identifier(p, "u");
1427 if (kind == NULL) {
1428 return NULL;
1429 }
1430 }
1431 return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
1432 }
1433
/* Build the AST for one f-string replacement field.
 *
 * `conversion` and `format` are optional (NULL when absent).  When a
 * conversion is given, its result must be a Name consisting of exactly one
 * of 's', 'r' or 'a'; anything else raises a SyntaxError.  `debug` is
 * non-NULL for a debug field: with neither conversion nor format spec, such
 * a field defaults to the 'r' conversion.
 *
 * Without `debug`, the FormattedValue node is returned directly.  With
 * `debug`, a JoinedStr of two values is returned: a Constant holding the
 * metadata object of whichever of conversion / format / closing_brace
 * applies, followed by the FormattedValue.
 *
 * Returns NULL with an exception set on failure. */
expr_ty
_PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
                         ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
                         int end_lineno, int end_col_offset, PyArena *arena)
{
    /* -1 means "no conversion"; otherwise the conversion character code. */
    int conversion_val = -1;
    if (conversion != NULL) {
        expr_ty conversion_expr = (expr_ty) conversion->result;
        assert(conversion_expr->kind == Name_kind);
        Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);

        if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
            !(first == 's' || first == 'r' || first == 'a')) {
            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
                                              "f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
                                              conversion_expr->v.Name.id);
            return NULL;
        }

        conversion_val = Py_SAFE_DOWNCAST(first, Py_UCS4, int);
    }
    else if (debug && !format) {
        /* If no conversion is specified, use !r for debug expressions */
        conversion_val = (int)'r';
    }

    expr_ty formatted_value = _PyAST_FormattedValue(
        expression, conversion_val, format ? (expr_ty) format->result : NULL,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    if (formatted_value == NULL) {
        /* Must be checked before the debug path below, which would otherwise
           store a NULL element into the JoinedStr it builds. */
        return NULL;
    }

    if (!debug) {
        return formatted_value;
    }

    /* Find the non whitespace token after the "=" */
    int debug_end_line, debug_end_offset;
    PyObject *debug_metadata;

    if (conversion) {
        debug_end_line = ((expr_ty) conversion->result)->lineno;
        debug_end_offset = ((expr_ty) conversion->result)->col_offset;
        debug_metadata = conversion->metadata;
    }
    else if (format) {
        debug_end_line = ((expr_ty) format->result)->lineno;
        debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
        debug_metadata = format->metadata;
    }
    else {
        debug_end_line = end_lineno;
        debug_end_offset = end_col_offset;
        debug_metadata = closing_brace->metadata;
    }

    /* The debug text is located one column inside the field on each side. */
    expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                         debug_end_offset - 1, p->arena);
    if (!debug_text) {
        return NULL;
    }

    asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
    if (values == NULL) {
        return NULL;
    }
    asdl_seq_SET(values, 0, debug_text);
    asdl_seq_SET(values, 1, formatted_value);
    return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
}
1502
/* Concatenate a non-empty sequence of adjacent string-like nodes into one
 * expression.
 *
 * Outcomes, in the order they are checked:
 *   - bytes mixed with str or f-string pieces: SyntaxError, returns NULL;
 *   - only bytes: one Constant holding all the bytes joined together;
 *   - a single element that is not an f-string piece: that element itself;
 *   - otherwise: JoinedStr values are flattened, runs of adjacent Constants
 *     are folded into one Constant, and the result is either that single
 *     Constant (no f-string piece seen) or a JoinedStr.
 *
 * Returns NULL with an exception set on failure. */
expr_ty
_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
                             int lineno, int col_offset, int end_lineno,
                             int end_col_offset, PyArena *arena)
{
    Py_ssize_t len = asdl_seq_LEN(strings);
    assert(len > 0);

    int f_string_found = 0;
    int unicode_string_found = 0;
    int bytes_found = 0;

    /* Pass 1: classify the pieces and count how many elements there will be
       once every JoinedStr is replaced by its values.  Any node that is
       neither a Constant nor a JoinedStr counts as an f-string piece. */
    Py_ssize_t i = 0;
    Py_ssize_t n_flattened_elements = 0;
    for (i = 0; i < len; i++) {
        expr_ty elem = asdl_seq_GET(strings, i);
        switch(elem->kind) {
            case Constant_kind:
                if (PyBytes_CheckExact(elem->v.Constant.value)) {
                    bytes_found = 1;
                } else {
                    unicode_string_found = 1;
                }
                n_flattened_elements++;
                break;
            case JoinedStr_kind:
                n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
                f_string_found = 1;
                break;
            default:
                n_flattened_elements++;
                f_string_found = 1;
                break;
        }
    }

    if ((unicode_string_found || f_string_found) && bytes_found) {
        RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
        return NULL;
    }

    if (bytes_found) {
        PyObject* res = PyBytes_FromString("");

        /* Bytes literals never get a kind, but just for consistency
           since they are represented as Constant nodes, we'll mirror
           the same behavior as unicode strings for determining the
           kind. */
        PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
        for (i = 0; i < len; i++) {
            expr_ty elem = asdl_seq_GET(strings, i);
            PyBytes_Concat(&res, elem->v.Constant.value);
        }
        /* A failed allocation or concatenation leaves res NULL, so a single
           check after the loop covers every step above. */
        if (!res || _PyArena_AddPyObject(arena, res) < 0) {
            Py_XDECREF(res);
            return NULL;
        }
        return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
    }

    /* A lone plain string needs no processing. */
    if (!f_string_found && len == 1) {
        return asdl_seq_GET(strings, 0);
    }

    asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
    if (flattened == NULL) {
        return NULL;
    }

    /* build flattened list */
    Py_ssize_t current_pos = 0;
    Py_ssize_t j = 0;
    for (i = 0; i < len; i++) {
        expr_ty elem = asdl_seq_GET(strings, i);
        switch(elem->kind) {
            case JoinedStr_kind:
                for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
                    expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
                    if (subvalue == NULL) {
                        return NULL;
                    }
                    asdl_seq_SET(flattened, current_pos++, subvalue);
                }
                break;
            default:
                asdl_seq_SET(flattened, current_pos++, elem);
                break;
        }
    }

    /* calculate folded element count */
    Py_ssize_t n_elements = 0;
    int prev_is_constant = 0;
    for (i = 0; i < n_flattened_elements; i++) {
        expr_ty elem = asdl_seq_GET(flattened, i);

        /* The concatenation of a FormattedValue and an empty Constant should
           lead to the FormattedValue itself. Thus, we will not take any empty
           constants into account, just as in `_PyPegen_joined_str` */
        if (f_string_found && elem->kind == Constant_kind &&
            PyUnicode_CheckExact(elem->v.Constant.value) &&
            PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
            continue;

        /* A run of adjacent constants counts once, at its first element. */
        if (!prev_is_constant || elem->kind != Constant_kind) {
            n_elements++;
        }
        prev_is_constant = elem->kind == Constant_kind;
    }

    asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
    if (values == NULL) {
        return NULL;
    }

    /* build folded list */
    _PyUnicodeWriter writer;
    current_pos = 0;
    for (i = 0; i < n_flattened_elements; i++) {
        expr_ty elem = asdl_seq_GET(flattened, i);

        /* if the current elem and the following are constants,
           fold them and all consecutive constants */
        if (elem->kind == Constant_kind) {
            if (i + 1 < n_flattened_elements &&
                asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) {
                expr_ty first_elem = elem;

                /* When a string is getting concatenated, the kind of the string
                   is determined by the first string in the concatenation
                   sequence.

                   u"abc" "def" -> u"abcdef"
                   "abc" u"abc" ->  "abcabc" */
                PyObject *kind = elem->v.Constant.kind;

                _PyUnicodeWriter_Init(&writer);
                expr_ty last_elem = elem;
                for (j = i; j < n_flattened_elements; j++) {
                    expr_ty current_elem = asdl_seq_GET(flattened, j);
                    if (current_elem->kind == Constant_kind) {
                        if (_PyUnicodeWriter_WriteStr(
                                &writer, current_elem->v.Constant.value)) {
                            _PyUnicodeWriter_Dealloc(&writer);
                            return NULL;
                        }
                        last_elem = current_elem;
                    } else {
                        break;
                    }
                }
                /* j is the first index past the folded run; step back one so
                   the outer loop's i++ resumes exactly there. */
                i = j - 1;

                PyObject *concat_str = _PyUnicodeWriter_Finish(&writer);
                if (concat_str == NULL) {
                    _PyUnicodeWriter_Dealloc(&writer);
                    return NULL;
                }
                if (_PyArena_AddPyObject(p->arena, concat_str) < 0) {
                    Py_DECREF(concat_str);
                    return NULL;
                }
                /* The folded constant spans from the start of the first
                   element of the run to the end of the last one. */
                elem = _PyAST_Constant(concat_str, kind, first_elem->lineno,
                                       first_elem->col_offset,
                                       last_elem->end_lineno,
                                       last_elem->end_col_offset, p->arena);
                if (elem == NULL) {
                    return NULL;
                }
            }

            /* Drop all empty constant strings */
            if (f_string_found &&
                PyUnicode_CheckExact(elem->v.Constant.value) &&
                PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
                continue;
            }
        }

        asdl_seq_SET(values, current_pos++, elem);
    }

    /* Only plain strings were given: they have all been folded into a single
       Constant, which is returned without a JoinedStr wrapper. */
    if (!f_string_found) {
        assert(n_elements == 1);
        expr_ty elem = asdl_seq_GET(values, 0);
        assert(elem->kind == Constant_kind);
        return elem;
    }

    assert(current_pos == n_elements);
    return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
}
1695