• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <Python.h>
2 
3 #include "pegen.h"
4 #include "string_parser.h"
5 #include "pycore_runtime.h"         // _PyRuntime
6 #include "pycore_pystate.h"         // _PyInterpreterState_GET()
7 
8 void *
_PyPegen_dummy_name(Parser * p,...)9 _PyPegen_dummy_name(Parser *p, ...)
10 {
11     return &_PyRuntime.parser.dummy_name;
12 }
13 
14 /* Creates a single-element asdl_seq* that contains a */
15 asdl_seq *
_PyPegen_singleton_seq(Parser * p,void * a)16 _PyPegen_singleton_seq(Parser *p, void *a)
17 {
18     assert(a != NULL);
19     asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
20     if (!seq) {
21         return NULL;
22     }
23     asdl_seq_SET_UNTYPED(seq, 0, a);
24     return seq;
25 }
26 
27 /* Creates a copy of seq and prepends a to it */
28 asdl_seq *
_PyPegen_seq_insert_in_front(Parser * p,void * a,asdl_seq * seq)29 _PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
30 {
31     assert(a != NULL);
32     if (!seq) {
33         return _PyPegen_singleton_seq(p, a);
34     }
35 
36     asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
37     if (!new_seq) {
38         return NULL;
39     }
40 
41     asdl_seq_SET_UNTYPED(new_seq, 0, a);
42     for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
43         asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1));
44     }
45     return new_seq;
46 }
47 
48 /* Creates a copy of seq and appends a to it */
49 asdl_seq *
_PyPegen_seq_append_to_end(Parser * p,asdl_seq * seq,void * a)50 _PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
51 {
52     assert(a != NULL);
53     if (!seq) {
54         return _PyPegen_singleton_seq(p, a);
55     }
56 
57     asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
58     if (!new_seq) {
59         return NULL;
60     }
61 
62     for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
63         asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
64     }
65     asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a);
66     return new_seq;
67 }
68 
69 static Py_ssize_t
_get_flattened_seq_size(asdl_seq * seqs)70 _get_flattened_seq_size(asdl_seq *seqs)
71 {
72     Py_ssize_t size = 0;
73     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
74         asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
75         size += asdl_seq_LEN(inner_seq);
76     }
77     return size;
78 }
79 
80 /* Flattens an asdl_seq* of asdl_seq*s */
81 asdl_seq *
_PyPegen_seq_flatten(Parser * p,asdl_seq * seqs)82 _PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
83 {
84     Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
85     assert(flattened_seq_size > 0);
86 
87     asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
88     if (!flattened_seq) {
89         return NULL;
90     }
91 
92     int flattened_seq_idx = 0;
93     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
94         asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
95         for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
96             asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
97         }
98     }
99     assert(flattened_seq_idx == flattened_seq_size);
100 
101     return flattened_seq;
102 }
103 
104 void *
_PyPegen_seq_last_item(asdl_seq * seq)105 _PyPegen_seq_last_item(asdl_seq *seq)
106 {
107     Py_ssize_t len = asdl_seq_LEN(seq);
108     return asdl_seq_GET_UNTYPED(seq, len - 1);
109 }
110 
111 void *
_PyPegen_seq_first_item(asdl_seq * seq)112 _PyPegen_seq_first_item(asdl_seq *seq)
113 {
114     return asdl_seq_GET_UNTYPED(seq, 0);
115 }
116 
117 /* Creates a new name of the form <first_name>.<second_name> */
118 expr_ty
_PyPegen_join_names_with_dot(Parser * p,expr_ty first_name,expr_ty second_name)119 _PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
120 {
121     assert(first_name != NULL && second_name != NULL);
122     PyObject *uni = PyUnicode_FromFormat("%U.%U",
123             first_name->v.Name.id, second_name->v.Name.id);
124     if (!uni) {
125         return NULL;
126     }
127     PyInterpreterState *interp = _PyInterpreterState_GET();
128     _PyUnicode_InternImmortal(interp, &uni);
129     if (_PyArena_AddPyObject(p->arena, uni) < 0) {
130         Py_DECREF(uni);
131         return NULL;
132     }
133 
134     return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
135 }
136 
137 /* Counts the total number of dots in seq's tokens */
138 int
_PyPegen_seq_count_dots(asdl_seq * seq)139 _PyPegen_seq_count_dots(asdl_seq *seq)
140 {
141     int number_of_dots = 0;
142     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
143         Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
144         switch (current_expr->type) {
145             case ELLIPSIS:
146                 number_of_dots += 3;
147                 break;
148             case DOT:
149                 number_of_dots += 1;
150                 break;
151             default:
152                 Py_UNREACHABLE();
153         }
154     }
155 
156     return number_of_dots;
157 }
158 
159 /* Creates an alias with '*' as the identifier name */
160 alias_ty
_PyPegen_alias_for_star(Parser * p,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)161 _PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
162                         int end_col_offset, PyArena *arena) {
163     PyObject *str = PyUnicode_InternFromString("*");
164     if (!str) {
165         return NULL;
166     }
167     if (_PyArena_AddPyObject(p->arena, str) < 0) {
168         Py_DECREF(str);
169         return NULL;
170     }
171     return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
172 }
173 
174 /* Creates a new asdl_seq* with the identifiers of all the names in seq */
175 asdl_identifier_seq *
_PyPegen_map_names_to_ids(Parser * p,asdl_expr_seq * seq)176 _PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
177 {
178     Py_ssize_t len = asdl_seq_LEN(seq);
179     assert(len > 0);
180 
181     asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
182     if (!new_seq) {
183         return NULL;
184     }
185     for (Py_ssize_t i = 0; i < len; i++) {
186         expr_ty e = asdl_seq_GET(seq, i);
187         asdl_seq_SET(new_seq, i, e->v.Name.id);
188     }
189     return new_seq;
190 }
191 
192 /* Constructs a CmpopExprPair */
193 CmpopExprPair *
_PyPegen_cmpop_expr_pair(Parser * p,cmpop_ty cmpop,expr_ty expr)194 _PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
195 {
196     assert(expr != NULL);
197     CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
198     if (!a) {
199         return NULL;
200     }
201     a->cmpop = cmpop;
202     a->expr = expr;
203     return a;
204 }
205 
206 asdl_int_seq *
_PyPegen_get_cmpops(Parser * p,asdl_seq * seq)207 _PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
208 {
209     Py_ssize_t len = asdl_seq_LEN(seq);
210     assert(len > 0);
211 
212     asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
213     if (!new_seq) {
214         return NULL;
215     }
216     for (Py_ssize_t i = 0; i < len; i++) {
217         CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
218         asdl_seq_SET(new_seq, i, pair->cmpop);
219     }
220     return new_seq;
221 }
222 
223 asdl_expr_seq *
_PyPegen_get_exprs(Parser * p,asdl_seq * seq)224 _PyPegen_get_exprs(Parser *p, asdl_seq *seq)
225 {
226     Py_ssize_t len = asdl_seq_LEN(seq);
227     assert(len > 0);
228 
229     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
230     if (!new_seq) {
231         return NULL;
232     }
233     for (Py_ssize_t i = 0; i < len; i++) {
234         CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
235         asdl_seq_SET(new_seq, i, pair->expr);
236     }
237     return new_seq;
238 }
239 
240 /* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
241 static asdl_expr_seq *
_set_seq_context(Parser * p,asdl_expr_seq * seq,expr_context_ty ctx)242 _set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
243 {
244     Py_ssize_t len = asdl_seq_LEN(seq);
245     if (len == 0) {
246         return NULL;
247     }
248 
249     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
250     if (!new_seq) {
251         return NULL;
252     }
253     for (Py_ssize_t i = 0; i < len; i++) {
254         expr_ty e = asdl_seq_GET(seq, i);
255         asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
256     }
257     return new_seq;
258 }
259 
260 static expr_ty
_set_name_context(Parser * p,expr_ty e,expr_context_ty ctx)261 _set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
262 {
263     return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
264 }
265 
266 static expr_ty
_set_tuple_context(Parser * p,expr_ty e,expr_context_ty ctx)267 _set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
268 {
269     return _PyAST_Tuple(
270             _set_seq_context(p, e->v.Tuple.elts, ctx),
271             ctx,
272             EXTRA_EXPR(e, e));
273 }
274 
275 static expr_ty
_set_list_context(Parser * p,expr_ty e,expr_context_ty ctx)276 _set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
277 {
278     return _PyAST_List(
279             _set_seq_context(p, e->v.List.elts, ctx),
280             ctx,
281             EXTRA_EXPR(e, e));
282 }
283 
284 static expr_ty
_set_subscript_context(Parser * p,expr_ty e,expr_context_ty ctx)285 _set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
286 {
287     return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
288                             ctx, EXTRA_EXPR(e, e));
289 }
290 
291 static expr_ty
_set_attribute_context(Parser * p,expr_ty e,expr_context_ty ctx)292 _set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
293 {
294     return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
295                             ctx, EXTRA_EXPR(e, e));
296 }
297 
298 static expr_ty
_set_starred_context(Parser * p,expr_ty e,expr_context_ty ctx)299 _set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
300 {
301     return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
302                           ctx, EXTRA_EXPR(e, e));
303 }
304 
305 /* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
306 expr_ty
_PyPegen_set_expr_context(Parser * p,expr_ty expr,expr_context_ty ctx)307 _PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
308 {
309     assert(expr != NULL);
310 
311     expr_ty new = NULL;
312     switch (expr->kind) {
313         case Name_kind:
314             new = _set_name_context(p, expr, ctx);
315             break;
316         case Tuple_kind:
317             new = _set_tuple_context(p, expr, ctx);
318             break;
319         case List_kind:
320             new = _set_list_context(p, expr, ctx);
321             break;
322         case Subscript_kind:
323             new = _set_subscript_context(p, expr, ctx);
324             break;
325         case Attribute_kind:
326             new = _set_attribute_context(p, expr, ctx);
327             break;
328         case Starred_kind:
329             new = _set_starred_context(p, expr, ctx);
330             break;
331         default:
332             new = expr;
333     }
334     return new;
335 }
336 
337 /* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
338 KeyValuePair *
_PyPegen_key_value_pair(Parser * p,expr_ty key,expr_ty value)339 _PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
340 {
341     KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
342     if (!a) {
343         return NULL;
344     }
345     a->key = key;
346     a->value = value;
347     return a;
348 }
349 
350 /* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
351 asdl_expr_seq *
_PyPegen_get_keys(Parser * p,asdl_seq * seq)352 _PyPegen_get_keys(Parser *p, asdl_seq *seq)
353 {
354     Py_ssize_t len = asdl_seq_LEN(seq);
355     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
356     if (!new_seq) {
357         return NULL;
358     }
359     for (Py_ssize_t i = 0; i < len; i++) {
360         KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
361         asdl_seq_SET(new_seq, i, pair->key);
362     }
363     return new_seq;
364 }
365 
366 /* Extracts all values from an asdl_seq* of KeyValuePair*'s */
367 asdl_expr_seq *
_PyPegen_get_values(Parser * p,asdl_seq * seq)368 _PyPegen_get_values(Parser *p, asdl_seq *seq)
369 {
370     Py_ssize_t len = asdl_seq_LEN(seq);
371     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
372     if (!new_seq) {
373         return NULL;
374     }
375     for (Py_ssize_t i = 0; i < len; i++) {
376         KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
377         asdl_seq_SET(new_seq, i, pair->value);
378     }
379     return new_seq;
380 }
381 
382 /* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */
383 KeyPatternPair *
_PyPegen_key_pattern_pair(Parser * p,expr_ty key,pattern_ty pattern)384 _PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
385 {
386     KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
387     if (!a) {
388         return NULL;
389     }
390     a->key = key;
391     a->pattern = pattern;
392     return a;
393 }
394 
395 /* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */
396 asdl_expr_seq *
_PyPegen_get_pattern_keys(Parser * p,asdl_seq * seq)397 _PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
398 {
399     Py_ssize_t len = asdl_seq_LEN(seq);
400     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
401     if (!new_seq) {
402         return NULL;
403     }
404     for (Py_ssize_t i = 0; i < len; i++) {
405         KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
406         asdl_seq_SET(new_seq, i, pair->key);
407     }
408     return new_seq;
409 }
410 
411 /* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */
412 asdl_pattern_seq *
_PyPegen_get_patterns(Parser * p,asdl_seq * seq)413 _PyPegen_get_patterns(Parser *p, asdl_seq *seq)
414 {
415     Py_ssize_t len = asdl_seq_LEN(seq);
416     asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
417     if (!new_seq) {
418         return NULL;
419     }
420     for (Py_ssize_t i = 0; i < len; i++) {
421         KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
422         asdl_seq_SET(new_seq, i, pair->pattern);
423     }
424     return new_seq;
425 }
426 
427 /* Constructs a NameDefaultPair */
428 NameDefaultPair *
_PyPegen_name_default_pair(Parser * p,arg_ty arg,expr_ty value,Token * tc)429 _PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
430 {
431     NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
432     if (!a) {
433         return NULL;
434     }
435     a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
436     a->value = value;
437     return a;
438 }
439 
440 /* Constructs a SlashWithDefault */
441 SlashWithDefault *
_PyPegen_slash_with_default(Parser * p,asdl_arg_seq * plain_names,asdl_seq * names_with_defaults)442 _PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
443 {
444     SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
445     if (!a) {
446         return NULL;
447     }
448     a->plain_names = plain_names;
449     a->names_with_defaults = names_with_defaults;
450     return a;
451 }
452 
453 /* Constructs a StarEtc */
454 StarEtc *
_PyPegen_star_etc(Parser * p,arg_ty vararg,asdl_seq * kwonlyargs,arg_ty kwarg)455 _PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
456 {
457     StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc));
458     if (!a) {
459         return NULL;
460     }
461     a->vararg = vararg;
462     a->kwonlyargs = kwonlyargs;
463     a->kwarg = kwarg;
464     return a;
465 }
466 
467 asdl_seq *
_PyPegen_join_sequences(Parser * p,asdl_seq * a,asdl_seq * b)468 _PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
469 {
470     Py_ssize_t first_len = asdl_seq_LEN(a);
471     Py_ssize_t second_len = asdl_seq_LEN(b);
472     asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
473     if (!new_seq) {
474         return NULL;
475     }
476 
477     int k = 0;
478     for (Py_ssize_t i = 0; i < first_len; i++) {
479         asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
480     }
481     for (Py_ssize_t i = 0; i < second_len; i++) {
482         asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
483     }
484 
485     return new_seq;
486 }
487 
488 static asdl_arg_seq*
_get_names(Parser * p,asdl_seq * names_with_defaults)489 _get_names(Parser *p, asdl_seq *names_with_defaults)
490 {
491     Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
492     asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
493     if (!seq) {
494         return NULL;
495     }
496     for (Py_ssize_t i = 0; i < len; i++) {
497         NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
498         asdl_seq_SET(seq, i, pair->arg);
499     }
500     return seq;
501 }
502 
503 static asdl_expr_seq *
_get_defaults(Parser * p,asdl_seq * names_with_defaults)504 _get_defaults(Parser *p, asdl_seq *names_with_defaults)
505 {
506     Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
507     asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
508     if (!seq) {
509         return NULL;
510     }
511     for (Py_ssize_t i = 0; i < len; i++) {
512         NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
513         asdl_seq_SET(seq, i, pair->value);
514     }
515     return seq;
516 }
517 
518 static int
_make_posonlyargs(Parser * p,asdl_arg_seq * slash_without_default,SlashWithDefault * slash_with_default,asdl_arg_seq ** posonlyargs)519 _make_posonlyargs(Parser *p,
520                   asdl_arg_seq *slash_without_default,
521                   SlashWithDefault *slash_with_default,
522                   asdl_arg_seq **posonlyargs) {
523     if (slash_without_default != NULL) {
524         *posonlyargs = slash_without_default;
525     }
526     else if (slash_with_default != NULL) {
527         asdl_arg_seq *slash_with_default_names =
528                 _get_names(p, slash_with_default->names_with_defaults);
529         if (!slash_with_default_names) {
530             return -1;
531         }
532         *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
533                 p,
534                 (asdl_seq*)slash_with_default->plain_names,
535                 (asdl_seq*)slash_with_default_names);
536     }
537     else {
538         *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
539     }
540     return *posonlyargs == NULL ? -1 : 0;
541 }
542 
543 static int
_make_posargs(Parser * p,asdl_arg_seq * plain_names,asdl_seq * names_with_default,asdl_arg_seq ** posargs)544 _make_posargs(Parser *p,
545               asdl_arg_seq *plain_names,
546               asdl_seq *names_with_default,
547               asdl_arg_seq **posargs) {
548     if (plain_names != NULL && names_with_default != NULL) {
549         asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
550         if (!names_with_default_names) {
551             return -1;
552         }
553         *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
554                 p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names);
555     }
556     else if (plain_names == NULL && names_with_default != NULL) {
557         *posargs = _get_names(p, names_with_default);
558     }
559     else if (plain_names != NULL && names_with_default == NULL) {
560         *posargs = plain_names;
561     }
562     else {
563         *posargs = _Py_asdl_arg_seq_new(0, p->arena);
564     }
565     return *posargs == NULL ? -1 : 0;
566 }
567 
568 static int
_make_posdefaults(Parser * p,SlashWithDefault * slash_with_default,asdl_seq * names_with_default,asdl_expr_seq ** posdefaults)569 _make_posdefaults(Parser *p,
570                   SlashWithDefault *slash_with_default,
571                   asdl_seq *names_with_default,
572                   asdl_expr_seq **posdefaults) {
573     if (slash_with_default != NULL && names_with_default != NULL) {
574         asdl_expr_seq *slash_with_default_values =
575                 _get_defaults(p, slash_with_default->names_with_defaults);
576         if (!slash_with_default_values) {
577             return -1;
578         }
579         asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
580         if (!names_with_default_values) {
581             return -1;
582         }
583         *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
584                 p,
585                 (asdl_seq*)slash_with_default_values,
586                 (asdl_seq*)names_with_default_values);
587     }
588     else if (slash_with_default == NULL && names_with_default != NULL) {
589         *posdefaults = _get_defaults(p, names_with_default);
590     }
591     else if (slash_with_default != NULL && names_with_default == NULL) {
592         *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
593     }
594     else {
595         *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
596     }
597     return *posdefaults == NULL ? -1 : 0;
598 }
599 
600 static int
_make_kwargs(Parser * p,StarEtc * star_etc,asdl_arg_seq ** kwonlyargs,asdl_expr_seq ** kwdefaults)601 _make_kwargs(Parser *p, StarEtc *star_etc,
602              asdl_arg_seq **kwonlyargs,
603              asdl_expr_seq **kwdefaults) {
604     if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
605         *kwonlyargs = _get_names(p, star_etc->kwonlyargs);
606     }
607     else {
608         *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
609     }
610 
611     if (*kwonlyargs == NULL) {
612         return -1;
613     }
614 
615     if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
616         *kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
617     }
618     else {
619         *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
620     }
621 
622     if (*kwdefaults == NULL) {
623         return -1;
624     }
625 
626     return 0;
627 }
628 
629 /* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
630 arguments_ty
_PyPegen_make_arguments(Parser * p,asdl_arg_seq * slash_without_default,SlashWithDefault * slash_with_default,asdl_arg_seq * plain_names,asdl_seq * names_with_default,StarEtc * star_etc)631 _PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
632                         SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
633                         asdl_seq *names_with_default, StarEtc *star_etc)
634 {
635     asdl_arg_seq *posonlyargs;
636     if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
637         return NULL;
638     }
639 
640     asdl_arg_seq *posargs;
641     if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
642         return NULL;
643     }
644 
645     asdl_expr_seq *posdefaults;
646     if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) {
647         return NULL;
648     }
649 
650     arg_ty vararg = NULL;
651     if (star_etc != NULL && star_etc->vararg != NULL) {
652         vararg = star_etc->vararg;
653     }
654 
655     asdl_arg_seq *kwonlyargs;
656     asdl_expr_seq *kwdefaults;
657     if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
658         return NULL;
659     }
660 
661     arg_ty kwarg = NULL;
662     if (star_etc != NULL && star_etc->kwarg != NULL) {
663         kwarg = star_etc->kwarg;
664     }
665 
666     return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
667                             kwdefaults, kwarg, posdefaults, p->arena);
668 }
669 
670 
671 /* Constructs an empty arguments_ty object, that gets used when a function accepts no
672  * arguments. */
673 arguments_ty
_PyPegen_empty_arguments(Parser * p)674 _PyPegen_empty_arguments(Parser *p)
675 {
676     asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
677     if (!posonlyargs) {
678         return NULL;
679     }
680     asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena);
681     if (!posargs) {
682         return NULL;
683     }
684     asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
685     if (!posdefaults) {
686         return NULL;
687     }
688     asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
689     if (!kwonlyargs) {
690         return NULL;
691     }
692     asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
693     if (!kwdefaults) {
694         return NULL;
695     }
696 
697     return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
698                             kwdefaults, NULL, posdefaults, p->arena);
699 }
700 
701 /* Encapsulates the value of an operator_ty into an AugOperator struct */
702 AugOperator *
_PyPegen_augoperator(Parser * p,operator_ty kind)703 _PyPegen_augoperator(Parser *p, operator_ty kind)
704 {
705     AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator));
706     if (!a) {
707         return NULL;
708     }
709     a->kind = kind;
710     return a;
711 }
712 
713 /* Construct a FunctionDef equivalent to function_def, but with decorators */
714 stmt_ty
_PyPegen_function_def_decorators(Parser * p,asdl_expr_seq * decorators,stmt_ty function_def)715 _PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
716 {
717     assert(function_def != NULL);
718     if (function_def->kind == AsyncFunctionDef_kind) {
719         return _PyAST_AsyncFunctionDef(
720             function_def->v.AsyncFunctionDef.name,
721             function_def->v.AsyncFunctionDef.args,
722             function_def->v.AsyncFunctionDef.body, decorators,
723             function_def->v.AsyncFunctionDef.returns,
724             function_def->v.AsyncFunctionDef.type_comment,
725             function_def->v.AsyncFunctionDef.type_params,
726             function_def->lineno, function_def->col_offset,
727             function_def->end_lineno, function_def->end_col_offset, p->arena);
728     }
729 
730     return _PyAST_FunctionDef(
731         function_def->v.FunctionDef.name,
732         function_def->v.FunctionDef.args,
733         function_def->v.FunctionDef.body, decorators,
734         function_def->v.FunctionDef.returns,
735         function_def->v.FunctionDef.type_comment,
736         function_def->v.FunctionDef.type_params,
737         function_def->lineno, function_def->col_offset,
738         function_def->end_lineno, function_def->end_col_offset, p->arena);
739 }
740 
741 /* Construct a ClassDef equivalent to class_def, but with decorators */
742 stmt_ty
_PyPegen_class_def_decorators(Parser * p,asdl_expr_seq * decorators,stmt_ty class_def)743 _PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
744 {
745     assert(class_def != NULL);
746     return _PyAST_ClassDef(
747         class_def->v.ClassDef.name,
748         class_def->v.ClassDef.bases, class_def->v.ClassDef.keywords,
749         class_def->v.ClassDef.body, decorators,
750         class_def->v.ClassDef.type_params,
751         class_def->lineno, class_def->col_offset, class_def->end_lineno,
752         class_def->end_col_offset, p->arena);
753 }
754 
755 /* Construct a KeywordOrStarred */
756 KeywordOrStarred *
_PyPegen_keyword_or_starred(Parser * p,void * element,int is_keyword)757 _PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
758 {
759     KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
760     if (!a) {
761         return NULL;
762     }
763     a->element = element;
764     a->is_keyword = is_keyword;
765     return a;
766 }
767 
768 /* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
769 static int
_seq_number_of_starred_exprs(asdl_seq * seq)770 _seq_number_of_starred_exprs(asdl_seq *seq)
771 {
772     int n = 0;
773     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
774         KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
775         if (!k->is_keyword) {
776             n++;
777         }
778     }
779     return n;
780 }
781 
782 /* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
783 asdl_expr_seq *
_PyPegen_seq_extract_starred_exprs(Parser * p,asdl_seq * kwargs)784 _PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
785 {
786     int new_len = _seq_number_of_starred_exprs(kwargs);
787     if (new_len == 0) {
788         return NULL;
789     }
790     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
791     if (!new_seq) {
792         return NULL;
793     }
794 
795     int idx = 0;
796     for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
797         KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
798         if (!k->is_keyword) {
799             asdl_seq_SET(new_seq, idx++, k->element);
800         }
801     }
802     return new_seq;
803 }
804 
805 /* Return a new asdl_seq* with only the keywords in kwargs */
806 asdl_keyword_seq*
_PyPegen_seq_delete_starred_exprs(Parser * p,asdl_seq * kwargs)807 _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
808 {
809     Py_ssize_t len = asdl_seq_LEN(kwargs);
810     Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
811     if (new_len == 0) {
812         return NULL;
813     }
814     asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
815     if (!new_seq) {
816         return NULL;
817     }
818 
819     int idx = 0;
820     for (Py_ssize_t i = 0; i < len; i++) {
821         KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
822         if (k->is_keyword) {
823             asdl_seq_SET(new_seq, idx++, k->element);
824         }
825     }
826     return new_seq;
827 }
828 
829 expr_ty
_PyPegen_ensure_imaginary(Parser * p,expr_ty exp)830 _PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
831 {
832     if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) {
833         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
834         return NULL;
835     }
836     return exp;
837 }
838 
839 expr_ty
_PyPegen_ensure_real(Parser * p,expr_ty exp)840 _PyPegen_ensure_real(Parser *p, expr_ty exp)
841 {
842     if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) {
843         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
844         return NULL;
845     }
846     return exp;
847 }
848 
849 mod_ty
_PyPegen_make_module(Parser * p,asdl_stmt_seq * a)850 _PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
851     asdl_type_ignore_seq *type_ignores = NULL;
852     Py_ssize_t num = p->type_ignore_comments.num_items;
853     if (num > 0) {
854         // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
855         type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
856         if (type_ignores == NULL) {
857             return NULL;
858         }
859         for (int i = 0; i < num; i++) {
860             PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
861             if (tag == NULL) {
862                 return NULL;
863             }
864             type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
865                                                   tag, p->arena);
866             if (ti == NULL) {
867                 return NULL;
868             }
869             asdl_seq_SET(type_ignores, i, ti);
870         }
871     }
872     return _PyAST_Module(a, type_ignores, p->arena);
873 }
874 
875 PyObject *
_PyPegen_new_type_comment(Parser * p,const char * s)876 _PyPegen_new_type_comment(Parser *p, const char *s)
877 {
878     PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
879     if (res == NULL) {
880         return NULL;
881     }
882     if (_PyArena_AddPyObject(p->arena, res) < 0) {
883         Py_DECREF(res);
884         return NULL;
885     }
886     return res;
887 }
888 
889 arg_ty
_PyPegen_add_type_comment_to_arg(Parser * p,arg_ty a,Token * tc)890 _PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
891 {
892     if (tc == NULL) {
893         return a;
894     }
895     const char *bytes = PyBytes_AsString(tc->bytes);
896     if (bytes == NULL) {
897         return NULL;
898     }
899     PyObject *tco = _PyPegen_new_type_comment(p, bytes);
900     if (tco == NULL) {
901         return NULL;
902     }
903     return _PyAST_arg(a->arg, a->annotation, tco,
904                       a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
905                       p->arena);
906 }
907 
908 /* Checks if the NOTEQUAL token is valid given the current parser flags
909 0 indicates success and nonzero indicates failure (an exception may be set) */
910 int
_PyPegen_check_barry_as_flufl(Parser * p,Token * t)911 _PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
912     assert(t->bytes != NULL);
913     assert(t->type == NOTEQUAL);
914 
915     const char* tok_str = PyBytes_AS_STRING(t->bytes);
916     if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) {
917         RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
918         return -1;
919     }
920     if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
921         return strcmp(tok_str, "!=");
922     }
923     return 0;
924 }
925 
926 int
_PyPegen_check_legacy_stmt(Parser * p,expr_ty name)927 _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
928     if (name->kind != Name_kind) {
929         return 0;
930     }
931     const char* candidates[2] = {"print", "exec"};
932     for (int i=0; i<2; i++) {
933         if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) {
934             return 1;
935         }
936     }
937     return 0;
938 }
939 
940 static ResultTokenWithMetadata *
result_token_with_metadata(Parser * p,void * result,PyObject * metadata)941 result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
942 {
943     ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
944     if (res == NULL) {
945         return NULL;
946     }
947     res->metadata = metadata;
948     res->result = result;
949     return res;
950 }
951 
952 ResultTokenWithMetadata *
_PyPegen_check_fstring_conversion(Parser * p,Token * conv_token,expr_ty conv)953 _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
954 {
955     if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
956         return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
957             conv_token, conv,
958             "f-string: conversion type must come right after the exclamanation mark"
959         );
960     }
961     return result_token_with_metadata(p, conv, conv_token->metadata);
962 }
963 
964 static asdl_expr_seq *
965 unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions);
966 ResultTokenWithMetadata *
_PyPegen_setup_full_format_spec(Parser * p,Token * colon,asdl_expr_seq * spec,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)967 _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
968                                 int end_lineno, int end_col_offset, PyArena *arena)
969 {
970     if (!spec) {
971         return NULL;
972     }
973 
974     // This is needed to keep compatibility with 3.11, where an empty format
975     // spec is parsed as an *empty* JoinedStr node, instead of having an empty
976     // constant in it.
977     Py_ssize_t n_items = asdl_seq_LEN(spec);
978     Py_ssize_t non_empty_count = 0;
979     for (Py_ssize_t i = 0; i < n_items; i++) {
980         expr_ty item = asdl_seq_GET(spec, i);
981         non_empty_count += !(item->kind == Constant_kind &&
982                              PyUnicode_CheckExact(item->v.Constant.value) &&
983                              PyUnicode_GET_LENGTH(item->v.Constant.value) == 0);
984     }
985     if (non_empty_count != n_items) {
986         asdl_expr_seq *resized_spec =
987             _Py_asdl_expr_seq_new(non_empty_count, p->arena);
988         if (resized_spec == NULL) {
989             return NULL;
990         }
991         Py_ssize_t j = 0;
992         for (Py_ssize_t i = 0; i < n_items; i++) {
993             expr_ty item = asdl_seq_GET(spec, i);
994             if (item->kind == Constant_kind &&
995                 PyUnicode_CheckExact(item->v.Constant.value) &&
996                 PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
997                 continue;
998             }
999             asdl_seq_SET(resized_spec, j++, item);
1000         }
1001         assert(j == non_empty_count);
1002         spec = resized_spec;
1003     }
1004     expr_ty res;
1005     Py_ssize_t n = asdl_seq_LEN(spec);
1006     if (n == 0 || (n == 1 && asdl_seq_GET(spec, 0)->kind == Constant_kind)) {
1007         res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
1008                                     end_col_offset, p->arena);
1009     } else {
1010         res = _PyPegen_concatenate_strings(p, spec,
1011                              lineno, col_offset, end_lineno,
1012                              end_col_offset, arena);
1013     }
1014     if (!res) {
1015         return NULL;
1016     }
1017     return result_token_with_metadata(p, res, colon->metadata);
1018 }
1019 
1020 const char *
_PyPegen_get_expr_name(expr_ty e)1021 _PyPegen_get_expr_name(expr_ty e)
1022 {
1023     assert(e != NULL);
1024     switch (e->kind) {
1025         case Attribute_kind:
1026             return "attribute";
1027         case Subscript_kind:
1028             return "subscript";
1029         case Starred_kind:
1030             return "starred";
1031         case Name_kind:
1032             return "name";
1033         case List_kind:
1034             return "list";
1035         case Tuple_kind:
1036             return "tuple";
1037         case Lambda_kind:
1038             return "lambda";
1039         case Call_kind:
1040             return "function call";
1041         case BoolOp_kind:
1042         case BinOp_kind:
1043         case UnaryOp_kind:
1044             return "expression";
1045         case GeneratorExp_kind:
1046             return "generator expression";
1047         case Yield_kind:
1048         case YieldFrom_kind:
1049             return "yield expression";
1050         case Await_kind:
1051             return "await expression";
1052         case ListComp_kind:
1053             return "list comprehension";
1054         case SetComp_kind:
1055             return "set comprehension";
1056         case DictComp_kind:
1057             return "dict comprehension";
1058         case Dict_kind:
1059             return "dict literal";
1060         case Set_kind:
1061             return "set display";
1062         case JoinedStr_kind:
1063         case FormattedValue_kind:
1064             return "f-string expression";
1065         case Constant_kind: {
1066             PyObject *value = e->v.Constant.value;
1067             if (value == Py_None) {
1068                 return "None";
1069             }
1070             if (value == Py_False) {
1071                 return "False";
1072             }
1073             if (value == Py_True) {
1074                 return "True";
1075             }
1076             if (value == Py_Ellipsis) {
1077                 return "ellipsis";
1078             }
1079             return "literal";
1080         }
1081         case Compare_kind:
1082             return "comparison";
1083         case IfExp_kind:
1084             return "conditional expression";
1085         case NamedExpr_kind:
1086             return "named expression";
1087         default:
1088             PyErr_Format(PyExc_SystemError,
1089                          "unexpected expression in assignment %d (line %d)",
1090                          e->kind, e->lineno);
1091             return NULL;
1092     }
1093 }
1094 
1095 expr_ty
_PyPegen_get_last_comprehension_item(comprehension_ty comprehension)1096 _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
1097     if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) {
1098         return comprehension->iter;
1099     }
1100     return PyPegen_last_item(comprehension->ifs, expr_ty);
1101 }
1102 
_PyPegen_collect_call_seqs(Parser * p,asdl_expr_seq * a,asdl_seq * b,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)1103 expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
1104                      int lineno, int col_offset, int end_lineno,
1105                      int end_col_offset, PyArena *arena) {
1106     Py_ssize_t args_len = asdl_seq_LEN(a);
1107     Py_ssize_t total_len = args_len;
1108 
1109     if (b == NULL) {
1110         return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
1111                         end_lineno, end_col_offset, arena);
1112 
1113     }
1114 
1115     asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
1116     asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);
1117 
1118     if (starreds) {
1119         total_len += asdl_seq_LEN(starreds);
1120     }
1121 
1122     asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena);
1123     if (args == NULL) {
1124         return NULL;
1125     }
1126 
1127     Py_ssize_t i = 0;
1128     for (i = 0; i < args_len; i++) {
1129         asdl_seq_SET(args, i, asdl_seq_GET(a, i));
1130     }
1131     for (; i < total_len; i++) {
1132         asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
1133     }
1134 
1135     return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
1136                        col_offset, end_lineno, end_col_offset, arena);
1137 }
1138 
1139 // AST Error reporting helpers
1140 
1141 expr_ty
_PyPegen_get_invalid_target(expr_ty e,TARGETS_TYPE targets_type)1142 _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
1143 {
1144     if (e == NULL) {
1145         return NULL;
1146     }
1147 
1148 #define VISIT_CONTAINER(CONTAINER, TYPE) do { \
1149         Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
1150         for (Py_ssize_t i = 0; i < len; i++) {\
1151             expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
1152             expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
1153             if (child != NULL) {\
1154                 return child;\
1155             }\
1156         }\
1157     } while (0)
1158 
1159     // We only need to visit List and Tuple nodes recursively as those
1160     // are the only ones that can contain valid names in targets when
1161     // they are parsed as expressions. Any other kind of expression
1162     // that is a container (like Sets or Dicts) is directly invalid and
1163     // we don't need to visit it recursively.
1164 
1165     switch (e->kind) {
1166         case List_kind:
1167             VISIT_CONTAINER(e, List);
1168             return NULL;
1169         case Tuple_kind:
1170             VISIT_CONTAINER(e, Tuple);
1171             return NULL;
1172         case Starred_kind:
1173             if (targets_type == DEL_TARGETS) {
1174                 return e;
1175             }
1176             return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
1177         case Compare_kind:
1178             // This is needed, because the `a in b` in `for a in b` gets parsed
1179             // as a comparison, and so we need to search the left side of the comparison
1180             // for invalid targets.
1181             if (targets_type == FOR_TARGETS) {
1182                 cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
1183                 if (cmpop == In) {
1184                     return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
1185                 }
1186                 return NULL;
1187             }
1188             return e;
1189         case Name_kind:
1190         case Subscript_kind:
1191         case Attribute_kind:
1192             return NULL;
1193         default:
1194             return e;
1195     }
1196 }
1197 
/* Raises the syntax error for a positional argument that appears after
   keyword arguments in the call e. The wording depends on whether any
   keyword of the call has a NULL arg field (a keyword unpacking). Returns
   whatever RAISE_SYNTAX_ERROR returns. */
void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
    int saw_unpacking = 0;
    Py_ssize_t n_keywords = asdl_seq_LEN(e->v.Call.keywords);
    for (Py_ssize_t pos = 0; pos < n_keywords; pos++) {
        keyword_ty kw = asdl_seq_GET(e->v.Call.keywords, pos);
        if (kw->arg == NULL) {
            saw_unpacking = 1;
        }
    }

    const char *msg = saw_unpacking
        ? "positional argument follows keyword argument unpacking"
        : "positional argument follows keyword argument";

    return RAISE_SYNTAX_ERROR(msg);
}
1216 
1217 void *
_PyPegen_nonparen_genexp_in_call(Parser * p,expr_ty args,asdl_comprehension_seq * comprehensions)1218 _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions)
1219 {
1220     /* The rule that calls this function is 'args for_if_clauses'.
1221        For the input f(L, x for x in y), L and x are in args and
1222        the for is parsed as a for_if_clause. We have to check if
1223        len <= 1, so that input like dict((a, b) for a, b in x)
1224        gets successfully parsed and then we pass the last
1225        argument (x in the above example) as the location of the
1226        error */
1227     Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
1228     if (len <= 1) {
1229         return NULL;
1230     }
1231 
1232     comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty);
1233 
1234     return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
1235         (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
1236         _PyPegen_get_last_comprehension_item(last_comprehension),
1237         "Generator expression must be parenthesized"
1238     );
1239 }
1240 
1241 // Fstring stuff
1242 
1243 static expr_ty
_PyPegen_decode_fstring_part(Parser * p,int is_raw,expr_ty constant,Token * token)1244 _PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* token) {
1245     assert(PyUnicode_CheckExact(constant->v.Constant.value));
1246 
1247     const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value);
1248     if (bstr == NULL) {
1249         return NULL;
1250     }
1251 
1252     size_t len;
1253     if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) {
1254         len = 1;
1255     } else {
1256         len = strlen(bstr);
1257     }
1258 
1259     is_raw = is_raw || strchr(bstr, '\\') == NULL;
1260     PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token);
1261     if (str == NULL) {
1262         _Pypegen_raise_decode_error(p);
1263         return NULL;
1264     }
1265     if (_PyArena_AddPyObject(p->arena, str) < 0) {
1266         Py_DECREF(str);
1267         return NULL;
1268     }
1269     return _PyAST_Constant(str, NULL, constant->lineno, constant->col_offset,
1270                            constant->end_lineno, constant->end_col_offset,
1271                            p->arena);
1272 }
1273 
1274 static asdl_expr_seq *
unpack_top_level_joined_strs(Parser * p,asdl_expr_seq * raw_expressions)1275 unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
1276 {
1277     /* The parser might put multiple f-string values into an individual
1278      * JoinedStr node at the top level due to stuff like f-string debugging
1279      * expressions. This function flattens those and promotes them to the
1280      * upper level. Only simplifies AST, but the compiler already takes care
1281      * of the regular output, so this is not necessary if you are not going
1282      * to expose the output AST to Python level. */
1283 
1284     Py_ssize_t i, req_size, raw_size;
1285 
1286     req_size = raw_size = asdl_seq_LEN(raw_expressions);
1287     expr_ty expr;
1288     for (i = 0; i < raw_size; i++) {
1289         expr = asdl_seq_GET(raw_expressions, i);
1290         if (expr->kind == JoinedStr_kind) {
1291             req_size += asdl_seq_LEN(expr->v.JoinedStr.values) - 1;
1292         }
1293     }
1294 
1295     asdl_expr_seq *expressions = _Py_asdl_expr_seq_new(req_size, p->arena);
1296     if (expressions == NULL) {
1297         return NULL;
1298     }
1299 
1300     Py_ssize_t raw_index, req_index = 0;
1301     for (raw_index = 0; raw_index < raw_size; raw_index++) {
1302         expr = asdl_seq_GET(raw_expressions, raw_index);
1303         if (expr->kind == JoinedStr_kind) {
1304             asdl_expr_seq *values = expr->v.JoinedStr.values;
1305             for (Py_ssize_t n = 0; n < asdl_seq_LEN(values); n++) {
1306                 asdl_seq_SET(expressions, req_index, asdl_seq_GET(values, n));
1307                 req_index++;
1308             }
1309         } else {
1310             asdl_seq_SET(expressions, req_index, expr);
1311             req_index++;
1312         }
1313     }
1314     return expressions;
1315 }
1316 
1317 expr_ty
_PyPegen_joined_str(Parser * p,Token * a,asdl_expr_seq * raw_expressions,Token * b)1318 _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
1319 
1320     asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
1321     Py_ssize_t n_items = asdl_seq_LEN(expr);
1322 
1323     const char* quote_str = PyBytes_AsString(a->bytes);
1324     if (quote_str == NULL) {
1325         return NULL;
1326     }
1327     int is_raw = strpbrk(quote_str, "rR") != NULL;
1328 
1329     asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena);
1330     if (seq == NULL) {
1331         return NULL;
1332     }
1333 
1334     Py_ssize_t index = 0;
1335     for (Py_ssize_t i = 0; i < n_items; i++) {
1336         expr_ty item = asdl_seq_GET(expr, i);
1337         if (item->kind == Constant_kind) {
1338             item = _PyPegen_decode_fstring_part(p, is_raw, item, b);
1339             if (item == NULL) {
1340                 return NULL;
1341             }
1342 
1343             /* Tokenizer emits string parts even when the underlying string
1344             might become an empty value (e.g. FSTRING_MIDDLE with the value \\n)
1345             so we need to check for them and simplify it here. */
1346             if (PyUnicode_CheckExact(item->v.Constant.value)
1347                 && PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
1348                 continue;
1349             }
1350         }
1351         asdl_seq_SET(seq, index++, item);
1352     }
1353 
1354     asdl_expr_seq *resized_exprs;
1355     if (index != n_items) {
1356         resized_exprs = _Py_asdl_expr_seq_new(index, p->arena);
1357         if (resized_exprs == NULL) {
1358             return NULL;
1359         }
1360         for (Py_ssize_t i = 0; i < index; i++) {
1361             asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i));
1362         }
1363     }
1364     else {
1365         resized_exprs = seq;
1366     }
1367 
1368     return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
1369                             b->end_lineno, b->end_col_offset,
1370                             p->arena);
1371 }
1372 
_PyPegen_decoded_constant_from_token(Parser * p,Token * tok)1373 expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
1374     Py_ssize_t bsize;
1375     char* bstr;
1376     if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
1377         return NULL;
1378     }
1379     PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);
1380     if (str == NULL) {
1381         return NULL;
1382     }
1383     if (_PyArena_AddPyObject(p->arena, str) < 0) {
1384         Py_DECREF(str);
1385         return NULL;
1386     }
1387     return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset,
1388                            tok->end_lineno, tok->end_col_offset,
1389                            p->arena);
1390 }
1391 
_PyPegen_constant_from_token(Parser * p,Token * tok)1392 expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok) {
1393     char* bstr = PyBytes_AsString(tok->bytes);
1394     if (bstr == NULL) {
1395         return NULL;
1396     }
1397     PyObject* str = PyUnicode_FromString(bstr);
1398     if (str == NULL) {
1399         return NULL;
1400     }
1401     if (_PyArena_AddPyObject(p->arena, str) < 0) {
1402         Py_DECREF(str);
1403         return NULL;
1404     }
1405     return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset,
1406                            tok->end_lineno, tok->end_col_offset,
1407                            p->arena);
1408 }
1409 
_PyPegen_constant_from_string(Parser * p,Token * tok)1410 expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
1411     char* the_str = PyBytes_AsString(tok->bytes);
1412     if (the_str == NULL) {
1413         return NULL;
1414     }
1415     PyObject *s = _PyPegen_parse_string(p, tok);
1416     if (s == NULL) {
1417         _Pypegen_raise_decode_error(p);
1418         return NULL;
1419     }
1420     if (_PyArena_AddPyObject(p->arena, s) < 0) {
1421         Py_DECREF(s);
1422         return NULL;
1423     }
1424     PyObject *kind = NULL;
1425     if (the_str && the_str[0] == 'u') {
1426         kind = _PyPegen_new_identifier(p, "u");
1427         if (kind == NULL) {
1428             return NULL;
1429         }
1430     }
1431     return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
1432 }
1433 
/* Builds the node for one replacement field of an f-string.

   expression     the expression inside the braces
   debug          the '=' token of a debug expression, or NULL
   conversion     wrapped Name node of the conversion, or NULL
   format         wrapped format spec expression, or NULL
   closing_brace  the closing brace token; its metadata is used as the debug
                  text when neither a conversion nor a format is present

   The conversion must be a single character among 's', 'r' and 'a';
   anything else raises a syntax error. A debug expression with neither a
   conversion nor a format spec uses 'r'.

   Without debug, the result is the FormattedValue node. With debug, the
   result is a JoinedStr holding a Constant built from the relevant metadata
   followed by the FormattedValue.

   Returns NULL on error. */
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
                                 ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
                                 int end_lineno, int end_col_offset, PyArena *arena) {
    int conversion_val = -1;
    if (conversion != NULL) {
        expr_ty conversion_expr = (expr_ty) conversion->result;
        assert(conversion_expr->kind == Name_kind);
        Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);

        if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
            !(first == 's' || first == 'r' || first == 'a')) {
            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
                                              "f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
                                              conversion_expr->v.Name.id);
            return NULL;
        }

        conversion_val = Py_SAFE_DOWNCAST(first, Py_UCS4, int);
    }
    else if (debug && !format) {
        /* If no conversion is specified, use !r for debug expressions */
        conversion_val = (int)'r';
    }

    expr_ty formatted_value = _PyAST_FormattedValue(
        expression, conversion_val, format ? (expr_ty) format->result : NULL,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    if (formatted_value == NULL) {
        /* Do not let a failed node end up inside the debug JoinedStr below. */
        return NULL;
    }

    if (debug) {
        /* Find the non whitespace token after the "=" */
        int debug_end_line, debug_end_offset;
        PyObject *debug_metadata;

        if (conversion) {
            debug_end_line = ((expr_ty) conversion->result)->lineno;
            debug_end_offset = ((expr_ty) conversion->result)->col_offset;
            debug_metadata = conversion->metadata;
        }
        else if (format) {
            debug_end_line = ((expr_ty) format->result)->lineno;
            debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
            debug_metadata = format->metadata;
        }
        else {
            debug_end_line = end_lineno;
            debug_end_offset = end_col_offset;
            debug_metadata = closing_brace->metadata;
        }
        expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                             debug_end_offset - 1, p->arena);
        if (!debug_text) {
            return NULL;
        }

        asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
        if (values == NULL) {
            return NULL;
        }
        asdl_seq_SET(values, 0, debug_text);
        asdl_seq_SET(values, 1, formatted_value);
        return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
    }
    else {
        return formatted_value;
    }
}
1502 
1503 expr_ty
_PyPegen_concatenate_strings(Parser * p,asdl_expr_seq * strings,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)1504 _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
1505                              int lineno, int col_offset, int end_lineno,
1506                              int end_col_offset, PyArena *arena)
1507 {
1508     Py_ssize_t len = asdl_seq_LEN(strings);
1509     assert(len > 0);
1510 
1511     int f_string_found = 0;
1512     int unicode_string_found = 0;
1513     int bytes_found = 0;
1514 
1515     Py_ssize_t i = 0;
1516     Py_ssize_t n_flattened_elements = 0;
1517     for (i = 0; i < len; i++) {
1518         expr_ty elem = asdl_seq_GET(strings, i);
1519         switch(elem->kind) {
1520             case Constant_kind:
1521                 if (PyBytes_CheckExact(elem->v.Constant.value)) {
1522                     bytes_found = 1;
1523                 } else {
1524                     unicode_string_found = 1;
1525                 }
1526                 n_flattened_elements++;
1527                 break;
1528             case JoinedStr_kind:
1529                 n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
1530                 f_string_found = 1;
1531                 break;
1532             default:
1533                 n_flattened_elements++;
1534                 f_string_found = 1;
1535                 break;
1536         }
1537     }
1538 
1539     if ((unicode_string_found || f_string_found) && bytes_found) {
1540         RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1541         return NULL;
1542     }
1543 
1544     if (bytes_found) {
1545         PyObject* res = PyBytes_FromString("");
1546 
1547         /* Bytes literals never get a kind, but just for consistency
1548            since they are represented as Constant nodes, we'll mirror
1549            the same behavior as unicode strings for determining the
1550            kind. */
1551         PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
1552         for (i = 0; i < len; i++) {
1553             expr_ty elem = asdl_seq_GET(strings, i);
1554             PyBytes_Concat(&res, elem->v.Constant.value);
1555         }
1556         if (!res || _PyArena_AddPyObject(arena, res) < 0) {
1557             Py_XDECREF(res);
1558             return NULL;
1559         }
1560         return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
1561     }
1562 
1563     if (!f_string_found && len == 1) {
1564         return asdl_seq_GET(strings, 0);
1565     }
1566 
1567     asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
1568     if (flattened == NULL) {
1569         return NULL;
1570     }
1571 
1572     /* build flattened list */
1573     Py_ssize_t current_pos = 0;
1574     Py_ssize_t j = 0;
1575     for (i = 0; i < len; i++) {
1576         expr_ty elem = asdl_seq_GET(strings, i);
1577         switch(elem->kind) {
1578             case JoinedStr_kind:
1579                 for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
1580                     expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
1581                     if (subvalue == NULL) {
1582                         return NULL;
1583                     }
1584                     asdl_seq_SET(flattened, current_pos++, subvalue);
1585                 }
1586                 break;
1587             default:
1588                 asdl_seq_SET(flattened, current_pos++, elem);
1589                 break;
1590         }
1591     }
1592 
1593     /* calculate folded element count */
1594     Py_ssize_t n_elements = 0;
1595     int prev_is_constant = 0;
1596     for (i = 0; i < n_flattened_elements; i++) {
1597         expr_ty elem = asdl_seq_GET(flattened, i);
1598 
1599         /* The concatenation of a FormattedValue and an empty Contant should
1600            lead to the FormattedValue itself. Thus, we will not take any empty
1601            constants into account, just as in `_PyPegen_joined_str` */
1602         if (f_string_found && elem->kind == Constant_kind &&
1603             PyUnicode_CheckExact(elem->v.Constant.value) &&
1604             PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
1605             continue;
1606 
1607         if (!prev_is_constant || elem->kind != Constant_kind) {
1608             n_elements++;
1609         }
1610         prev_is_constant = elem->kind == Constant_kind;
1611     }
1612 
1613     asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
1614     if (values == NULL) {
1615         return NULL;
1616     }
1617 
1618     /* build folded list */
1619     _PyUnicodeWriter writer;
1620     current_pos = 0;
1621     for (i = 0; i < n_flattened_elements; i++) {
1622         expr_ty elem = asdl_seq_GET(flattened, i);
1623 
1624         /* if the current elem and the following are constants,
1625            fold them and all consequent constants */
1626         if (elem->kind == Constant_kind) {
1627             if (i + 1 < n_flattened_elements &&
1628                 asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) {
1629                 expr_ty first_elem = elem;
1630 
1631                 /* When a string is getting concatenated, the kind of the string
1632                    is determined by the first string in the concatenation
1633                    sequence.
1634 
1635                    u"abc" "def" -> u"abcdef"
1636                    "abc" u"abc" ->  "abcabc" */
1637                 PyObject *kind = elem->v.Constant.kind;
1638 
1639                 _PyUnicodeWriter_Init(&writer);
1640                 expr_ty last_elem = elem;
1641                 for (j = i; j < n_flattened_elements; j++) {
1642                     expr_ty current_elem = asdl_seq_GET(flattened, j);
1643                     if (current_elem->kind == Constant_kind) {
1644                         if (_PyUnicodeWriter_WriteStr(
1645                                 &writer, current_elem->v.Constant.value)) {
1646                             _PyUnicodeWriter_Dealloc(&writer);
1647                             return NULL;
1648                         }
1649                         last_elem = current_elem;
1650                     } else {
1651                         break;
1652                     }
1653                 }
1654                 i = j - 1;
1655 
1656                 PyObject *concat_str = _PyUnicodeWriter_Finish(&writer);
1657                 if (concat_str == NULL) {
1658                     _PyUnicodeWriter_Dealloc(&writer);
1659                     return NULL;
1660                 }
1661                 if (_PyArena_AddPyObject(p->arena, concat_str) < 0) {
1662                     Py_DECREF(concat_str);
1663                     return NULL;
1664                 }
1665                 elem = _PyAST_Constant(concat_str, kind, first_elem->lineno,
1666                                        first_elem->col_offset,
1667                                        last_elem->end_lineno,
1668                                        last_elem->end_col_offset, p->arena);
1669                 if (elem == NULL) {
1670                     return NULL;
1671                 }
1672             }
1673 
1674             /* Drop all empty contanst strings */
1675             if (f_string_found &&
1676                 PyUnicode_CheckExact(elem->v.Constant.value) &&
1677                 PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
1678                 continue;
1679             }
1680         }
1681 
1682         asdl_seq_SET(values, current_pos++, elem);
1683     }
1684 
1685     if (!f_string_found) {
1686         assert(n_elements == 1);
1687         expr_ty elem = asdl_seq_GET(values, 0);
1688         assert(elem->kind == Constant_kind);
1689         return elem;
1690     }
1691 
1692     assert(current_pos == n_elements);
1693     return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
1694 }
1695