• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <Python.h>
2 #include "pycore_ast.h"           // _PyAST_Validate(),
3 #include <errcode.h>
4 #include "tokenizer.h"
5 
6 #include "pegen.h"
7 #include "string_parser.h"
8 
9 PyObject *
_PyPegen_new_type_comment(Parser * p,const char * s)10 _PyPegen_new_type_comment(Parser *p, const char *s)
11 {
12     PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
13     if (res == NULL) {
14         return NULL;
15     }
16     if (_PyArena_AddPyObject(p->arena, res) < 0) {
17         Py_DECREF(res);
18         return NULL;
19     }
20     return res;
21 }
22 
23 arg_ty
_PyPegen_add_type_comment_to_arg(Parser * p,arg_ty a,Token * tc)24 _PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
25 {
26     if (tc == NULL) {
27         return a;
28     }
29     const char *bytes = PyBytes_AsString(tc->bytes);
30     if (bytes == NULL) {
31         return NULL;
32     }
33     PyObject *tco = _PyPegen_new_type_comment(p, bytes);
34     if (tco == NULL) {
35         return NULL;
36     }
37     return _PyAST_arg(a->arg, a->annotation, tco,
38                       a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
39                       p->arena);
40 }
41 
42 static int
init_normalization(Parser * p)43 init_normalization(Parser *p)
44 {
45     if (p->normalize) {
46         return 1;
47     }
48     PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
49     if (!m)
50     {
51         return 0;
52     }
53     p->normalize = PyObject_GetAttrString(m, "normalize");
54     Py_DECREF(m);
55     if (!p->normalize)
56     {
57         return 0;
58     }
59     return 1;
60 }
61 
62 /* Checks if the NOTEQUAL token is valid given the current parser flags
63 0 indicates success and nonzero indicates failure (an exception may be set) */
64 int
_PyPegen_check_barry_as_flufl(Parser * p,Token * t)65 _PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
66     assert(t->bytes != NULL);
67     assert(t->type == NOTEQUAL);
68 
69     const char* tok_str = PyBytes_AS_STRING(t->bytes);
70     if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) {
71         RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
72         return -1;
73     }
74     if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
75         return strcmp(tok_str, "!=");
76     }
77     return 0;
78 }
79 
80 int
_PyPegen_check_legacy_stmt(Parser * p,expr_ty name)81 _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
82     if (name->kind != Name_kind) {
83         return 0;
84     }
85     const char* candidates[2] = {"print", "exec"};
86     for (int i=0; i<2; i++) {
87         if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) {
88             return 1;
89         }
90     }
91     return 0;
92 }
93 
94 PyObject *
_PyPegen_new_identifier(Parser * p,const char * n)95 _PyPegen_new_identifier(Parser *p, const char *n)
96 {
97     PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
98     if (!id) {
99         goto error;
100     }
101     /* PyUnicode_DecodeUTF8 should always return a ready string. */
102     assert(PyUnicode_IS_READY(id));
103     /* Check whether there are non-ASCII characters in the
104        identifier; if so, normalize to NFKC. */
105     if (!PyUnicode_IS_ASCII(id))
106     {
107         PyObject *id2;
108         if (!init_normalization(p))
109         {
110             Py_DECREF(id);
111             goto error;
112         }
113         PyObject *form = PyUnicode_InternFromString("NFKC");
114         if (form == NULL)
115         {
116             Py_DECREF(id);
117             goto error;
118         }
119         PyObject *args[2] = {form, id};
120         id2 = _PyObject_FastCall(p->normalize, args, 2);
121         Py_DECREF(id);
122         Py_DECREF(form);
123         if (!id2) {
124             goto error;
125         }
126         if (!PyUnicode_Check(id2))
127         {
128             PyErr_Format(PyExc_TypeError,
129                          "unicodedata.normalize() must return a string, not "
130                          "%.200s",
131                          _PyType_Name(Py_TYPE(id2)));
132             Py_DECREF(id2);
133             goto error;
134         }
135         id = id2;
136     }
137     PyUnicode_InternInPlace(&id);
138     if (_PyArena_AddPyObject(p->arena, id) < 0)
139     {
140         Py_DECREF(id);
141         goto error;
142     }
143     return id;
144 
145 error:
146     p->error_indicator = 1;
147     return NULL;
148 }
149 
150 static PyObject *
_create_dummy_identifier(Parser * p)151 _create_dummy_identifier(Parser *p)
152 {
153     return _PyPegen_new_identifier(p, "");
154 }
155 
156 static inline Py_ssize_t
byte_offset_to_character_offset(PyObject * line,Py_ssize_t col_offset)157 byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
158 {
159     const char *str = PyUnicode_AsUTF8(line);
160     if (!str) {
161         return 0;
162     }
163     Py_ssize_t len = strlen(str);
164     if (col_offset > len + 1) {
165         col_offset = len + 1;
166     }
167     assert(col_offset >= 0);
168     PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
169     if (!text) {
170         return 0;
171     }
172     Py_ssize_t size = PyUnicode_GET_LENGTH(text);
173     Py_DECREF(text);
174     return size;
175 }
176 
177 const char *
_PyPegen_get_expr_name(expr_ty e)178 _PyPegen_get_expr_name(expr_ty e)
179 {
180     assert(e != NULL);
181     switch (e->kind) {
182         case Attribute_kind:
183             return "attribute";
184         case Subscript_kind:
185             return "subscript";
186         case Starred_kind:
187             return "starred";
188         case Name_kind:
189             return "name";
190         case List_kind:
191             return "list";
192         case Tuple_kind:
193             return "tuple";
194         case Lambda_kind:
195             return "lambda";
196         case Call_kind:
197             return "function call";
198         case BoolOp_kind:
199         case BinOp_kind:
200         case UnaryOp_kind:
201             return "expression";
202         case GeneratorExp_kind:
203             return "generator expression";
204         case Yield_kind:
205         case YieldFrom_kind:
206             return "yield expression";
207         case Await_kind:
208             return "await expression";
209         case ListComp_kind:
210             return "list comprehension";
211         case SetComp_kind:
212             return "set comprehension";
213         case DictComp_kind:
214             return "dict comprehension";
215         case Dict_kind:
216             return "dict literal";
217         case Set_kind:
218             return "set display";
219         case JoinedStr_kind:
220         case FormattedValue_kind:
221             return "f-string expression";
222         case Constant_kind: {
223             PyObject *value = e->v.Constant.value;
224             if (value == Py_None) {
225                 return "None";
226             }
227             if (value == Py_False) {
228                 return "False";
229             }
230             if (value == Py_True) {
231                 return "True";
232             }
233             if (value == Py_Ellipsis) {
234                 return "ellipsis";
235             }
236             return "literal";
237         }
238         case Compare_kind:
239             return "comparison";
240         case IfExp_kind:
241             return "conditional expression";
242         case NamedExpr_kind:
243             return "named expression";
244         default:
245             PyErr_Format(PyExc_SystemError,
246                          "unexpected expression in assignment %d (line %d)",
247                          e->kind, e->lineno);
248             return NULL;
249     }
250 }
251 
252 static int
raise_decode_error(Parser * p)253 raise_decode_error(Parser *p)
254 {
255     assert(PyErr_Occurred());
256     const char *errtype = NULL;
257     if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
258         errtype = "unicode error";
259     }
260     else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
261         errtype = "value error";
262     }
263     if (errtype) {
264         PyObject *type;
265         PyObject *value;
266         PyObject *tback;
267         PyObject *errstr;
268         PyErr_Fetch(&type, &value, &tback);
269         errstr = PyObject_Str(value);
270         if (errstr) {
271             RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
272             Py_DECREF(errstr);
273         }
274         else {
275             PyErr_Clear();
276             RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
277         }
278         Py_XDECREF(type);
279         Py_XDECREF(value);
280         Py_XDECREF(tback);
281     }
282 
283     return -1;
284 }
285 
286 static inline void
raise_unclosed_parentheses_error(Parser * p)287 raise_unclosed_parentheses_error(Parser *p) {
288        int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
289        int error_col = p->tok->parencolstack[p->tok->level-1];
290        RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
291                                   error_lineno, error_col, error_lineno, -1,
292                                   "'%c' was never closed",
293                                   p->tok->parenstack[p->tok->level-1]);
294 }
295 
296 static void
raise_tokenizer_init_error(PyObject * filename)297 raise_tokenizer_init_error(PyObject *filename)
298 {
299     if (!(PyErr_ExceptionMatches(PyExc_LookupError)
300           || PyErr_ExceptionMatches(PyExc_SyntaxError)
301           || PyErr_ExceptionMatches(PyExc_ValueError)
302           || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
303         return;
304     }
305     PyObject *errstr = NULL;
306     PyObject *tuple = NULL;
307     PyObject *type;
308     PyObject *value;
309     PyObject *tback;
310     PyErr_Fetch(&type, &value, &tback);
311     errstr = PyObject_Str(value);
312     if (!errstr) {
313         goto error;
314     }
315 
316     PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
317     if (!tmp) {
318         goto error;
319     }
320 
321     tuple = PyTuple_Pack(2, errstr, tmp);
322     Py_DECREF(tmp);
323     if (!value) {
324         goto error;
325     }
326     PyErr_SetObject(PyExc_SyntaxError, tuple);
327 
328 error:
329     Py_XDECREF(type);
330     Py_XDECREF(value);
331     Py_XDECREF(tback);
332     Py_XDECREF(errstr);
333     Py_XDECREF(tuple);
334 }
335 
336 static int
tokenizer_error(Parser * p)337 tokenizer_error(Parser *p)
338 {
339     if (PyErr_Occurred()) {
340         return -1;
341     }
342 
343     const char *msg = NULL;
344     PyObject* errtype = PyExc_SyntaxError;
345     Py_ssize_t col_offset = -1;
346     switch (p->tok->done) {
347         case E_TOKEN:
348             msg = "invalid token";
349             break;
350         case E_EOF:
351             if (p->tok->level) {
352                 raise_unclosed_parentheses_error(p);
353             } else {
354                 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
355             }
356             return -1;
357         case E_DEDENT:
358             RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
359             return -1;
360         case E_INTR:
361             if (!PyErr_Occurred()) {
362                 PyErr_SetNone(PyExc_KeyboardInterrupt);
363             }
364             return -1;
365         case E_NOMEM:
366             PyErr_NoMemory();
367             return -1;
368         case E_TABSPACE:
369             errtype = PyExc_TabError;
370             msg = "inconsistent use of tabs and spaces in indentation";
371             break;
372         case E_TOODEEP:
373             errtype = PyExc_IndentationError;
374             msg = "too many levels of indentation";
375             break;
376         case E_LINECONT: {
377             col_offset = p->tok->cur - p->tok->buf - 1;
378             msg = "unexpected character after line continuation character";
379             break;
380         }
381         default:
382             msg = "unknown parsing error";
383     }
384 
385     RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
386                                col_offset >= 0 ? col_offset : 0,
387                                p->tok->lineno, -1, msg);
388     return -1;
389 }
390 
391 void *
_PyPegen_raise_error(Parser * p,PyObject * errtype,const char * errmsg,...)392 _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
393 {
394     if (p->fill == 0) {
395         va_list va;
396         va_start(va, errmsg);
397         _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va);
398         va_end(va);
399         return NULL;
400     }
401 
402     Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
403     Py_ssize_t col_offset;
404     Py_ssize_t end_col_offset = -1;
405     if (t->col_offset == -1) {
406         if (p->tok->cur == p->tok->buf) {
407             col_offset = 0;
408         } else {
409             const char* start = p->tok->buf  ? p->tok->line_start : p->tok->buf;
410             col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
411         }
412     } else {
413         col_offset = t->col_offset + 1;
414     }
415 
416     if (t->end_col_offset != -1) {
417         end_col_offset = t->end_col_offset + 1;
418     }
419 
420     va_list va;
421     va_start(va, errmsg);
422     _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
423     va_end(va);
424 
425     return NULL;
426 }
427 
428 static PyObject *
get_error_line(Parser * p,Py_ssize_t lineno)429 get_error_line(Parser *p, Py_ssize_t lineno)
430 {
431     /* If the file descriptor is interactive, the source lines of the current
432      * (multi-line) statement are stored in p->tok->interactive_src_start.
433      * If not, we're parsing from a string, which means that the whole source
434      * is stored in p->tok->str. */
435     assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);
436 
437     char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
438     assert(cur_line != NULL);
439     const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp;
440 
441     Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 1 : lineno;
442 
443     for (int i = 0; i < relative_lineno - 1; i++) {
444         char *new_line = strchr(cur_line, '\n') + 1;
445         assert(new_line != NULL && new_line <= buf_end);
446         if (new_line == NULL || new_line > buf_end) {
447             break;
448         }
449         cur_line = new_line;
450     }
451 
452     char *next_newline;
453     if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
454         next_newline = cur_line + strlen(cur_line);
455     }
456     return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
457 }
458 
459 void *
_PyPegen_raise_error_known_location(Parser * p,PyObject * errtype,Py_ssize_t lineno,Py_ssize_t col_offset,Py_ssize_t end_lineno,Py_ssize_t end_col_offset,const char * errmsg,va_list va)460 _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
461                                     Py_ssize_t lineno, Py_ssize_t col_offset,
462                                     Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
463                                     const char *errmsg, va_list va)
464 {
465     PyObject *value = NULL;
466     PyObject *errstr = NULL;
467     PyObject *error_line = NULL;
468     PyObject *tmp = NULL;
469     p->error_indicator = 1;
470 
471     if (end_lineno == CURRENT_POS) {
472         end_lineno = p->tok->lineno;
473     }
474     if (end_col_offset == CURRENT_POS) {
475         end_col_offset = p->tok->cur - p->tok->line_start;
476     }
477 
478     if (p->start_rule == Py_fstring_input) {
479         const char *fstring_msg = "f-string: ";
480         Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
481 
482         char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
483         if (!new_errmsg) {
484             return (void *) PyErr_NoMemory();
485         }
486 
487         // Copy both strings into new buffer
488         memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
489         memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
490         new_errmsg[len] = 0;
491         errmsg = new_errmsg;
492     }
493     errstr = PyUnicode_FromFormatV(errmsg, va);
494     if (!errstr) {
495         goto error;
496     }
497 
498     if (p->tok->fp_interactive) {
499         error_line = get_error_line(p, lineno);
500     }
501     else if (p->start_rule == Py_file_input) {
502         error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
503                                                      (int) lineno, p->tok->encoding);
504     }
505 
506     if (!error_line) {
507         /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
508            then we need to find the error line from some other source, because
509            p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
510            failed or we're parsing from a string or the REPL. There's a third edge case where
511            we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
512            `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
513            does not physically exist */
514         assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
515 
516         if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
517             Py_ssize_t size = p->tok->inp - p->tok->buf;
518             error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
519         }
520         else if (p->tok->fp == NULL || p->tok->fp == stdin) {
521             error_line = get_error_line(p, lineno);
522         }
523         else {
524             error_line = PyUnicode_FromStringAndSize("", 0);
525         }
526         if (!error_line) {
527             goto error;
528         }
529     }
530 
531     if (p->start_rule == Py_fstring_input) {
532         col_offset -= p->starting_col_offset;
533         end_col_offset -= p->starting_col_offset;
534     }
535 
536     Py_ssize_t col_number = col_offset;
537     Py_ssize_t end_col_number = end_col_offset;
538 
539     if (p->tok->encoding != NULL) {
540         col_number = byte_offset_to_character_offset(error_line, col_offset);
541         end_col_number = end_col_number > 0 ?
542                          byte_offset_to_character_offset(error_line, end_col_offset) :
543                          end_col_number;
544     }
545     tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
546     if (!tmp) {
547         goto error;
548     }
549     value = PyTuple_Pack(2, errstr, tmp);
550     Py_DECREF(tmp);
551     if (!value) {
552         goto error;
553     }
554     PyErr_SetObject(errtype, value);
555 
556     Py_DECREF(errstr);
557     Py_DECREF(value);
558     if (p->start_rule == Py_fstring_input) {
559         PyMem_Free((void *)errmsg);
560     }
561     return NULL;
562 
563 error:
564     Py_XDECREF(errstr);
565     Py_XDECREF(error_line);
566     if (p->start_rule == Py_fstring_input) {
567         PyMem_Free((void *)errmsg);
568     }
569     return NULL;
570 }
571 
#if 0
/* Compiled-out debugging helper: map a token type to its name, or "<Huh?>"
   when the type is outside 0..N_TOKENS. */
static const char *
token_name(int type)
{
    return (0 <= type && type <= N_TOKENS) ? _PyParser_TokenNames[type]
                                           : "<Huh?>";
}
#endif
582 
583 // Here, mark is the start of the node, while p->mark is the end.
584 // If node==NULL, they should be the same.
585 int
_PyPegen_insert_memo(Parser * p,int mark,int type,void * node)586 _PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
587 {
588     // Insert in front
589     Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo));
590     if (m == NULL) {
591         return -1;
592     }
593     m->type = type;
594     m->node = node;
595     m->mark = p->mark;
596     m->next = p->tokens[mark]->memo;
597     p->tokens[mark]->memo = m;
598     return 0;
599 }
600 
601 // Like _PyPegen_insert_memo(), but updates an existing node if found.
602 int
_PyPegen_update_memo(Parser * p,int mark,int type,void * node)603 _PyPegen_update_memo(Parser *p, int mark, int type, void *node)
604 {
605     for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
606         if (m->type == type) {
607             // Update existing node.
608             m->node = node;
609             m->mark = p->mark;
610             return 0;
611         }
612     }
613     // Insert new node.
614     return _PyPegen_insert_memo(p, mark, type, node);
615 }
616 
617 // Return dummy NAME.
618 void *
_PyPegen_dummy_name(Parser * p,...)619 _PyPegen_dummy_name(Parser *p, ...)
620 {
621     static void *cache = NULL;
622 
623     if (cache != NULL) {
624         return cache;
625     }
626 
627     PyObject *id = _create_dummy_identifier(p);
628     if (!id) {
629         return NULL;
630     }
631     cache = _PyAST_Name(id, Load, 1, 0, 1, 0, p->arena);
632     return cache;
633 }
634 
635 static int
_get_keyword_or_name_type(Parser * p,const char * name,int name_len)636 _get_keyword_or_name_type(Parser *p, const char *name, int name_len)
637 {
638     assert(name_len > 0);
639     if (name_len >= p->n_keyword_lists ||
640         p->keywords[name_len] == NULL ||
641         p->keywords[name_len]->type == -1) {
642         return NAME;
643     }
644     for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) {
645         if (strncmp(k->str, name, name_len) == 0) {
646             return k->type;
647         }
648     }
649     return NAME;
650 }
651 
652 static int
growable_comment_array_init(growable_comment_array * arr,size_t initial_size)653 growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
654     assert(initial_size > 0);
655     arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
656     arr->size = initial_size;
657     arr->num_items = 0;
658 
659     return arr->items != NULL;
660 }
661 
662 static int
growable_comment_array_add(growable_comment_array * arr,int lineno,char * comment)663 growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
664     if (arr->num_items >= arr->size) {
665         size_t new_size = arr->size * 2;
666         void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
667         if (!new_items_array) {
668             return 0;
669         }
670         arr->items = new_items_array;
671         arr->size = new_size;
672     }
673 
674     arr->items[arr->num_items].lineno = lineno;
675     arr->items[arr->num_items].comment = comment;  // Take ownership
676     arr->num_items++;
677     return 1;
678 }
679 
680 static void
growable_comment_array_deallocate(growable_comment_array * arr)681 growable_comment_array_deallocate(growable_comment_array *arr) {
682     for (unsigned i = 0; i < arr->num_items; i++) {
683         PyMem_Free(arr->items[i].comment);
684     }
685     PyMem_Free(arr->items);
686 }
687 
688 static int
initialize_token(Parser * p,Token * token,const char * start,const char * end,int token_type)689 initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
690     assert(token != NULL);
691 
692     token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
693     token->bytes = PyBytes_FromStringAndSize(start, end - start);
694     if (token->bytes == NULL) {
695         return -1;
696     }
697 
698     if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
699         Py_DECREF(token->bytes);
700         return -1;
701     }
702 
703     token->level = p->tok->level;
704 
705     const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
706     int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
707     int end_lineno = p->tok->lineno;
708 
709     int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
710     int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
711 
712     token->lineno = lineno;
713     token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset;
714     token->end_lineno = end_lineno;
715     token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset;
716 
717     p->fill += 1;
718 
719     if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
720         return raise_decode_error(p);
721     }
722 
723     return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
724 }
725 
726 static int
_resize_tokens_array(Parser * p)727 _resize_tokens_array(Parser *p) {
728     int newsize = p->size * 2;
729     Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
730     if (new_tokens == NULL) {
731         PyErr_NoMemory();
732         return -1;
733     }
734     p->tokens = new_tokens;
735 
736     for (int i = p->size; i < newsize; i++) {
737         p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
738         if (p->tokens[i] == NULL) {
739             p->size = i; // Needed, in order to cleanup correctly after parser fails
740             PyErr_NoMemory();
741             return -1;
742         }
743     }
744     p->size = newsize;
745     return 0;
746 }
747 
748 int
_PyPegen_fill_token(Parser * p)749 _PyPegen_fill_token(Parser *p)
750 {
751     const char *start;
752     const char *end;
753     int type = PyTokenizer_Get(p->tok, &start, &end);
754 
755     // Record and skip '# type: ignore' comments
756     while (type == TYPE_IGNORE) {
757         Py_ssize_t len = end - start;
758         char *tag = PyMem_Malloc(len + 1);
759         if (tag == NULL) {
760             PyErr_NoMemory();
761             return -1;
762         }
763         strncpy(tag, start, len);
764         tag[len] = '\0';
765         // Ownership of tag passes to the growable array
766         if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
767             PyErr_NoMemory();
768             return -1;
769         }
770         type = PyTokenizer_Get(p->tok, &start, &end);
771     }
772 
773     // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
774     if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
775         type = NEWLINE; /* Add an extra newline */
776         p->parsing_started = 0;
777 
778         if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
779             p->tok->pendin = -p->tok->indent;
780             p->tok->indent = 0;
781         }
782     }
783     else {
784         p->parsing_started = 1;
785     }
786 
787     // Check if we are at the limit of the token array capacity and resize if needed
788     if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
789         return -1;
790     }
791 
792     Token *t = p->tokens[p->fill];
793     return initialize_token(p, t, start, end, type);
794 }
795 
796 
#if defined(Py_DEBUG)
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset every counter to zero.
void
_PyPegen_clear_memo_statistics()
{
    memset(memo_statistics, 0, sizeof(memo_statistics));
}

// Return a new list of NSTATISTICS ints, one per counter, or NULL on error.
PyObject *
_PyPegen_get_memo_statistics()
{
    PyObject *stats = PyList_New(NSTATISTICS);
    if (stats == NULL) {
        return NULL;
    }
    for (int slot = 0; slot < NSTATISTICS; slot++) {
        PyObject *count = PyLong_FromLong(memo_statistics[slot]);
        // PyList_SetItem takes over the reference to count, so it is not
        // released here.
        if (count == NULL || PyList_SetItem(stats, slot, count) < 0) {
            Py_DECREF(stats);
            return NULL;
        }
    }
    return stats;
}
#endif
835 
836 int  // bool
_PyPegen_is_memoized(Parser * p,int type,void * pres)837 _PyPegen_is_memoized(Parser *p, int type, void *pres)
838 {
839     if (p->mark == p->fill) {
840         if (_PyPegen_fill_token(p) < 0) {
841             p->error_indicator = 1;
842             return -1;
843         }
844     }
845 
846     Token *t = p->tokens[p->mark];
847 
848     for (Memo *m = t->memo; m != NULL; m = m->next) {
849         if (m->type == type) {
850 #if defined(PY_DEBUG)
851             if (0 <= type && type < NSTATISTICS) {
852                 long count = m->mark - p->mark;
853                 // A memoized negative result counts for one.
854                 if (count <= 0) {
855                     count = 1;
856                 }
857                 memo_statistics[type] += count;
858             }
859 #endif
860             p->mark = m->mark;
861             *(void **)(pres) = m->node;
862             return 1;
863         }
864     }
865     return 0;
866 }
867 
868 int
_PyPegen_lookahead_with_name(int positive,expr_ty (func)(Parser *),Parser * p)869 _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
870 {
871     int mark = p->mark;
872     void *res = func(p);
873     p->mark = mark;
874     return (res != NULL) == positive;
875 }
876 
877 int
_PyPegen_lookahead_with_string(int positive,expr_ty (func)(Parser *,const char *),Parser * p,const char * arg)878 _PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
879 {
880     int mark = p->mark;
881     void *res = func(p, arg);
882     p->mark = mark;
883     return (res != NULL) == positive;
884 }
885 
886 int
_PyPegen_lookahead_with_int(int positive,Token * (func)(Parser *,int),Parser * p,int arg)887 _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
888 {
889     int mark = p->mark;
890     void *res = func(p, arg);
891     p->mark = mark;
892     return (res != NULL) == positive;
893 }
894 
895 int
_PyPegen_lookahead(int positive,void * (func)(Parser *),Parser * p)896 _PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
897 {
898     int mark = p->mark;
899     void *res = (void*)func(p);
900     p->mark = mark;
901     return (res != NULL) == positive;
902 }
903 
904 Token *
_PyPegen_expect_token(Parser * p,int type)905 _PyPegen_expect_token(Parser *p, int type)
906 {
907     if (p->mark == p->fill) {
908         if (_PyPegen_fill_token(p) < 0) {
909             p->error_indicator = 1;
910             return NULL;
911         }
912     }
913     Token *t = p->tokens[p->mark];
914     if (t->type != type) {
915         return NULL;
916     }
917     p->mark += 1;
918     return t;
919 }
920 
921 Token *
_PyPegen_expect_forced_token(Parser * p,int type,const char * expected)922 _PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
923 
924     if (p->error_indicator == 1) {
925         return NULL;
926     }
927 
928     if (p->mark == p->fill) {
929         if (_PyPegen_fill_token(p) < 0) {
930             p->error_indicator = 1;
931             return NULL;
932         }
933     }
934     Token *t = p->tokens[p->mark];
935     if (t->type != type) {
936         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
937         return NULL;
938     }
939     p->mark += 1;
940     return t;
941 }
942 
943 expr_ty
_PyPegen_expect_soft_keyword(Parser * p,const char * keyword)944 _PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
945 {
946     if (p->mark == p->fill) {
947         if (_PyPegen_fill_token(p) < 0) {
948             p->error_indicator = 1;
949             return NULL;
950         }
951     }
952     Token *t = p->tokens[p->mark];
953     if (t->type != NAME) {
954         return NULL;
955     }
956     const char *s = PyBytes_AsString(t->bytes);
957     if (!s) {
958         p->error_indicator = 1;
959         return NULL;
960     }
961     if (strcmp(s, keyword) != 0) {
962         return NULL;
963     }
964     return _PyPegen_name_token(p);
965 }
966 
967 Token *
_PyPegen_get_last_nonnwhitespace_token(Parser * p)968 _PyPegen_get_last_nonnwhitespace_token(Parser *p)
969 {
970     assert(p->mark >= 0);
971     Token *token = NULL;
972     for (int m = p->mark - 1; m >= 0; m--) {
973         token = p->tokens[m];
974         if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
975             break;
976         }
977     }
978     return token;
979 }
980 
981 static expr_ty
_PyPegen_name_from_token(Parser * p,Token * t)982 _PyPegen_name_from_token(Parser *p, Token* t)
983 {
984     if (t == NULL) {
985         return NULL;
986     }
987     const char *s = PyBytes_AsString(t->bytes);
988     if (!s) {
989         p->error_indicator = 1;
990         return NULL;
991     }
992     PyObject *id = _PyPegen_new_identifier(p, s);
993     if (id == NULL) {
994         p->error_indicator = 1;
995         return NULL;
996     }
997     return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
998                        t->end_col_offset, p->arena);
999 }
1000 
1001 
1002 expr_ty
_PyPegen_name_token(Parser * p)1003 _PyPegen_name_token(Parser *p)
1004 {
1005     Token *t = _PyPegen_expect_token(p, NAME);
1006     return _PyPegen_name_from_token(p, t);
1007 }
1008 
1009 void *
_PyPegen_string_token(Parser * p)1010 _PyPegen_string_token(Parser *p)
1011 {
1012     return _PyPegen_expect_token(p, STRING);
1013 }
1014 
1015 
_PyPegen_soft_keyword_token(Parser * p)1016 expr_ty _PyPegen_soft_keyword_token(Parser *p) {
1017     Token *t = _PyPegen_expect_token(p, NAME);
1018     if (t == NULL) {
1019         return NULL;
1020     }
1021     char *the_token;
1022     Py_ssize_t size;
1023     PyBytes_AsStringAndSize(t->bytes, &the_token, &size);
1024     for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
1025         if (strncmp(*keyword, the_token, size) == 0) {
1026             return _PyPegen_name_from_token(p, t);
1027         }
1028     }
1029     return NULL;
1030 }
1031 
1032 static PyObject *
parsenumber_raw(const char * s)1033 parsenumber_raw(const char *s)
1034 {
1035     const char *end;
1036     long x;
1037     double dx;
1038     Py_complex compl;
1039     int imflag;
1040 
1041     assert(s != NULL);
1042     errno = 0;
1043     end = s + strlen(s) - 1;
1044     imflag = *end == 'j' || *end == 'J';
1045     if (s[0] == '0') {
1046         x = (long)PyOS_strtoul(s, (char **)&end, 0);
1047         if (x < 0 && errno == 0) {
1048             return PyLong_FromString(s, (char **)0, 0);
1049         }
1050     }
1051     else {
1052         x = PyOS_strtol(s, (char **)&end, 0);
1053     }
1054     if (*end == '\0') {
1055         if (errno != 0) {
1056             return PyLong_FromString(s, (char **)0, 0);
1057         }
1058         return PyLong_FromLong(x);
1059     }
1060     /* XXX Huge floats may silently fail */
1061     if (imflag) {
1062         compl.real = 0.;
1063         compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
1064         if (compl.imag == -1.0 && PyErr_Occurred()) {
1065             return NULL;
1066         }
1067         return PyComplex_FromCComplex(compl);
1068     }
1069     dx = PyOS_string_to_double(s, NULL, NULL);
1070     if (dx == -1.0 && PyErr_Occurred()) {
1071         return NULL;
1072     }
1073     return PyFloat_FromDouble(dx);
1074 }
1075 
1076 static PyObject *
parsenumber(const char * s)1077 parsenumber(const char *s)
1078 {
1079     char *dup;
1080     char *end;
1081     PyObject *res = NULL;
1082 
1083     assert(s != NULL);
1084 
1085     if (strchr(s, '_') == NULL) {
1086         return parsenumber_raw(s);
1087     }
1088     /* Create a duplicate without underscores. */
1089     dup = PyMem_Malloc(strlen(s) + 1);
1090     if (dup == NULL) {
1091         return PyErr_NoMemory();
1092     }
1093     end = dup;
1094     for (; *s; s++) {
1095         if (*s != '_') {
1096             *end++ = *s;
1097         }
1098     }
1099     *end = '\0';
1100     res = parsenumber_raw(dup);
1101     PyMem_Free(dup);
1102     return res;
1103 }
1104 
1105 expr_ty
_PyPegen_number_token(Parser * p)1106 _PyPegen_number_token(Parser *p)
1107 {
1108     Token *t = _PyPegen_expect_token(p, NUMBER);
1109     if (t == NULL) {
1110         return NULL;
1111     }
1112 
1113     const char *num_raw = PyBytes_AsString(t->bytes);
1114     if (num_raw == NULL) {
1115         p->error_indicator = 1;
1116         return NULL;
1117     }
1118 
1119     if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
1120         p->error_indicator = 1;
1121         return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
1122                                   "in Python 3.6 and greater");
1123     }
1124 
1125     PyObject *c = parsenumber(num_raw);
1126 
1127     if (c == NULL) {
1128         p->error_indicator = 1;
1129         return NULL;
1130     }
1131 
1132     if (_PyArena_AddPyObject(p->arena, c) < 0) {
1133         Py_DECREF(c);
1134         p->error_indicator = 1;
1135         return NULL;
1136     }
1137 
1138     return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
1139                            t->end_col_offset, p->arena);
1140 }
1141 
1142 /* Check that the source for a single input statement really is a single
1143    statement by looking at what is left in the buffer after parsing.
1144    Trailing whitespace and comments are OK. */
1145 static int // bool
bad_single_statement(Parser * p)1146 bad_single_statement(Parser *p)
1147 {
1148     char *cur = p->tok->cur;
1149     char c = *cur;
1150 
1151     for (;;) {
1152         while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
1153             c = *++cur;
1154         }
1155 
1156         if (!c) {
1157             return 0;
1158         }
1159 
1160         if (c != '#') {
1161             return 1;
1162         }
1163 
1164         /* Suck up comment. */
1165         while (c && c != '\n') {
1166             c = *++cur;
1167         }
1168     }
1169 }
1170 
1171 void
_PyPegen_Parser_Free(Parser * p)1172 _PyPegen_Parser_Free(Parser *p)
1173 {
1174     Py_XDECREF(p->normalize);
1175     for (int i = 0; i < p->size; i++) {
1176         PyMem_Free(p->tokens[i]);
1177     }
1178     PyMem_Free(p->tokens);
1179     growable_comment_array_deallocate(&p->type_ignore_comments);
1180     PyMem_Free(p);
1181 }
1182 
1183 static int
compute_parser_flags(PyCompilerFlags * flags)1184 compute_parser_flags(PyCompilerFlags *flags)
1185 {
1186     int parser_flags = 0;
1187     if (!flags) {
1188         return 0;
1189     }
1190     if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
1191         parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
1192     }
1193     if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
1194         parser_flags |= PyPARSE_IGNORE_COOKIE;
1195     }
1196     if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
1197         parser_flags |= PyPARSE_BARRY_AS_BDFL;
1198     }
1199     if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
1200         parser_flags |= PyPARSE_TYPE_COMMENTS;
1201     }
1202     if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
1203         parser_flags |= PyPARSE_ASYNC_HACKS;
1204     }
1205     if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
1206         parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
1207     }
1208     return parser_flags;
1209 }
1210 
1211 Parser *
_PyPegen_Parser_New(struct tok_state * tok,int start_rule,int flags,int feature_version,int * errcode,PyArena * arena)1212 _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
1213                     int feature_version, int *errcode, PyArena *arena)
1214 {
1215     Parser *p = PyMem_Malloc(sizeof(Parser));
1216     if (p == NULL) {
1217         return (Parser *) PyErr_NoMemory();
1218     }
1219     assert(tok != NULL);
1220     tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
1221     tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
1222     p->tok = tok;
1223     p->keywords = NULL;
1224     p->n_keyword_lists = -1;
1225     p->soft_keywords = NULL;
1226     p->tokens = PyMem_Malloc(sizeof(Token *));
1227     if (!p->tokens) {
1228         PyMem_Free(p);
1229         return (Parser *) PyErr_NoMemory();
1230     }
1231     p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
1232     if (!p->tokens) {
1233         PyMem_Free(p->tokens);
1234         PyMem_Free(p);
1235         return (Parser *) PyErr_NoMemory();
1236     }
1237     if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
1238         PyMem_Free(p->tokens[0]);
1239         PyMem_Free(p->tokens);
1240         PyMem_Free(p);
1241         return (Parser *) PyErr_NoMemory();
1242     }
1243 
1244     p->mark = 0;
1245     p->fill = 0;
1246     p->size = 1;
1247 
1248     p->errcode = errcode;
1249     p->arena = arena;
1250     p->start_rule = start_rule;
1251     p->parsing_started = 0;
1252     p->normalize = NULL;
1253     p->error_indicator = 0;
1254 
1255     p->starting_lineno = 0;
1256     p->starting_col_offset = 0;
1257     p->flags = flags;
1258     p->feature_version = feature_version;
1259     p->known_err_token = NULL;
1260     p->level = 0;
1261     p->call_invalid_rules = 0;
1262     return p;
1263 }
1264 
1265 static void
reset_parser_state(Parser * p)1266 reset_parser_state(Parser *p)
1267 {
1268     for (int i = 0; i < p->fill; i++) {
1269         p->tokens[i]->memo = NULL;
1270     }
1271     p->mark = 0;
1272     p->call_invalid_rules = 1;
1273     // Don't try to get extra tokens in interactive mode when trying to
1274     // raise specialized errors in the second pass.
1275     p->tok->interactive_underflow = IUNDERFLOW_STOP;
1276 }
1277 
1278 static int
_PyPegen_check_tokenizer_errors(Parser * p)1279 _PyPegen_check_tokenizer_errors(Parser *p) {
1280     // Tokenize the whole input to see if there are any tokenization
1281     // errors such as mistmatching parentheses. These will get priority
1282     // over generic syntax errors only if the line number of the error is
1283     // before the one that we had for the generic error.
1284 
1285     // We don't want to tokenize to the end for interactive input
1286     if (p->tok->prompt != NULL) {
1287         return 0;
1288     }
1289 
1290     PyObject *type, *value, *traceback;
1291     PyErr_Fetch(&type, &value, &traceback);
1292 
1293     Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
1294     Py_ssize_t current_err_line = current_token->lineno;
1295 
1296     int ret = 0;
1297 
1298     for (;;) {
1299         const char *start;
1300         const char *end;
1301         switch (PyTokenizer_Get(p->tok, &start, &end)) {
1302             case ERRORTOKEN:
1303                 if (p->tok->level != 0) {
1304                     int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
1305                     if (current_err_line > error_lineno) {
1306                         raise_unclosed_parentheses_error(p);
1307                         ret = -1;
1308                         goto exit;
1309                     }
1310                 }
1311                 break;
1312             case ENDMARKER:
1313                 break;
1314             default:
1315                 continue;
1316         }
1317         break;
1318     }
1319 
1320 
1321 exit:
1322     if (PyErr_Occurred()) {
1323         Py_XDECREF(value);
1324         Py_XDECREF(type);
1325         Py_XDECREF(traceback);
1326     } else {
1327         PyErr_Restore(type, value, traceback);
1328     }
1329     return ret;
1330 }
1331 
1332 
1333 static inline int
_is_end_of_source(Parser * p)1334 _is_end_of_source(Parser *p) {
1335     int err = p->tok->done;
1336     return err == E_EOF || err == E_EOFS || err == E_EOLS;
1337 }
1338 
1339 void *
_PyPegen_run_parser(Parser * p)1340 _PyPegen_run_parser(Parser *p)
1341 {
1342     void *res = _PyPegen_parse(p);
1343     assert(p->level == 0);
1344     if (res == NULL) {
1345         if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) &&  _is_end_of_source(p)) {
1346             PyErr_Clear();
1347             return RAISE_SYNTAX_ERROR("incomplete input");
1348         }
1349         if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
1350             return NULL;
1351         }
1352         // Make a second parser pass. In this pass we activate heavier and slower checks
1353         // to produce better error messages and more complete diagnostics. Extra "invalid_*"
1354         // rules will be active during parsing.
1355         Token *last_token = p->tokens[p->fill - 1];
1356         reset_parser_state(p);
1357         _PyPegen_parse(p);
1358         if (PyErr_Occurred()) {
1359             // Prioritize tokenizer errors to custom syntax errors raised
1360             // on the second phase only if the errors come from the parser.
1361             int is_tok_ok = (p->tok->done == E_DONE || p->tok->done == E_OK);
1362             if (is_tok_ok && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
1363                 _PyPegen_check_tokenizer_errors(p);
1364             }
1365             return NULL;
1366         }
1367         if (p->fill == 0) {
1368             RAISE_SYNTAX_ERROR("error at start before reading any input");
1369         }
1370         else if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {
1371             if (p->tok->level) {
1372                 raise_unclosed_parentheses_error(p);
1373             } else {
1374                 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
1375             }
1376         }
1377         else {
1378             if (p->tokens[p->fill-1]->type == INDENT) {
1379                 RAISE_INDENTATION_ERROR("unexpected indent");
1380             }
1381             else if (p->tokens[p->fill-1]->type == DEDENT) {
1382                 RAISE_INDENTATION_ERROR("unexpected unindent");
1383             }
1384             else {
1385                 // Use the last token we found on the first pass to avoid reporting
1386                 // incorrect locations for generic syntax errors just because we reached
1387                 // further away when trying to find specific syntax errors in the second
1388                 // pass.
1389                 RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
1390                 // _PyPegen_check_tokenizer_errors will override the existing
1391                 // generic SyntaxError we just raised if errors are found.
1392                 _PyPegen_check_tokenizer_errors(p);
1393             }
1394         }
1395         return NULL;
1396     }
1397 
1398     if (p->start_rule == Py_single_input && bad_single_statement(p)) {
1399         p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
1400         return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
1401     }
1402 
1403     // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
1404 #if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
1405     if (p->start_rule == Py_single_input ||
1406         p->start_rule == Py_file_input ||
1407         p->start_rule == Py_eval_input)
1408     {
1409         if (!_PyAST_Validate(res)) {
1410             return NULL;
1411         }
1412     }
1413 #endif
1414     return res;
1415 }
1416 
1417 mod_ty
_PyPegen_run_parser_from_file_pointer(FILE * fp,int start_rule,PyObject * filename_ob,const char * enc,const char * ps1,const char * ps2,PyCompilerFlags * flags,int * errcode,PyArena * arena)1418 _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
1419                              const char *enc, const char *ps1, const char *ps2,
1420                              PyCompilerFlags *flags, int *errcode, PyArena *arena)
1421 {
1422     struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
1423     if (tok == NULL) {
1424         if (PyErr_Occurred()) {
1425             raise_tokenizer_init_error(filename_ob);
1426             return NULL;
1427         }
1428         return NULL;
1429     }
1430     if (!tok->fp || ps1 != NULL || ps2 != NULL ||
1431         PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
1432         tok->fp_interactive = 1;
1433     }
1434     // This transfers the ownership to the tokenizer
1435     tok->filename = filename_ob;
1436     Py_INCREF(filename_ob);
1437 
1438     // From here on we need to clean up even if there's an error
1439     mod_ty result = NULL;
1440 
1441     int parser_flags = compute_parser_flags(flags);
1442     Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
1443                                     errcode, arena);
1444     if (p == NULL) {
1445         goto error;
1446     }
1447 
1448     result = _PyPegen_run_parser(p);
1449     _PyPegen_Parser_Free(p);
1450 
1451 error:
1452     PyTokenizer_Free(tok);
1453     return result;
1454 }
1455 
1456 mod_ty
_PyPegen_run_parser_from_string(const char * str,int start_rule,PyObject * filename_ob,PyCompilerFlags * flags,PyArena * arena)1457 _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
1458                        PyCompilerFlags *flags, PyArena *arena)
1459 {
1460     int exec_input = start_rule == Py_file_input;
1461 
1462     struct tok_state *tok;
1463     if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) {
1464         tok = PyTokenizer_FromUTF8(str, exec_input);
1465     } else {
1466         tok = PyTokenizer_FromString(str, exec_input);
1467     }
1468     if (tok == NULL) {
1469         if (PyErr_Occurred()) {
1470             raise_tokenizer_init_error(filename_ob);
1471         }
1472         return NULL;
1473     }
1474     // This transfers the ownership to the tokenizer
1475     tok->filename = filename_ob;
1476     Py_INCREF(filename_ob);
1477 
1478     // We need to clear up from here on
1479     mod_ty result = NULL;
1480 
1481     int parser_flags = compute_parser_flags(flags);
1482     int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
1483         flags->cf_feature_version : PY_MINOR_VERSION;
1484     Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
1485                                     NULL, arena);
1486     if (p == NULL) {
1487         goto error;
1488     }
1489 
1490     result = _PyPegen_run_parser(p);
1491     _PyPegen_Parser_Free(p);
1492 
1493 error:
1494     PyTokenizer_Free(tok);
1495     return result;
1496 }
1497 
1498 asdl_stmt_seq*
_PyPegen_interactive_exit(Parser * p)1499 _PyPegen_interactive_exit(Parser *p)
1500 {
1501     if (p->errcode) {
1502         *(p->errcode) = E_EOF;
1503     }
1504     return NULL;
1505 }
1506 
1507 /* Creates a single-element asdl_seq* that contains a */
1508 asdl_seq *
_PyPegen_singleton_seq(Parser * p,void * a)1509 _PyPegen_singleton_seq(Parser *p, void *a)
1510 {
1511     assert(a != NULL);
1512     asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
1513     if (!seq) {
1514         return NULL;
1515     }
1516     asdl_seq_SET_UNTYPED(seq, 0, a);
1517     return seq;
1518 }
1519 
1520 /* Creates a copy of seq and prepends a to it */
1521 asdl_seq *
_PyPegen_seq_insert_in_front(Parser * p,void * a,asdl_seq * seq)1522 _PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
1523 {
1524     assert(a != NULL);
1525     if (!seq) {
1526         return _PyPegen_singleton_seq(p, a);
1527     }
1528 
1529     asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1530     if (!new_seq) {
1531         return NULL;
1532     }
1533 
1534     asdl_seq_SET_UNTYPED(new_seq, 0, a);
1535     for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
1536         asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1));
1537     }
1538     return new_seq;
1539 }
1540 
1541 /* Creates a copy of seq and appends a to it */
1542 asdl_seq *
_PyPegen_seq_append_to_end(Parser * p,asdl_seq * seq,void * a)1543 _PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
1544 {
1545     assert(a != NULL);
1546     if (!seq) {
1547         return _PyPegen_singleton_seq(p, a);
1548     }
1549 
1550     asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1551     if (!new_seq) {
1552         return NULL;
1553     }
1554 
1555     for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
1556         asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
1557     }
1558     asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a);
1559     return new_seq;
1560 }
1561 
1562 static Py_ssize_t
_get_flattened_seq_size(asdl_seq * seqs)1563 _get_flattened_seq_size(asdl_seq *seqs)
1564 {
1565     Py_ssize_t size = 0;
1566     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1567         asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
1568         size += asdl_seq_LEN(inner_seq);
1569     }
1570     return size;
1571 }
1572 
1573 /* Flattens an asdl_seq* of asdl_seq*s */
1574 asdl_seq *
_PyPegen_seq_flatten(Parser * p,asdl_seq * seqs)1575 _PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
1576 {
1577     Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
1578     assert(flattened_seq_size > 0);
1579 
1580     asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
1581     if (!flattened_seq) {
1582         return NULL;
1583     }
1584 
1585     int flattened_seq_idx = 0;
1586     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1587         asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
1588         for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
1589             asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
1590         }
1591     }
1592     assert(flattened_seq_idx == flattened_seq_size);
1593 
1594     return flattened_seq;
1595 }
1596 
1597 void *
_PyPegen_seq_last_item(asdl_seq * seq)1598 _PyPegen_seq_last_item(asdl_seq *seq)
1599 {
1600     Py_ssize_t len = asdl_seq_LEN(seq);
1601     return asdl_seq_GET_UNTYPED(seq, len - 1);
1602 }
1603 
1604 void *
_PyPegen_seq_first_item(asdl_seq * seq)1605 _PyPegen_seq_first_item(asdl_seq *seq)
1606 {
1607     return asdl_seq_GET_UNTYPED(seq, 0);
1608 }
1609 
1610 
1611 /* Creates a new name of the form <first_name>.<second_name> */
1612 expr_ty
_PyPegen_join_names_with_dot(Parser * p,expr_ty first_name,expr_ty second_name)1613 _PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1614 {
1615     assert(first_name != NULL && second_name != NULL);
1616     PyObject *first_identifier = first_name->v.Name.id;
1617     PyObject *second_identifier = second_name->v.Name.id;
1618 
1619     if (PyUnicode_READY(first_identifier) == -1) {
1620         return NULL;
1621     }
1622     if (PyUnicode_READY(second_identifier) == -1) {
1623         return NULL;
1624     }
1625     const char *first_str = PyUnicode_AsUTF8(first_identifier);
1626     if (!first_str) {
1627         return NULL;
1628     }
1629     const char *second_str = PyUnicode_AsUTF8(second_identifier);
1630     if (!second_str) {
1631         return NULL;
1632     }
1633     Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1;  // +1 for the dot
1634 
1635     PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1636     if (!str) {
1637         return NULL;
1638     }
1639 
1640     char *s = PyBytes_AS_STRING(str);
1641     if (!s) {
1642         return NULL;
1643     }
1644 
1645     strcpy(s, first_str);
1646     s += strlen(first_str);
1647     *s++ = '.';
1648     strcpy(s, second_str);
1649     s += strlen(second_str);
1650     *s = '\0';
1651 
1652     PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1653     Py_DECREF(str);
1654     if (!uni) {
1655         return NULL;
1656     }
1657     PyUnicode_InternInPlace(&uni);
1658     if (_PyArena_AddPyObject(p->arena, uni) < 0) {
1659         Py_DECREF(uni);
1660         return NULL;
1661     }
1662 
1663     return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1664 }
1665 
1666 /* Counts the total number of dots in seq's tokens */
1667 int
_PyPegen_seq_count_dots(asdl_seq * seq)1668 _PyPegen_seq_count_dots(asdl_seq *seq)
1669 {
1670     int number_of_dots = 0;
1671     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1672         Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
1673         switch (current_expr->type) {
1674             case ELLIPSIS:
1675                 number_of_dots += 3;
1676                 break;
1677             case DOT:
1678                 number_of_dots += 1;
1679                 break;
1680             default:
1681                 Py_UNREACHABLE();
1682         }
1683     }
1684 
1685     return number_of_dots;
1686 }
1687 
1688 /* Creates an alias with '*' as the identifier name */
1689 alias_ty
_PyPegen_alias_for_star(Parser * p,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)1690 _PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
1691                         int end_col_offset, PyArena *arena) {
1692     PyObject *str = PyUnicode_InternFromString("*");
1693     if (!str) {
1694         return NULL;
1695     }
1696     if (_PyArena_AddPyObject(p->arena, str) < 0) {
1697         Py_DECREF(str);
1698         return NULL;
1699     }
1700     return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
1701 }
1702 
1703 /* Creates a new asdl_seq* with the identifiers of all the names in seq */
1704 asdl_identifier_seq *
_PyPegen_map_names_to_ids(Parser * p,asdl_expr_seq * seq)1705 _PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
1706 {
1707     Py_ssize_t len = asdl_seq_LEN(seq);
1708     assert(len > 0);
1709 
1710     asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
1711     if (!new_seq) {
1712         return NULL;
1713     }
1714     for (Py_ssize_t i = 0; i < len; i++) {
1715         expr_ty e = asdl_seq_GET(seq, i);
1716         asdl_seq_SET(new_seq, i, e->v.Name.id);
1717     }
1718     return new_seq;
1719 }
1720 
1721 /* Constructs a CmpopExprPair */
1722 CmpopExprPair *
_PyPegen_cmpop_expr_pair(Parser * p,cmpop_ty cmpop,expr_ty expr)1723 _PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1724 {
1725     assert(expr != NULL);
1726     CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
1727     if (!a) {
1728         return NULL;
1729     }
1730     a->cmpop = cmpop;
1731     a->expr = expr;
1732     return a;
1733 }
1734 
1735 asdl_int_seq *
_PyPegen_get_cmpops(Parser * p,asdl_seq * seq)1736 _PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
1737 {
1738     Py_ssize_t len = asdl_seq_LEN(seq);
1739     assert(len > 0);
1740 
1741     asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1742     if (!new_seq) {
1743         return NULL;
1744     }
1745     for (Py_ssize_t i = 0; i < len; i++) {
1746         CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1747         asdl_seq_SET(new_seq, i, pair->cmpop);
1748     }
1749     return new_seq;
1750 }
1751 
1752 asdl_expr_seq *
_PyPegen_get_exprs(Parser * p,asdl_seq * seq)1753 _PyPegen_get_exprs(Parser *p, asdl_seq *seq)
1754 {
1755     Py_ssize_t len = asdl_seq_LEN(seq);
1756     assert(len > 0);
1757 
1758     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1759     if (!new_seq) {
1760         return NULL;
1761     }
1762     for (Py_ssize_t i = 0; i < len; i++) {
1763         CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1764         asdl_seq_SET(new_seq, i, pair->expr);
1765     }
1766     return new_seq;
1767 }
1768 
1769 /* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
1770 static asdl_expr_seq *
_set_seq_context(Parser * p,asdl_expr_seq * seq,expr_context_ty ctx)1771 _set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
1772 {
1773     Py_ssize_t len = asdl_seq_LEN(seq);
1774     if (len == 0) {
1775         return NULL;
1776     }
1777 
1778     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1779     if (!new_seq) {
1780         return NULL;
1781     }
1782     for (Py_ssize_t i = 0; i < len; i++) {
1783         expr_ty e = asdl_seq_GET(seq, i);
1784         asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1785     }
1786     return new_seq;
1787 }
1788 
1789 static expr_ty
_set_name_context(Parser * p,expr_ty e,expr_context_ty ctx)1790 _set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1791 {
1792     return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1793 }
1794 
1795 static expr_ty
_set_tuple_context(Parser * p,expr_ty e,expr_context_ty ctx)1796 _set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1797 {
1798     return _PyAST_Tuple(
1799             _set_seq_context(p, e->v.Tuple.elts, ctx),
1800             ctx,
1801             EXTRA_EXPR(e, e));
1802 }
1803 
1804 static expr_ty
_set_list_context(Parser * p,expr_ty e,expr_context_ty ctx)1805 _set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1806 {
1807     return _PyAST_List(
1808             _set_seq_context(p, e->v.List.elts, ctx),
1809             ctx,
1810             EXTRA_EXPR(e, e));
1811 }
1812 
1813 static expr_ty
_set_subscript_context(Parser * p,expr_ty e,expr_context_ty ctx)1814 _set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1815 {
1816     return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
1817                             ctx, EXTRA_EXPR(e, e));
1818 }
1819 
1820 static expr_ty
_set_attribute_context(Parser * p,expr_ty e,expr_context_ty ctx)1821 _set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1822 {
1823     return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
1824                             ctx, EXTRA_EXPR(e, e));
1825 }
1826 
1827 static expr_ty
_set_starred_context(Parser * p,expr_ty e,expr_context_ty ctx)1828 _set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1829 {
1830     return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
1831                           ctx, EXTRA_EXPR(e, e));
1832 }
1833 
1834 /* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
1835 expr_ty
_PyPegen_set_expr_context(Parser * p,expr_ty expr,expr_context_ty ctx)1836 _PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1837 {
1838     assert(expr != NULL);
1839 
1840     expr_ty new = NULL;
1841     switch (expr->kind) {
1842         case Name_kind:
1843             new = _set_name_context(p, expr, ctx);
1844             break;
1845         case Tuple_kind:
1846             new = _set_tuple_context(p, expr, ctx);
1847             break;
1848         case List_kind:
1849             new = _set_list_context(p, expr, ctx);
1850             break;
1851         case Subscript_kind:
1852             new = _set_subscript_context(p, expr, ctx);
1853             break;
1854         case Attribute_kind:
1855             new = _set_attribute_context(p, expr, ctx);
1856             break;
1857         case Starred_kind:
1858             new = _set_starred_context(p, expr, ctx);
1859             break;
1860         default:
1861             new = expr;
1862     }
1863     return new;
1864 }
1865 
1866 /* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
1867 KeyValuePair *
_PyPegen_key_value_pair(Parser * p,expr_ty key,expr_ty value)1868 _PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1869 {
1870     KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
1871     if (!a) {
1872         return NULL;
1873     }
1874     a->key = key;
1875     a->value = value;
1876     return a;
1877 }
1878 
1879 /* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
1880 asdl_expr_seq *
_PyPegen_get_keys(Parser * p,asdl_seq * seq)1881 _PyPegen_get_keys(Parser *p, asdl_seq *seq)
1882 {
1883     Py_ssize_t len = asdl_seq_LEN(seq);
1884     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1885     if (!new_seq) {
1886         return NULL;
1887     }
1888     for (Py_ssize_t i = 0; i < len; i++) {
1889         KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
1890         asdl_seq_SET(new_seq, i, pair->key);
1891     }
1892     return new_seq;
1893 }
1894 
1895 /* Extracts all values from an asdl_seq* of KeyValuePair*'s */
1896 asdl_expr_seq *
_PyPegen_get_values(Parser * p,asdl_seq * seq)1897 _PyPegen_get_values(Parser *p, asdl_seq *seq)
1898 {
1899     Py_ssize_t len = asdl_seq_LEN(seq);
1900     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1901     if (!new_seq) {
1902         return NULL;
1903     }
1904     for (Py_ssize_t i = 0; i < len; i++) {
1905         KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
1906         asdl_seq_SET(new_seq, i, pair->value);
1907     }
1908     return new_seq;
1909 }
1910 
1911 /* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */
1912 KeyPatternPair *
_PyPegen_key_pattern_pair(Parser * p,expr_ty key,pattern_ty pattern)1913 _PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
1914 {
1915     KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
1916     if (!a) {
1917         return NULL;
1918     }
1919     a->key = key;
1920     a->pattern = pattern;
1921     return a;
1922 }
1923 
1924 /* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */
1925 asdl_expr_seq *
_PyPegen_get_pattern_keys(Parser * p,asdl_seq * seq)1926 _PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
1927 {
1928     Py_ssize_t len = asdl_seq_LEN(seq);
1929     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
1930     if (!new_seq) {
1931         return NULL;
1932     }
1933     for (Py_ssize_t i = 0; i < len; i++) {
1934         KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1935         asdl_seq_SET(new_seq, i, pair->key);
1936     }
1937     return new_seq;
1938 }
1939 
1940 /* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */
1941 asdl_pattern_seq *
_PyPegen_get_patterns(Parser * p,asdl_seq * seq)1942 _PyPegen_get_patterns(Parser *p, asdl_seq *seq)
1943 {
1944     Py_ssize_t len = asdl_seq_LEN(seq);
1945     asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
1946     if (!new_seq) {
1947         return NULL;
1948     }
1949     for (Py_ssize_t i = 0; i < len; i++) {
1950         KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
1951         asdl_seq_SET(new_seq, i, pair->pattern);
1952     }
1953     return new_seq;
1954 }
1955 
1956 /* Constructs a NameDefaultPair */
1957 NameDefaultPair *
_PyPegen_name_default_pair(Parser * p,arg_ty arg,expr_ty value,Token * tc)1958 _PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
1959 {
1960     NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
1961     if (!a) {
1962         return NULL;
1963     }
1964     a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
1965     a->value = value;
1966     return a;
1967 }
1968 
1969 /* Constructs a SlashWithDefault */
1970 SlashWithDefault *
_PyPegen_slash_with_default(Parser * p,asdl_arg_seq * plain_names,asdl_seq * names_with_defaults)1971 _PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
1972 {
1973     SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
1974     if (!a) {
1975         return NULL;
1976     }
1977     a->plain_names = plain_names;
1978     a->names_with_defaults = names_with_defaults;
1979     return a;
1980 }
1981 
1982 /* Constructs a StarEtc */
1983 StarEtc *
_PyPegen_star_etc(Parser * p,arg_ty vararg,asdl_seq * kwonlyargs,arg_ty kwarg)1984 _PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
1985 {
1986     StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc));
1987     if (!a) {
1988         return NULL;
1989     }
1990     a->vararg = vararg;
1991     a->kwonlyargs = kwonlyargs;
1992     a->kwarg = kwarg;
1993     return a;
1994 }
1995 
1996 asdl_seq *
_PyPegen_join_sequences(Parser * p,asdl_seq * a,asdl_seq * b)1997 _PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
1998 {
1999     Py_ssize_t first_len = asdl_seq_LEN(a);
2000     Py_ssize_t second_len = asdl_seq_LEN(b);
2001     asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
2002     if (!new_seq) {
2003         return NULL;
2004     }
2005 
2006     int k = 0;
2007     for (Py_ssize_t i = 0; i < first_len; i++) {
2008         asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
2009     }
2010     for (Py_ssize_t i = 0; i < second_len; i++) {
2011         asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
2012     }
2013 
2014     return new_seq;
2015 }
2016 
2017 static asdl_arg_seq*
_get_names(Parser * p,asdl_seq * names_with_defaults)2018 _get_names(Parser *p, asdl_seq *names_with_defaults)
2019 {
2020     Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
2021     asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
2022     if (!seq) {
2023         return NULL;
2024     }
2025     for (Py_ssize_t i = 0; i < len; i++) {
2026         NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
2027         asdl_seq_SET(seq, i, pair->arg);
2028     }
2029     return seq;
2030 }
2031 
2032 static asdl_expr_seq *
_get_defaults(Parser * p,asdl_seq * names_with_defaults)2033 _get_defaults(Parser *p, asdl_seq *names_with_defaults)
2034 {
2035     Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
2036     asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
2037     if (!seq) {
2038         return NULL;
2039     }
2040     for (Py_ssize_t i = 0; i < len; i++) {
2041         NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
2042         asdl_seq_SET(seq, i, pair->value);
2043     }
2044     return seq;
2045 }
2046 
2047 static int
_make_posonlyargs(Parser * p,asdl_arg_seq * slash_without_default,SlashWithDefault * slash_with_default,asdl_arg_seq ** posonlyargs)2048 _make_posonlyargs(Parser *p,
2049                   asdl_arg_seq *slash_without_default,
2050                   SlashWithDefault *slash_with_default,
2051                   asdl_arg_seq **posonlyargs) {
2052     if (slash_without_default != NULL) {
2053         *posonlyargs = slash_without_default;
2054     }
2055     else if (slash_with_default != NULL) {
2056         asdl_arg_seq *slash_with_default_names =
2057                 _get_names(p, slash_with_default->names_with_defaults);
2058         if (!slash_with_default_names) {
2059             return -1;
2060         }
2061         *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
2062                 p,
2063                 (asdl_seq*)slash_with_default->plain_names,
2064                 (asdl_seq*)slash_with_default_names);
2065     }
2066     else {
2067         *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
2068     }
2069     return *posonlyargs == NULL ? -1 : 0;
2070 }
2071 
2072 static int
_make_posargs(Parser * p,asdl_arg_seq * plain_names,asdl_seq * names_with_default,asdl_arg_seq ** posargs)2073 _make_posargs(Parser *p,
2074               asdl_arg_seq *plain_names,
2075               asdl_seq *names_with_default,
2076               asdl_arg_seq **posargs) {
2077     if (plain_names != NULL && names_with_default != NULL) {
2078         asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
2079         if (!names_with_default_names) {
2080             return -1;
2081         }
2082         *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
2083                 p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names);
2084     }
2085     else if (plain_names == NULL && names_with_default != NULL) {
2086         *posargs = _get_names(p, names_with_default);
2087     }
2088     else if (plain_names != NULL && names_with_default == NULL) {
2089         *posargs = plain_names;
2090     }
2091     else {
2092         *posargs = _Py_asdl_arg_seq_new(0, p->arena);
2093     }
2094     return *posargs == NULL ? -1 : 0;
2095 }
2096 
2097 static int
_make_posdefaults(Parser * p,SlashWithDefault * slash_with_default,asdl_seq * names_with_default,asdl_expr_seq ** posdefaults)2098 _make_posdefaults(Parser *p,
2099                   SlashWithDefault *slash_with_default,
2100                   asdl_seq *names_with_default,
2101                   asdl_expr_seq **posdefaults) {
2102     if (slash_with_default != NULL && names_with_default != NULL) {
2103         asdl_expr_seq *slash_with_default_values =
2104                 _get_defaults(p, slash_with_default->names_with_defaults);
2105         if (!slash_with_default_values) {
2106             return -1;
2107         }
2108         asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
2109         if (!names_with_default_values) {
2110             return -1;
2111         }
2112         *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
2113                 p,
2114                 (asdl_seq*)slash_with_default_values,
2115                 (asdl_seq*)names_with_default_values);
2116     }
2117     else if (slash_with_default == NULL && names_with_default != NULL) {
2118         *posdefaults = _get_defaults(p, names_with_default);
2119     }
2120     else if (slash_with_default != NULL && names_with_default == NULL) {
2121         *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
2122     }
2123     else {
2124         *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
2125     }
2126     return *posdefaults == NULL ? -1 : 0;
2127 }
2128 
2129 static int
_make_kwargs(Parser * p,StarEtc * star_etc,asdl_arg_seq ** kwonlyargs,asdl_expr_seq ** kwdefaults)2130 _make_kwargs(Parser *p, StarEtc *star_etc,
2131              asdl_arg_seq **kwonlyargs,
2132              asdl_expr_seq **kwdefaults) {
2133     if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
2134         *kwonlyargs = _get_names(p, star_etc->kwonlyargs);
2135     }
2136     else {
2137         *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
2138     }
2139 
2140     if (*kwonlyargs == NULL) {
2141         return -1;
2142     }
2143 
2144     if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
2145         *kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
2146     }
2147     else {
2148         *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
2149     }
2150 
2151     if (*kwdefaults == NULL) {
2152         return -1;
2153     }
2154 
2155     return 0;
2156 }
2157 
2158 /* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
2159 arguments_ty
_PyPegen_make_arguments(Parser * p,asdl_arg_seq * slash_without_default,SlashWithDefault * slash_with_default,asdl_arg_seq * plain_names,asdl_seq * names_with_default,StarEtc * star_etc)2160 _PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
2161                         SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
2162                         asdl_seq *names_with_default, StarEtc *star_etc)
2163 {
2164     asdl_arg_seq *posonlyargs;
2165     if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
2166         return NULL;
2167     }
2168 
2169     asdl_arg_seq *posargs;
2170     if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
2171         return NULL;
2172     }
2173 
2174     asdl_expr_seq *posdefaults;
2175     if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) {
2176         return NULL;
2177     }
2178 
2179     arg_ty vararg = NULL;
2180     if (star_etc != NULL && star_etc->vararg != NULL) {
2181         vararg = star_etc->vararg;
2182     }
2183 
2184     asdl_arg_seq *kwonlyargs;
2185     asdl_expr_seq *kwdefaults;
2186     if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
2187         return NULL;
2188     }
2189 
2190     arg_ty kwarg = NULL;
2191     if (star_etc != NULL && star_etc->kwarg != NULL) {
2192         kwarg = star_etc->kwarg;
2193     }
2194 
2195     return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
2196                             kwdefaults, kwarg, posdefaults, p->arena);
2197 }
2198 
2199 
2200 /* Constructs an empty arguments_ty object, that gets used when a function accepts no
2201  * arguments. */
2202 arguments_ty
_PyPegen_empty_arguments(Parser * p)2203 _PyPegen_empty_arguments(Parser *p)
2204 {
2205     asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
2206     if (!posonlyargs) {
2207         return NULL;
2208     }
2209     asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena);
2210     if (!posargs) {
2211         return NULL;
2212     }
2213     asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
2214     if (!posdefaults) {
2215         return NULL;
2216     }
2217     asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
2218     if (!kwonlyargs) {
2219         return NULL;
2220     }
2221     asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
2222     if (!kwdefaults) {
2223         return NULL;
2224     }
2225 
2226     return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
2227                             kwdefaults, NULL, posdefaults, p->arena);
2228 }
2229 
2230 /* Encapsulates the value of an operator_ty into an AugOperator struct */
2231 AugOperator *
_PyPegen_augoperator(Parser * p,operator_ty kind)2232 _PyPegen_augoperator(Parser *p, operator_ty kind)
2233 {
2234     AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator));
2235     if (!a) {
2236         return NULL;
2237     }
2238     a->kind = kind;
2239     return a;
2240 }
2241 
2242 /* Construct a FunctionDef equivalent to function_def, but with decorators */
2243 stmt_ty
_PyPegen_function_def_decorators(Parser * p,asdl_expr_seq * decorators,stmt_ty function_def)2244 _PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
2245 {
2246     assert(function_def != NULL);
2247     if (function_def->kind == AsyncFunctionDef_kind) {
2248         return _PyAST_AsyncFunctionDef(
2249             function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
2250             function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
2251             function_def->v.FunctionDef.type_comment, function_def->lineno,
2252             function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
2253             p->arena);
2254     }
2255 
2256     return _PyAST_FunctionDef(
2257         function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
2258         function_def->v.FunctionDef.body, decorators,
2259         function_def->v.FunctionDef.returns,
2260         function_def->v.FunctionDef.type_comment, function_def->lineno,
2261         function_def->col_offset, function_def->end_lineno,
2262         function_def->end_col_offset, p->arena);
2263 }
2264 
2265 /* Construct a ClassDef equivalent to class_def, but with decorators */
2266 stmt_ty
_PyPegen_class_def_decorators(Parser * p,asdl_expr_seq * decorators,stmt_ty class_def)2267 _PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
2268 {
2269     assert(class_def != NULL);
2270     return _PyAST_ClassDef(
2271         class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
2272         class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
2273         class_def->lineno, class_def->col_offset, class_def->end_lineno,
2274         class_def->end_col_offset, p->arena);
2275 }
2276 
2277 /* Construct a KeywordOrStarred */
2278 KeywordOrStarred *
_PyPegen_keyword_or_starred(Parser * p,void * element,int is_keyword)2279 _PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
2280 {
2281     KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
2282     if (!a) {
2283         return NULL;
2284     }
2285     a->element = element;
2286     a->is_keyword = is_keyword;
2287     return a;
2288 }
2289 
2290 /* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
2291 static int
_seq_number_of_starred_exprs(asdl_seq * seq)2292 _seq_number_of_starred_exprs(asdl_seq *seq)
2293 {
2294     int n = 0;
2295     for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
2296         KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
2297         if (!k->is_keyword) {
2298             n++;
2299         }
2300     }
2301     return n;
2302 }
2303 
2304 /* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
2305 asdl_expr_seq *
_PyPegen_seq_extract_starred_exprs(Parser * p,asdl_seq * kwargs)2306 _PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
2307 {
2308     int new_len = _seq_number_of_starred_exprs(kwargs);
2309     if (new_len == 0) {
2310         return NULL;
2311     }
2312     asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
2313     if (!new_seq) {
2314         return NULL;
2315     }
2316 
2317     int idx = 0;
2318     for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
2319         KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
2320         if (!k->is_keyword) {
2321             asdl_seq_SET(new_seq, idx++, k->element);
2322         }
2323     }
2324     return new_seq;
2325 }
2326 
2327 /* Return a new asdl_seq* with only the keywords in kwargs */
2328 asdl_keyword_seq*
_PyPegen_seq_delete_starred_exprs(Parser * p,asdl_seq * kwargs)2329 _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
2330 {
2331     Py_ssize_t len = asdl_seq_LEN(kwargs);
2332     Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
2333     if (new_len == 0) {
2334         return NULL;
2335     }
2336     asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
2337     if (!new_seq) {
2338         return NULL;
2339     }
2340 
2341     int idx = 0;
2342     for (Py_ssize_t i = 0; i < len; i++) {
2343         KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
2344         if (k->is_keyword) {
2345             asdl_seq_SET(new_seq, idx++, k->element);
2346         }
2347     }
2348     return new_seq;
2349 }
2350 
2351 expr_ty
_PyPegen_concatenate_strings(Parser * p,asdl_seq * strings)2352 _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
2353 {
2354     Py_ssize_t len = asdl_seq_LEN(strings);
2355     assert(len > 0);
2356 
2357     Token *first = asdl_seq_GET_UNTYPED(strings, 0);
2358     Token *last = asdl_seq_GET_UNTYPED(strings, len - 1);
2359 
2360     int bytesmode = 0;
2361     PyObject *bytes_str = NULL;
2362 
2363     FstringParser state;
2364     _PyPegen_FstringParser_Init(&state);
2365 
2366     for (Py_ssize_t i = 0; i < len; i++) {
2367         Token *t = asdl_seq_GET_UNTYPED(strings, i);
2368 
2369         int this_bytesmode;
2370         int this_rawmode;
2371         PyObject *s;
2372         const char *fstr;
2373         Py_ssize_t fstrlen = -1;
2374 
2375         if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
2376             goto error;
2377         }
2378 
2379         /* Check that we are not mixing bytes with unicode. */
2380         if (i != 0 && bytesmode != this_bytesmode) {
2381             RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
2382             Py_XDECREF(s);
2383             goto error;
2384         }
2385         bytesmode = this_bytesmode;
2386 
2387         if (fstr != NULL) {
2388             assert(s == NULL && !bytesmode);
2389 
2390             int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
2391                                                      this_rawmode, 0, first, t, last);
2392             if (result < 0) {
2393                 goto error;
2394             }
2395         }
2396         else {
2397             /* String or byte string. */
2398             assert(s != NULL && fstr == NULL);
2399             assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
2400 
2401             if (bytesmode) {
2402                 if (i == 0) {
2403                     bytes_str = s;
2404                 }
2405                 else {
2406                     PyBytes_ConcatAndDel(&bytes_str, s);
2407                     if (!bytes_str) {
2408                         goto error;
2409                     }
2410                 }
2411             }
2412             else {
2413                 /* This is a regular string. Concatenate it. */
2414                 if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
2415                     goto error;
2416                 }
2417             }
2418         }
2419     }
2420 
2421     if (bytesmode) {
2422         if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) {
2423             goto error;
2424         }
2425         return _PyAST_Constant(bytes_str, NULL, first->lineno,
2426                                first->col_offset, last->end_lineno,
2427                                last->end_col_offset, p->arena);
2428     }
2429 
2430     return _PyPegen_FstringParser_Finish(p, &state, first, last);
2431 
2432 error:
2433     Py_XDECREF(bytes_str);
2434     _PyPegen_FstringParser_Dealloc(&state);
2435     if (PyErr_Occurred()) {
2436         raise_decode_error(p);
2437     }
2438     return NULL;
2439 }
2440 
2441 expr_ty
_PyPegen_ensure_imaginary(Parser * p,expr_ty exp)2442 _PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
2443 {
2444     if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) {
2445         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
2446         return NULL;
2447     }
2448     return exp;
2449 }
2450 
2451 expr_ty
_PyPegen_ensure_real(Parser * p,expr_ty exp)2452 _PyPegen_ensure_real(Parser *p, expr_ty exp)
2453 {
2454     if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) {
2455         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
2456         return NULL;
2457     }
2458     return exp;
2459 }
2460 
2461 mod_ty
_PyPegen_make_module(Parser * p,asdl_stmt_seq * a)2462 _PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
2463     asdl_type_ignore_seq *type_ignores = NULL;
2464     Py_ssize_t num = p->type_ignore_comments.num_items;
2465     if (num > 0) {
2466         // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
2467         type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
2468         if (type_ignores == NULL) {
2469             return NULL;
2470         }
2471         for (int i = 0; i < num; i++) {
2472             PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
2473             if (tag == NULL) {
2474                 return NULL;
2475             }
2476             type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
2477                                                   tag, p->arena);
2478             if (ti == NULL) {
2479                 return NULL;
2480             }
2481             asdl_seq_SET(type_ignores, i, ti);
2482         }
2483     }
2484     return _PyAST_Module(a, type_ignores, p->arena);
2485 }
2486 
2487 // Error reporting helpers
2488 
2489 expr_ty
_PyPegen_get_invalid_target(expr_ty e,TARGETS_TYPE targets_type)2490 _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
2491 {
2492     if (e == NULL) {
2493         return NULL;
2494     }
2495 
2496 #define VISIT_CONTAINER(CONTAINER, TYPE) do { \
2497         Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
2498         for (Py_ssize_t i = 0; i < len; i++) {\
2499             expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
2500             expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
2501             if (child != NULL) {\
2502                 return child;\
2503             }\
2504         }\
2505     } while (0)
2506 
2507     // We only need to visit List and Tuple nodes recursively as those
2508     // are the only ones that can contain valid names in targets when
2509     // they are parsed as expressions. Any other kind of expression
2510     // that is a container (like Sets or Dicts) is directly invalid and
2511     // we don't need to visit it recursively.
2512 
2513     switch (e->kind) {
2514         case List_kind:
2515             VISIT_CONTAINER(e, List);
2516             return NULL;
2517         case Tuple_kind:
2518             VISIT_CONTAINER(e, Tuple);
2519             return NULL;
2520         case Starred_kind:
2521             if (targets_type == DEL_TARGETS) {
2522                 return e;
2523             }
2524             return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
2525         case Compare_kind:
2526             // This is needed, because the `a in b` in `for a in b` gets parsed
2527             // as a comparison, and so we need to search the left side of the comparison
2528             // for invalid targets.
2529             if (targets_type == FOR_TARGETS) {
2530                 cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
2531                 if (cmpop == In) {
2532                     return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
2533                 }
2534                 return NULL;
2535             }
2536             return e;
2537         case Name_kind:
2538         case Subscript_kind:
2539         case Attribute_kind:
2540             return NULL;
2541         default:
2542             return e;
2543     }
2544 }
2545 
/* Raises the syntax error for a positional argument that follows keyword
 * arguments in the call `e`. The message mentions "keyword argument
 * unpacking" when at least one keyword of the call has no name. Returns the
 * result of RAISE_SYNTAX_ERROR. */
void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
    int has_unpacking = 0;
    Py_ssize_t count = asdl_seq_LEN(e->v.Call.keywords);
    for (Py_ssize_t pos = 0; pos < count; pos++) {
        keyword_ty kw = asdl_seq_GET(e->v.Call.keywords, pos);
        if (!kw->arg) {
            // One nameless keyword is enough to pick the message.
            has_unpacking = 1;
            break;
        }
    }

    const char *msg = has_unpacking
        ? "positional argument follows keyword argument unpacking"
        : "positional argument follows keyword argument";

    return RAISE_SYNTAX_ERROR(msg);
}
2564 
2565 
2566 static inline expr_ty
_PyPegen_get_last_comprehension_item(comprehension_ty comprehension)2567 _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
2568     if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) {
2569         return comprehension->iter;
2570     }
2571     return PyPegen_last_item(comprehension->ifs, expr_ty);
2572 }
2573 
2574 void *
_PyPegen_nonparen_genexp_in_call(Parser * p,expr_ty args,asdl_comprehension_seq * comprehensions)2575 _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions)
2576 {
2577     /* The rule that calls this function is 'args for_if_clauses'.
2578        For the input f(L, x for x in y), L and x are in args and
2579        the for is parsed as a for_if_clause. We have to check if
2580        len <= 1, so that input like dict((a, b) for a, b in x)
2581        gets successfully parsed and then we pass the last
2582        argument (x in the above example) as the location of the
2583        error */
2584     Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
2585     if (len <= 1) {
2586         return NULL;
2587     }
2588 
2589     comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty);
2590 
2591     return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
2592         (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
2593         _PyPegen_get_last_comprehension_item(last_comprehension),
2594         "Generator expression must be parenthesized"
2595     );
2596 }
2597 
2598 
_PyPegen_collect_call_seqs(Parser * p,asdl_expr_seq * a,asdl_seq * b,int lineno,int col_offset,int end_lineno,int end_col_offset,PyArena * arena)2599 expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
2600                      int lineno, int col_offset, int end_lineno,
2601                      int end_col_offset, PyArena *arena) {
2602     Py_ssize_t args_len = asdl_seq_LEN(a);
2603     Py_ssize_t total_len = args_len;
2604 
2605     if (b == NULL) {
2606         return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
2607                         end_lineno, end_col_offset, arena);
2608 
2609     }
2610 
2611     asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
2612     asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);
2613 
2614     if (starreds) {
2615         total_len += asdl_seq_LEN(starreds);
2616     }
2617 
2618     asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena);
2619 
2620     Py_ssize_t i = 0;
2621     for (i = 0; i < args_len; i++) {
2622         asdl_seq_SET(args, i, asdl_seq_GET(a, i));
2623     }
2624     for (; i < total_len; i++) {
2625         asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
2626     }
2627 
2628     return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
2629                        col_offset, end_lineno, end_col_offset, arena);
2630 }
2631