• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef PEGEN_H
2 #define PEGEN_H
3 
4 #include <Python.h>
5 #include <pycore_ast.h>
6 #include <pycore_token.h>
7 
/* Parser flag bits.  Entries wrapped in `#if 0` are compiled out and are
   therefore not defined for any includer of this header. */
#if 0
#  define PyPARSE_YIELD_IS_KEYWORD      0x0001
#endif

#define PyPARSE_DONT_IMPLY_DEDENT       0x0002

#if 0
#  define PyPARSE_WITH_IS_KEYWORD       0x0003
#  define PyPARSE_PRINT_IS_FUNCTION     0x0004
#  define PyPARSE_UNICODE_LITERALS      0x0008
#endif

#define PyPARSE_IGNORE_COOKIE           0x0010
#define PyPARSE_BARRY_AS_BDFL           0x0020
#define PyPARSE_TYPE_COMMENTS           0x0040
#define PyPARSE_ALLOW_INCOMPLETE_INPUT  0x0100

/* Sentinel position value.  RAISE_ERROR_KNOWN_LOCATION forwards a column
   equal to CURRENT_POS unchanged instead of adding 1 to it, and
   RAISE_SYNTAX_ERROR_STARTING_FROM passes it as the end line and end column. */
#define CURRENT_POS (-5)
26 
/* One memo entry.  Entries chain through `next`; each Token owns the head of
   such a chain in its `memo` field.  The `mark`, `type` and `node` members
   carry the same names as the arguments of _PyPegen_insert_memo(). */
struct _memo {
    int type;
    void *node;
    int mark;
    struct _memo *next;
};
typedef struct _memo Memo;
33 
34 typedef struct {
35     int type;
36     PyObject *bytes;
37     int level;
38     int lineno, col_offset, end_lineno, end_col_offset;
39     Memo *memo;
40     PyObject *metadata;
41 } Token;
42 
/* Pairs a keyword's spelling (`str`) with an integer `type`. */
typedef struct {
    const char *str;
    int type;
} KeywordToken;
47 
48 
/* Array of (line number, comment text) records; embedded in Parser as
   `type_ignore_comments`.
   NOTE(review): `size` vs `num_items` is presumably allocated capacity vs
   entries in use -- confirm against the implementation. */
typedef struct {
    struct {
        int lineno;
        char *comment;  // The " <tag>" in "# type: ignore <tag>"
    } *items;
    size_t size;
    size_t num_items;
} growable_comment_array;
57 
58 typedef struct {
59     struct tok_state *tok;
60     Token **tokens;
61     int mark;
62     int fill, size;
63     PyArena *arena;
64     KeywordToken **keywords;
65     char **soft_keywords;
66     int n_keyword_lists;
67     int start_rule;
68     int *errcode;
69     int parsing_started;
70     PyObject* normalize;
71     int starting_lineno;
72     int starting_col_offset;
73     int error_indicator;
74     int flags;
75     int feature_version;
76     growable_comment_array type_ignore_comments;
77     Token *known_err_token;
78     int level;
79     int call_invalid_rules;
80     int debug;
81 } Parser;
82 
83 typedef struct {
84     cmpop_ty cmpop;
85     expr_ty expr;
86 } CmpopExprPair;
87 
88 typedef struct {
89     expr_ty key;
90     expr_ty value;
91 } KeyValuePair;
92 
93 typedef struct {
94     expr_ty key;
95     pattern_ty pattern;
96 } KeyPatternPair;
97 
98 typedef struct {
99     arg_ty arg;
100     expr_ty value;
101 } NameDefaultPair;
102 
103 typedef struct {
104     asdl_arg_seq *plain_names;
105     asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultsPair's
106 } SlashWithDefault;
107 
108 typedef struct {
109     arg_ty vararg;
110     asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultsPair's
111     arg_ty kwarg;
112 } StarEtc;
113 
114 typedef struct { operator_ty kind; } AugOperator;
/* An untyped element tagged with an `is_keyword` flag; built by
   _PyPegen_keyword_or_starred(). */
typedef struct {
    void *element;
    int is_keyword;
} KeywordOrStarred;
119 
120 typedef struct {
121     void *result;
122     PyObject *metadata;
123 } ResultTokenWithMetadata;
124 
125 // Internal parser functions
126 #if defined(Py_DEBUG)
127 void _PyPegen_clear_memo_statistics(void);
128 PyObject *_PyPegen_get_memo_statistics(void);
129 #endif
130 
131 int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
132 int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
133 int _PyPegen_is_memoized(Parser *p, int type, void *pres);
134 
135 int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
136 int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
137 int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
138 int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
139 
140 Token *_PyPegen_expect_token(Parser *p, int type);
141 void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
142 Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
143 expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
144 expr_ty _PyPegen_soft_keyword_token(Parser *p);
145 expr_ty _PyPegen_fstring_middle_token(Parser* p);
146 Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
147 int _PyPegen_fill_token(Parser *p);
148 expr_ty _PyPegen_name_token(Parser *p);
149 expr_ty _PyPegen_number_token(Parser *p);
150 void *_PyPegen_string_token(Parser *p);
151 Py_ssize_t _PyPegen_byte_offset_to_character_offset_line(PyObject *line, Py_ssize_t col_offset, Py_ssize_t end_col_offset);
152 Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
153 Py_ssize_t _PyPegen_byte_offset_to_character_offset_raw(const char*, Py_ssize_t col_offset);
154 
// Error handling functions and APIs

/* Kind of target list being checked.  _RAISE_SYNTAX_ERROR_INVALID_TARGET
   reports "cannot assign to %s" for STAR_TARGETS and FOR_TARGETS and
   "cannot delete %s" otherwise.  The values are written out explicitly and
   equal the ones the compiler assigns by default. */
typedef enum {
    STAR_TARGETS = 0,
    DEL_TARGETS  = 1,
    FOR_TARGETS  = 2
} TARGETS_TYPE;
161 
162 int _Pypegen_raise_decode_error(Parser *p);
163 void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
164 int _Pypegen_tokenizer_error(Parser *p);
165 void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...);
166 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
167                                           Py_ssize_t lineno, Py_ssize_t col_offset,
168                                           Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
169                                           const char *errmsg, va_list va);
170 void _Pypegen_set_syntax_error(Parser* p, Token* last_token);
171 void _Pypegen_stack_overflow(Parser *p);
172 
173 Py_LOCAL_INLINE(void *)
RAISE_ERROR_KNOWN_LOCATION(Parser * p,PyObject * errtype,Py_ssize_t lineno,Py_ssize_t col_offset,Py_ssize_t end_lineno,Py_ssize_t end_col_offset,const char * errmsg,...)174 RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
175                            Py_ssize_t lineno, Py_ssize_t col_offset,
176                            Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
177                            const char *errmsg, ...)
178 {
179     va_list va;
180     va_start(va, errmsg);
181     Py_ssize_t _col_offset = (col_offset == CURRENT_POS ? CURRENT_POS : col_offset + 1);
182     Py_ssize_t _end_col_offset = (end_col_offset == CURRENT_POS ? CURRENT_POS : end_col_offset + 1);
183     _PyPegen_raise_error_known_location(p, errtype, lineno, _col_offset, end_lineno, _end_col_offset, errmsg, va);
184     va_end(va);
185     return NULL;
186 }
/* Error-raising shorthands.  Every macro here expands to a call that
   references a variable named `p`, so one must be in scope at the use site.
   The literal 0 / 1 passed to _PyPegen_raise_error is its `use_mark`
   argument. */
#define RAISE_SYNTAX_ERROR(msg, ...) \
    _PyPegen_raise_error(p, PyExc_SyntaxError, 0, msg, ##__VA_ARGS__)
#define RAISE_INDENTATION_ERROR(msg, ...) \
    _PyPegen_raise_error(p, PyExc_IndentationError, 0, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN(msg, ...) \
    _PyPegen_raise_error(p, PyExc_SyntaxError, 1, msg, ##__VA_ARGS__)

/* Location-aware variants: `a` supplies the start position and `b` (or `a`
   again, or CURRENT_POS) supplies the end position. */
#define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, \
                               (a)->lineno, (a)->col_offset, \
                               (b)->end_lineno, (b)->end_col_offset, \
                               msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, \
                               (a)->lineno, (a)->col_offset, \
                               (a)->end_lineno, (a)->end_col_offset, \
                               msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, \
                               (a)->lineno, (a)->col_offset, \
                               CURRENT_POS, CURRENT_POS, \
                               msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) \
    _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
197 
198 Py_LOCAL_INLINE(void *)
CHECK_CALL(Parser * p,void * result)199 CHECK_CALL(Parser *p, void *result)
200 {
201     if (result == NULL) {
202         assert(PyErr_Occurred());
203         p->error_indicator = 1;
204     }
205     return result;
206 }
207 
208 /* This is needed for helper functions that are allowed to
209    return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */
210 Py_LOCAL_INLINE(void *)
CHECK_CALL_NULL_ALLOWED(Parser * p,void * result)211 CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
212 {
213     if (result == NULL && PyErr_Occurred()) {
214         p->error_indicator = 1;
215     }
216     return result;
217 }
218 
/* Typed wrappers: run `result` through the matching CHECK_CALL* helper and
   cast what comes back to `type`.  Both expect a `p` in scope. */
#define CHECK(type, result) \
    ((type) CHECK_CALL(p, result))
#define CHECK_NULL_ALLOWED(type, result) \
    ((type) CHECK_CALL_NULL_ALLOWED(p, result))
221 
222 expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
223 const char *_PyPegen_get_expr_name(expr_ty);
224 Py_LOCAL_INLINE(void *)
_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser * p,TARGETS_TYPE type,void * e)225 _RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
226 {
227     expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
228     if (invalid_target != NULL) {
229         const char *msg;
230         if (type == STAR_TARGETS || type == FOR_TARGETS) {
231             msg = "cannot assign to %s";
232         }
233         else {
234             msg = "cannot delete %s";
235         }
236         return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
237             invalid_target,
238             msg,
239             _PyPegen_get_expr_name(invalid_target)
240         );
241         return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(invalid_target, "invalid syntax");
242     }
243     return NULL;
244 }
245 
246 // Action utility functions
247 
248 void *_PyPegen_dummy_name(Parser *p, ...);
249 void * _PyPegen_seq_last_item(asdl_seq *seq);
250 #define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
251 void * _PyPegen_seq_first_item(asdl_seq *seq);
252 #define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
253 #define UNUSED(expr) do { (void)(expr); } while (0)
254 #define EXTRA_EXPR(head, tail) head->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
255 #define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
256 PyObject *_PyPegen_new_type_comment(Parser *, const char *);
257 
258 Py_LOCAL_INLINE(PyObject *)
NEW_TYPE_COMMENT(Parser * p,Token * tc)259 NEW_TYPE_COMMENT(Parser *p, Token *tc)
260 {
261     if (tc == NULL) {
262         return NULL;
263     }
264     const char *bytes = PyBytes_AsString(tc->bytes);
265     if (bytes == NULL) {
266         goto error;
267     }
268     PyObject *tco = _PyPegen_new_type_comment(p, bytes);
269     if (tco == NULL) {
270         goto error;
271     }
272     return tco;
273  error:
274     p->error_indicator = 1;  // Inline CHECK_CALL
275     return NULL;
276 }
277 
278 Py_LOCAL_INLINE(void *)
INVALID_VERSION_CHECK(Parser * p,int version,char * msg,void * node)279 INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
280 {
281     if (node == NULL) {
282         p->error_indicator = 1;  // Inline CHECK_CALL
283         return NULL;
284     }
285     if (p->feature_version < version) {
286         p->error_indicator = 1;
287         return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater",
288                                   msg, version);
289     }
290     return node;
291 }
292 
293 #define CHECK_VERSION(type, version, msg, node) ((type) INVALID_VERSION_CHECK(p, version, msg, node))
294 
295 arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
296 PyObject *_PyPegen_new_identifier(Parser *, const char *);
297 asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
298 asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
299 asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
300 asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *);
301 expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty);
302 int _PyPegen_seq_count_dots(asdl_seq *);
303 alias_ty _PyPegen_alias_for_star(Parser *, int, int, int, int, PyArena *);
304 asdl_identifier_seq *_PyPegen_map_names_to_ids(Parser *, asdl_expr_seq *);
305 CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty);
306 asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *);
307 asdl_expr_seq *_PyPegen_get_exprs(Parser *, asdl_seq *);
308 expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty);
309 KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty);
310 asdl_expr_seq *_PyPegen_get_keys(Parser *, asdl_seq *);
311 asdl_expr_seq *_PyPegen_get_values(Parser *, asdl_seq *);
312 KeyPatternPair *_PyPegen_key_pattern_pair(Parser *, expr_ty, pattern_ty);
313 asdl_expr_seq *_PyPegen_get_pattern_keys(Parser *, asdl_seq *);
314 asdl_pattern_seq *_PyPegen_get_patterns(Parser *, asdl_seq *);
315 NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty, Token *);
316 SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_arg_seq *, asdl_seq *);
317 StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
318 arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
319                                      asdl_arg_seq *, asdl_seq *, StarEtc *);
320 arguments_ty _PyPegen_empty_arguments(Parser *);
321 expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
322                                  int, int, int, int, PyArena *);
323 AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
324 stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
325 stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
326 KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
327 asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
328 asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
329 expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
330                      int lineno, int col_offset, int end_lineno,
331                      int end_col_offset, PyArena *arena);
332 expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok);
333 expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok);
334 expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok);
335 expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *, int, int, int, int, PyArena *);
336 expr_ty _PyPegen_FetchRawForm(Parser *p, int, int, int, int);
337 expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
338 expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
339 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
340 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
341 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
342 ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
343 ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
344                                                          int, int, PyArena *);
345 mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
346 void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
347 expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
348 void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions);
349 
350 // Parser API
351 
352 Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
353 void _PyPegen_Parser_Free(Parser *);
354 mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
355                                     const char *, const char *, PyCompilerFlags *, int *, PyObject **,
356                                     PyArena *);
357 void *_PyPegen_run_parser(Parser *);
358 mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
359 asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
360 
361 // TODO: move to the correct place in this file
362 expr_ty _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b);
363 
364 // Generated function in parse.c - function definition in python.gram
365 void *_PyPegen_parse(Parser *);
366 
367 #endif
368