/* Parser-tokenizer link implementation */

#include "Python.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"
#include "graminit.h"


/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject *filename);

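/* Growable array of (line number, comment text) pairs.  parsetok() uses it
   to collect "type: ignore" comments reported by the tokenizer so they can
   be attached to the parse tree once parsing has finished. */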
typedef struct {
    struct {
        int lineno;
        char *comment;
    } *items;
    size_t size;
    size_t num_items;
} growable_comment_array;

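/* Initialize the array with room for initial_size entries.
   Returns a nonzero value on success, 0 if the allocation fails. */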
static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    arr->items = malloc(initial_size * sizeof(*arr->items));
    arr->size = initial_size;
    arr->num_items = 0;

    return arr->items != NULL;
}

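/* Append a (lineno, comment) pair, doubling the backing storage when it is
   full.  Returns a nonzero value on success, 0 if the allocation fails. */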
static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
    if (arr->num_items >= arr->size) {
        size_t new_size = arr->size * 2;
        void *new_items = realloc(arr->items, new_size * sizeof(*arr->items));
        if (!new_items) {
            /* Keep the old buffer and size valid so the array can still be
               walked and deallocated by the caller. */
            return 0;
        }
        arr->items = new_items;
        arr->size = new_size;
    }

    arr->items[arr->num_items].lineno = lineno;
    arr->items[arr->num_items].comment = comment;
    arr->num_items++;
    return 1;
}

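/* Release the stored comment strings and the backing array itself. */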
static void
growable_comment_array_deallocate(growable_comment_array *arr) {
    for (unsigned i = 0; i < arr->num_items; i++) {
        PyObject_FREE(arr->items[i].comment);
    }
    free(arr->items);
}

/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
    return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
}

node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
                          perrdetail *err_ret, int flags)
{
    return PyParser_ParseStringFlagsFilename(s, NULL,
                                             g, start, err_ret, flags);
}

node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
                                  grammar *g, int start,
                                  perrdetail *err_ret, int flags)
{
    int iflags = flags;
    return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
                                               err_ret, &iflags);
}

node *
PyParser_ParseStringObject(const char *s, PyObject *filename,
                           grammar *g, int start,
                           perrdetail *err_ret, int *flags)
{
    struct tok_state *tok;
    int exec_input = start == file_input;

    if (initerr(err_ret, filename) < 0)
        return NULL;

    if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) {
        err_ret->error = E_ERROR;
        return NULL;
    }

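    /* With PyPARSE_IGNORE_COOKIE the source is treated as UTF-8 and any
       coding cookie in it is ignored; otherwise the cookie is honored. */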
    if (*flags & PyPARSE_IGNORE_COOKIE)
        tok = PyTokenizer_FromUTF8(s, exec_input);
    else
        tok = PyTokenizer_FromString(s, exec_input);
    if (tok == NULL) {
        err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
        return NULL;
    }
    if (*flags & PyPARSE_TYPE_COMMENTS) {
        tok->type_comments = 1;
    }

    Py_INCREF(err_ret->filename);
    tok->filename = err_ret->filename;
    if (*flags & PyPARSE_ASYNC_HACKS)
        tok->async_hacks = 1;
    return parsetok(tok, g, start, err_ret, flags);
}

node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
                                    grammar *g, int start,
                                    perrdetail *err_ret, int *flags)
{
    node *n;
    PyObject *filename = NULL;
    if (filename_str != NULL) {
        filename = PyUnicode_DecodeFSDefault(filename_str);
        if (filename == NULL) {
            err_ret->error = E_ERROR;
            return NULL;
        }
    }
    n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
    Py_XDECREF(filename);
    return n;
}

/* Parse input coming from a file. Return error code, print some errors. */

node *
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
                   const char *ps1, const char *ps2,
                   perrdetail *err_ret)
{
    return PyParser_ParseFileFlags(fp, filename, NULL,
                                   g, start, ps1, ps2, err_ret, 0);
}

node *
PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
                        grammar *g, int start,
                        const char *ps1, const char *ps2,
                        perrdetail *err_ret, int flags)
{
    int iflags = flags;
    return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
                                     ps2, err_ret, &iflags);
}

node *
PyParser_ParseFileObject(FILE *fp, PyObject *filename,
                         const char *enc, grammar *g, int start,
                         const char *ps1, const char *ps2,
                         perrdetail *err_ret, int *flags)
{
    struct tok_state *tok;

    if (initerr(err_ret, filename) < 0)
        return NULL;

    if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) {
        return NULL;
    }

    if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
        err_ret->error = E_NOMEM;
        return NULL;
    }
    if (*flags & PyPARSE_TYPE_COMMENTS) {
        tok->type_comments = 1;
    }
    Py_INCREF(err_ret->filename);
    tok->filename = err_ret->filename;
    return parsetok(tok, g, start, err_ret, flags);
}

node *
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
                          const char *enc, grammar *g, int start,
                          const char *ps1, const char *ps2,
                          perrdetail *err_ret, int *flags)
{
    node *n;
    PyObject *fileobj = NULL;
    if (filename != NULL) {
        fileobj = PyUnicode_DecodeFSDefault(filename);
        if (fileobj == NULL) {
            err_ret->error = E_ERROR;
            return NULL;
        }
    }
    n = PyParser_ParseFileObject(fp, fileobj, enc, g,
                                 start, ps1, ps2, err_ret, flags);
    Py_XDECREF(fileobj);
    return n;
}

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

static void
warn(const char *msg, const char *filename, int lineno)
{
    if (filename == NULL)
        filename = "<string>";
    PySys_WriteStderr(msg, filename, lineno);
}
#endif
#endif

/* Parse input coming from the given tokenizer structure.
   Return error code. */

static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    parser_state *ps;
    node *n;
    int started = 0;
    int col_offset, end_col_offset;
    growable_comment_array type_ignores;

    if (!growable_comment_array_init(&type_ignores, 10)) {
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }

    if ((ps = PyParser_New(g, start)) == NULL) {
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    if (*flags & PyPARSE_BARRY_AS_BDFL)
        ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
    if (*flags & PyPARSE_TYPE_COMMENTS)
        ps->p_flags |= PyCF_TYPE_COMMENTS;
#endif

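    /* Main loop: pull tokens from the tokenizer one at a time and feed them
       to the parser until it reports E_DONE or an error occurs. */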
    for (;;) {
        char *a, *b;
        int type;
        size_t len;
        char *str;
        col_offset = -1;
        int lineno;
        const char *line_start;

        type = PyTokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else
            started = 1;
        len = (a != NULL && b != NULL) ? b - a : 0;
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            err_ret->error = E_NOMEM;
            break;
        }
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';

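        /* With "from __future__ import barry_as_FLUFL" the parser must accept
           '<>' and reject '!='; both spellings arrive here as NOTEQUAL, so the
           token text is checked before it is handed to the parser. */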
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
        if (type == NOTEQUAL) {
            if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                    strcmp(str, "!=")) {
                PyObject_FREE(str);
                err_ret->error = E_SYNTAX;
                break;
            }
            else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                     strcmp(str, "<>")) {
                PyObject_FREE(str);
                err_ret->expected = NOTEQUAL;
                err_ret->error = E_SYNTAX;
                break;
            }
        }
#endif

        /* Nodes of type STRING, especially multi line strings
           must be handled differently in order to get both
           the starting line number and the column offset right.
           (cf. issue 16806) */
        lineno = type == STRING ? tok->first_lineno : tok->lineno;
        line_start = type == STRING ? tok->multi_line_start : tok->line_start;
        if (a != NULL && a >= line_start) {
            col_offset = Py_SAFE_DOWNCAST(a - line_start,
                                          intptr_t, int);
        }
        else {
            col_offset = -1;
        }

        if (b != NULL && b >= tok->line_start) {
            end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start,
                                              intptr_t, int);
        }
        else {
            end_col_offset = -1;
        }

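        /* TYPE_IGNORE tokens are not part of the grammar; stash them so they
           can be attached to the ENDMARKER of file_input after parsing. */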
        if (type == TYPE_IGNORE) {
            if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
                err_ret->error = E_NOMEM;
                break;
            }
            continue;
        }

        if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str,
                               lineno, col_offset, tok->lineno, end_col_offset,
                               &(err_ret->expected))) != E_OK) {
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }

    if (err_ret->error == E_DONE) {
        n = ps->p_tree;
        ps->p_tree = NULL;

        if (n->n_type == file_input) {
            /* Put type_ignore nodes in the ENDMARKER of file_input. */
            int num;
            node *ch;
            size_t i;

            num = NCH(n);
            ch = CHILD(n, num - 1);
            REQ(ch, ENDMARKER);

            for (i = 0; i < type_ignores.num_items; i++) {
                int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
                                          type_ignores.items[i].lineno, 0,
                                          type_ignores.items[i].lineno, 0);
                if (res != 0) {
                    err_ret->error = res;
                    PyNode_Free(n);
                    n = NULL;
                    break;
                }
                type_ignores.items[i].comment = NULL;
            }
        }

        /* Check that the source for a single input statement really
           is a single statement by looking at what is left in the
           buffer after parsing. Trailing whitespace and comments
           are OK. */
        if (err_ret->error == E_DONE && start == single_input) {
            char *cur = tok->cur;
            char c = *tok->cur;

            for (;;) {
                while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
                    c = *++cur;

                if (!c)
                    break;

                if (c != '#') {
                    err_ret->error = E_BADSINGLE;
                    PyNode_Free(n);
                    n = NULL;
                    break;
                }

                /* Suck up comment. */
                while (c && c != '\n')
                    c = *++cur;
            }
        }
    }
    else
        n = NULL;

    growable_comment_array_deallocate(&type_ignores);

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    *flags = ps->p_flags;
#endif
    PyParser_Delete(ps);

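    /* On failure, record where things went wrong: the line number, the
       offset of the offending token, and a copy of the offending text. */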
    if (n == NULL) {
        if (tok->done == E_EOF)
            err_ret->error = E_EOF;
        err_ret->lineno = tok->lineno;
        if (tok->buf != NULL) {
            size_t len;
            assert(tok->cur - tok->buf < INT_MAX);
            /* if we've managed to parse a token, point the offset to its start,
             * else use the current reading position of the tokenizer
             */
            err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
            len = tok->inp - tok->buf;
            err_ret->text = (char *) PyObject_MALLOC(len + 1);
            if (err_ret->text != NULL) {
                if (len > 0)
                    strncpy(err_ret->text, tok->buf, len);
                err_ret->text[len] = '\0';
            }
        }
    } else if (tok->encoding != NULL) {
        /* Parsing succeeded and the tokenizer detected a source encoding:
           wrap the tree in an encoding_decl node that records it. */
        /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
         * allocated using PyMem_
         */
        node* r = PyNode_New(encoding_decl);
        if (r)
            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
        if (!r || !r->n_str) {
            err_ret->error = E_NOMEM;
            if (r)
                PyObject_FREE(r);
            n = NULL;
            goto done;
        }
        strcpy(r->n_str, tok->encoding);
        PyMem_FREE(tok->encoding);
        tok->encoding = NULL;
        r->n_nchildren = 1;
        r->n_child = n;
        n = r;
    }

  done:
    PyTokenizer_Free(tok);

    if (n != NULL) {
        _PyNode_FinalizeEndPos(n);
    }
    return n;
}

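/* Reset err_ret to a clean state and set err_ret->filename, falling back to
   "<string>" when no filename was given.  Returns 0 on success, -1 on error. */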
static int
initerr(perrdetail *err_ret, PyObject *filename)
{
    err_ret->error = E_OK;
    err_ret->lineno = 0;
    err_ret->offset = 0;
    err_ret->text = NULL;
    err_ret->token = -1;
    err_ret->expected = -1;
    if (filename) {
        Py_INCREF(filename);
        err_ret->filename = filename;
    }
    else {
        err_ret->filename = PyUnicode_FromString("<string>");
        if (err_ret->filename == NULL) {
            err_ret->error = E_ERROR;
            return -1;
        }
    }
    return 0;
}