
/* Parser-tokenizer link implementation */
3 
4 #include "Python.h"
5 #include "tokenizer.h"
6 #include "node.h"
7 #include "grammar.h"
8 #include "parser.h"
9 #include "parsetok.h"
10 #include "errcode.h"
11 #include "graminit.h"
12 
13 
14 /* Forward */
15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
16 static int initerr(perrdetail *err_ret, PyObject * filename);
17 
/* Growable array of (line number, comment text) pairs.
   parsetok() uses it to collect TYPE_IGNORE tokens while parsing. */
typedef struct {
    struct {
        int lineno;      /* line number recorded for the comment */
        char *comment;   /* comment text; released with PyObject_FREE by
                            growable_comment_array_deallocate() */
    } *items;            /* backing storage holding `size` slots */
    size_t size;         /* number of slots allocated in `items` */
    size_t num_items;    /* number of slots currently in use */
} growable_comment_array;
26 
27 static int
growable_comment_array_init(growable_comment_array * arr,size_t initial_size)28 growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
29     assert(initial_size > 0);
30     arr->items = malloc(initial_size * sizeof(*arr->items));
31     arr->size = initial_size;
32     arr->num_items = 0;
33 
34     return arr->items != NULL;
35 }
36 
37 static int
growable_comment_array_add(growable_comment_array * arr,int lineno,char * comment)38 growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
39     if (arr->num_items >= arr->size) {
40         arr->size *= 2;
41         arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
42         if (!arr->items) {
43             return 0;
44         }
45     }
46 
47     arr->items[arr->num_items].lineno = lineno;
48     arr->items[arr->num_items].comment = comment;
49     arr->num_items++;
50     return 1;
51 }
52 
53 static void
growable_comment_array_deallocate(growable_comment_array * arr)54 growable_comment_array_deallocate(growable_comment_array *arr) {
55     for (unsigned i = 0; i < arr->num_items; i++) {
56         PyObject_FREE(arr->items[i].comment);
57     }
58     free(arr->items);
59 }
60 
61 /* Parse input coming from a string.  Return error code, print some errors. */
62 node *
PyParser_ParseString(const char * s,grammar * g,int start,perrdetail * err_ret)63 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
64 {
65     return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
66 }
67 
68 node *
PyParser_ParseStringFlags(const char * s,grammar * g,int start,perrdetail * err_ret,int flags)69 PyParser_ParseStringFlags(const char *s, grammar *g, int start,
70                           perrdetail *err_ret, int flags)
71 {
72     return PyParser_ParseStringFlagsFilename(s, NULL,
73                                              g, start, err_ret, flags);
74 }
75 
76 node *
PyParser_ParseStringFlagsFilename(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int flags)77 PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
78                           grammar *g, int start,
79                           perrdetail *err_ret, int flags)
80 {
81     int iflags = flags;
82     return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
83                                                err_ret, &iflags);
84 }
85 
86 node *
PyParser_ParseStringObject(const char * s,PyObject * filename,grammar * g,int start,perrdetail * err_ret,int * flags)87 PyParser_ParseStringObject(const char *s, PyObject *filename,
88                            grammar *g, int start,
89                            perrdetail *err_ret, int *flags)
90 {
91     struct tok_state *tok;
92     int exec_input = start == file_input;
93 
94     if (initerr(err_ret, filename) < 0)
95         return NULL;
96 
97     if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) {
98         err_ret->error = E_ERROR;
99         return NULL;
100     }
101 
102     if (*flags & PyPARSE_IGNORE_COOKIE)
103         tok = PyTokenizer_FromUTF8(s, exec_input);
104     else
105         tok = PyTokenizer_FromString(s, exec_input);
106     if (tok == NULL) {
107         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
108         return NULL;
109     }
110     if (*flags & PyPARSE_TYPE_COMMENTS) {
111         tok->type_comments = 1;
112     }
113 
114     Py_INCREF(err_ret->filename);
115     tok->filename = err_ret->filename;
116     if (*flags & PyPARSE_ASYNC_HACKS)
117         tok->async_hacks = 1;
118     return parsetok(tok, g, start, err_ret, flags);
119 }
120 
121 node *
PyParser_ParseStringFlagsFilenameEx(const char * s,const char * filename_str,grammar * g,int start,perrdetail * err_ret,int * flags)122 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
123                           grammar *g, int start,
124                           perrdetail *err_ret, int *flags)
125 {
126     node *n;
127     PyObject *filename = NULL;
128     if (filename_str != NULL) {
129         filename = PyUnicode_DecodeFSDefault(filename_str);
130         if (filename == NULL) {
131             err_ret->error = E_ERROR;
132             return NULL;
133         }
134     }
135     n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
136     Py_XDECREF(filename);
137     return n;
138 }
139 
140 /* Parse input coming from a file.  Return error code, print some errors. */
141 
142 node *
PyParser_ParseFile(FILE * fp,const char * filename,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret)143 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
144                    const char *ps1, const char *ps2,
145                    perrdetail *err_ret)
146 {
147     return PyParser_ParseFileFlags(fp, filename, NULL,
148                                    g, start, ps1, ps2, err_ret, 0);
149 }
150 
151 node *
PyParser_ParseFileFlags(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int flags)152 PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
153                         grammar *g, int start,
154                         const char *ps1, const char *ps2,
155                         perrdetail *err_ret, int flags)
156 {
157     int iflags = flags;
158     return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
159                                      ps2, err_ret, &iflags);
160 }
161 
162 node *
PyParser_ParseFileObject(FILE * fp,PyObject * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)163 PyParser_ParseFileObject(FILE *fp, PyObject *filename,
164                          const char *enc, grammar *g, int start,
165                          const char *ps1, const char *ps2,
166                          perrdetail *err_ret, int *flags)
167 {
168     struct tok_state *tok;
169 
170     if (initerr(err_ret, filename) < 0)
171         return NULL;
172 
173     if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) {
174         return NULL;
175     }
176 
177     if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
178         err_ret->error = E_NOMEM;
179         return NULL;
180     }
181     if (*flags & PyPARSE_TYPE_COMMENTS) {
182         tok->type_comments = 1;
183     }
184     Py_INCREF(err_ret->filename);
185     tok->filename = err_ret->filename;
186     return parsetok(tok, g, start, err_ret, flags);
187 }
188 
189 node *
PyParser_ParseFileFlagsEx(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)190 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
191                           const char *enc, grammar *g, int start,
192                           const char *ps1, const char *ps2,
193                           perrdetail *err_ret, int *flags)
194 {
195     node *n;
196     PyObject *fileobj = NULL;
197     if (filename != NULL) {
198         fileobj = PyUnicode_DecodeFSDefault(filename);
199         if (fileobj == NULL) {
200             err_ret->error = E_ERROR;
201             return NULL;
202         }
203     }
204     n = PyParser_ParseFileObject(fp, fileobj, enc, g,
205                                  start, ps1, ps2, err_ret, flags);
206     Py_XDECREF(fileobj);
207     return n;
208 }
209 
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Disabled by the surrounding "#if 0": this code is never compiled.
   It holds warning texts for 'with' / 'as' and a helper that writes one of
   them to stderr, substituting "<string>" when no filename is given. */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

static void
warn(const char *msg, const char *filename, int lineno)
{
    const char *shown_name = (filename != NULL) ? filename : "<string>";

    PySys_WriteStderr(msg, shown_name, lineno);
}
#endif
#endif
227 
228 /* Parse input coming from the given tokenizer structure.
229    Return error code. */
230 
231 static node *
parsetok(struct tok_state * tok,grammar * g,int start,perrdetail * err_ret,int * flags)232 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
233          int *flags)
234 {
235     parser_state *ps;
236     node *n;
237     int started = 0;
238     int col_offset, end_col_offset;
239     growable_comment_array type_ignores;
240 
241     if (!growable_comment_array_init(&type_ignores, 10)) {
242         err_ret->error = E_NOMEM;
243         PyTokenizer_Free(tok);
244         return NULL;
245     }
246 
247     if ((ps = PyParser_New(g, start)) == NULL) {
248         err_ret->error = E_NOMEM;
249         PyTokenizer_Free(tok);
250         return NULL;
251     }
252 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
253     if (*flags & PyPARSE_BARRY_AS_BDFL)
254         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
255     if (*flags & PyPARSE_TYPE_COMMENTS)
256         ps->p_flags |= PyCF_TYPE_COMMENTS;
257 #endif
258 
259     for (;;) {
260         char *a, *b;
261         int type;
262         size_t len;
263         char *str;
264         col_offset = -1;
265         int lineno;
266         const char *line_start;
267 
268         type = PyTokenizer_Get(tok, &a, &b);
269         if (type == ERRORTOKEN) {
270             err_ret->error = tok->done;
271             break;
272         }
273         if (type == ENDMARKER && started) {
274             type = NEWLINE; /* Add an extra newline */
275             started = 0;
276             /* Add the right number of dedent tokens,
277                except if a certain flag is given --
278                codeop.py uses this. */
279             if (tok->indent &&
280                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
281             {
282                 tok->pendin = -tok->indent;
283                 tok->indent = 0;
284             }
285         }
286         else
287             started = 1;
288         len = (a != NULL && b != NULL) ? b - a : 0;
289         str = (char *) PyObject_MALLOC(len + 1);
290         if (str == NULL) {
291             err_ret->error = E_NOMEM;
292             break;
293         }
294         if (len > 0)
295             strncpy(str, a, len);
296         str[len] = '\0';
297 
298 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
299         if (type == NOTEQUAL) {
300             if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
301                             strcmp(str, "!=")) {
302                 PyObject_FREE(str);
303                 err_ret->error = E_SYNTAX;
304                 break;
305             }
306             else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
307                             strcmp(str, "<>")) {
308                 PyObject_FREE(str);
309                 err_ret->expected = NOTEQUAL;
310                 err_ret->error = E_SYNTAX;
311                 break;
312             }
313         }
314 #endif
315 
316         /* Nodes of type STRING, especially multi line strings
317            must be handled differently in order to get both
318            the starting line number and the column offset right.
319            (cf. issue 16806) */
320         lineno = type == STRING ? tok->first_lineno : tok->lineno;
321         line_start = type == STRING ? tok->multi_line_start : tok->line_start;
322         if (a != NULL && a >= line_start) {
323             col_offset = Py_SAFE_DOWNCAST(a - line_start,
324                                           intptr_t, int);
325         }
326         else {
327             col_offset = -1;
328         }
329 
330         if (b != NULL && b >= tok->line_start) {
331             end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start,
332                                               intptr_t, int);
333         }
334         else {
335             end_col_offset = -1;
336         }
337 
338         if (type == TYPE_IGNORE) {
339             if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
340                 err_ret->error = E_NOMEM;
341                 break;
342             }
343             continue;
344         }
345 
346         if ((err_ret->error =
347              PyParser_AddToken(ps, (int)type, str,
348                                lineno, col_offset, tok->lineno, end_col_offset,
349                                &(err_ret->expected))) != E_OK) {
350             if (err_ret->error != E_DONE) {
351                 PyObject_FREE(str);
352                 err_ret->token = type;
353             }
354             break;
355         }
356     }
357 
358     if (err_ret->error == E_DONE) {
359         n = ps->p_tree;
360         ps->p_tree = NULL;
361 
362         if (n->n_type == file_input) {
363             /* Put type_ignore nodes in the ENDMARKER of file_input. */
364             int num;
365             node *ch;
366             size_t i;
367 
368             num = NCH(n);
369             ch = CHILD(n, num - 1);
370             REQ(ch, ENDMARKER);
371 
372             for (i = 0; i < type_ignores.num_items; i++) {
373                 int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
374                                           type_ignores.items[i].lineno, 0,
375                                           type_ignores.items[i].lineno, 0);
376                 if (res != 0) {
377                     err_ret->error = res;
378                     PyNode_Free(n);
379                     n = NULL;
380                     break;
381                 }
382                 type_ignores.items[i].comment = NULL;
383             }
384         }
385 
386         /* Check that the source for a single input statement really
387            is a single statement by looking at what is left in the
388            buffer after parsing.  Trailing whitespace and comments
389            are OK.  */
390         if (err_ret->error == E_DONE && start == single_input) {
391             char *cur = tok->cur;
392             char c = *tok->cur;
393 
394             for (;;) {
395                 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
396                     c = *++cur;
397 
398                 if (!c)
399                     break;
400 
401                 if (c != '#') {
402                     err_ret->error = E_BADSINGLE;
403                     PyNode_Free(n);
404                     n = NULL;
405                     break;
406                 }
407 
408                 /* Suck up comment. */
409                 while (c && c != '\n')
410                     c = *++cur;
411             }
412         }
413     }
414     else
415         n = NULL;
416 
417     growable_comment_array_deallocate(&type_ignores);
418 
419 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
420     *flags = ps->p_flags;
421 #endif
422     PyParser_Delete(ps);
423 
424     if (n == NULL) {
425         if (tok->done == E_EOF)
426             err_ret->error = E_EOF;
427         err_ret->lineno = tok->lineno;
428         if (tok->buf != NULL) {
429             size_t len;
430             assert(tok->cur - tok->buf < INT_MAX);
431             /* if we've managed to parse a token, point the offset to its start,
432              * else use the current reading position of the tokenizer
433              */
434             err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
435             len = tok->inp - tok->buf;
436             err_ret->text = (char *) PyObject_MALLOC(len + 1);
437             if (err_ret->text != NULL) {
438                 if (len > 0)
439                     strncpy(err_ret->text, tok->buf, len);
440                 err_ret->text[len] = '\0';
441             }
442         }
443     } else if (tok->encoding != NULL) {
444         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
445          * allocated using PyMem_
446          */
447         node* r = PyNode_New(encoding_decl);
448         if (r)
449             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
450         if (!r || !r->n_str) {
451             err_ret->error = E_NOMEM;
452             if (r)
453                 PyObject_FREE(r);
454             n = NULL;
455             goto done;
456         }
457         strcpy(r->n_str, tok->encoding);
458         PyMem_FREE(tok->encoding);
459         tok->encoding = NULL;
460         r->n_nchildren = 1;
461         r->n_child = n;
462         n = r;
463     }
464 
465 done:
466     PyTokenizer_Free(tok);
467 
468     if (n != NULL) {
469         _PyNode_FinalizeEndPos(n);
470     }
471     return n;
472 }
473 
474 static int
initerr(perrdetail * err_ret,PyObject * filename)475 initerr(perrdetail *err_ret, PyObject *filename)
476 {
477     err_ret->error = E_OK;
478     err_ret->lineno = 0;
479     err_ret->offset = 0;
480     err_ret->text = NULL;
481     err_ret->token = -1;
482     err_ret->expected = -1;
483     if (filename) {
484         Py_INCREF(filename);
485         err_ret->filename = filename;
486     }
487     else {
488         err_ret->filename = PyUnicode_FromString("<string>");
489         if (err_ret->filename == NULL) {
490             err_ret->error = E_ERROR;
491             return -1;
492         }
493     }
494     return 0;
495 }
496