1 /* parsermodule.c
2 *
3 * Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
4 * Institute and State University, Blacksburg, Virginia, USA.
5 * Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
6 * Amsterdam, The Netherlands. Copying is permitted under the terms
7 * associated with the main Python distribution, with the additional
8 * restriction that this additional notice be included and maintained
9 * on all distributed copies.
10 *
11 * This module serves to replace the original parser module written
12 * by Guido. The functionality is not matched precisely, but the
13 * original may be implemented on top of this. This is desirable
14 * since the source of the text to be parsed is now divorced from
15 * this interface.
16 *
17 * Unlike the prior interface, the ability to give a parse tree
18 * produced by Python code as a tuple to the compiler is enabled by
19 * this module. See the documentation for more details.
20 *
21 * I've added some annotations that help with the lint code-checking
22 * program, but they're not complete by a long shot. The real errors
23 * that lint detects are gone, but there are still warnings with
24 * Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations
25 * look like "NOTE(...)".
26 *
27 */
28
29 #include "Python.h" /* general Python API */
30 #include "Python-ast.h" /* mod_ty */
31 #undef Yield /* undefine macro conflicting with <winbase.h> */
32 #include "ast.h"
33 #include "graminit.h" /* symbols defined in the grammar */
34 #include "node.h" /* internal parser structure */
35 #include "errcode.h" /* error codes for PyNode_*() */
36 #include "token.h" /* token definitions */
37 /* ISTERMINAL() / ISNONTERMINAL() */
38 #include "grammar.h"
39 #include "parsetok.h"
40
41 extern grammar _PyParser_Grammar; /* From graminit.c */
42
43 #ifdef lint
44 #include <note.h>
45 #else
46 #define NOTE(x)
47 #endif
48
49 /* String constants used to initialize module attributes.
50 *
51 */
52 static const char parser_copyright_string[] =
53 "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
54 University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
55 Virginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\n\
56 Centrum, Amsterdam, The Netherlands.";
57
58
59 PyDoc_STRVAR(parser_doc_string,
60 "This is an interface to Python's internal parser.");
61
62 static const char parser_version_string[] = "0.5";
63
64
65 typedef PyObject* (*SeqMaker) (Py_ssize_t length);
66 typedef int (*SeqInserter) (PyObject* sequence,
67 Py_ssize_t index,
68 PyObject* element);
69
70 /* The function below is copyrighted by Stichting Mathematisch Centrum. The
71 * original copyright statement is included below, and continues to apply
72 * in full to the function immediately following. All other material is
73 * original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
74 * Institute and State University. Changes were made to comply with the
75 * new naming conventions. Added arguments to provide support for creating
76 * lists as well as tuples, and optionally including the line numbers.
77 */
78
79
80 static PyObject*
node2tuple(node * n,SeqMaker mkseq,SeqInserter addelem,int lineno,int col_offset)81 node2tuple(node *n, /* node to convert */
82 SeqMaker mkseq, /* create sequence */
83 SeqInserter addelem, /* func. to add elem. in seq. */
84 int lineno, /* include line numbers? */
85 int col_offset) /* include column offsets? */
86 {
87 PyObject *result = NULL, *w;
88
89 if (n == NULL) {
90 Py_RETURN_NONE;
91 }
92
93 if (ISNONTERMINAL(TYPE(n))) {
94 int i;
95
96 result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl));
97 if (result == NULL)
98 goto error;
99
100 w = PyLong_FromLong(TYPE(n));
101 if (w == NULL)
102 goto error;
103 (void) addelem(result, 0, w);
104
105 for (i = 0; i < NCH(n); i++) {
106 w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset);
107 if (w == NULL)
108 goto error;
109 (void) addelem(result, i+1, w);
110 }
111
112 if (TYPE(n) == encoding_decl) {
113 w = PyUnicode_FromString(STR(n));
114 if (w == NULL)
115 goto error;
116 (void) addelem(result, i+1, w);
117 }
118 }
119 else if (ISTERMINAL(TYPE(n))) {
120 result = mkseq(2 + lineno + col_offset);
121 if (result == NULL)
122 goto error;
123
124 w = PyLong_FromLong(TYPE(n));
125 if (w == NULL)
126 goto error;
127 (void) addelem(result, 0, w);
128
129 w = PyUnicode_FromString(STR(n));
130 if (w == NULL)
131 goto error;
132 (void) addelem(result, 1, w);
133
134 if (lineno) {
135 w = PyLong_FromLong(n->n_lineno);
136 if (w == NULL)
137 goto error;
138 (void) addelem(result, 2, w);
139 }
140
141 if (col_offset) {
142 w = PyLong_FromLong(n->n_col_offset);
143 if (w == NULL)
144 goto error;
145 (void) addelem(result, 2 + lineno, w);
146 }
147 }
148 else {
149 PyErr_SetString(PyExc_SystemError,
150 "unrecognized parse tree node type");
151 return ((PyObject*) NULL);
152 }
153 return result;
154
155 error:
156 Py_XDECREF(result);
157 return NULL;
158 }
159 /*
160 * End of material copyrighted by Stichting Mathematisch Centrum.
161 */
162
163
164
165 /* There are two types of intermediate objects we're interested in:
166 * 'eval' and 'exec' types. These constants can be used in the st_type
167 * field of the object type to identify which any given object represents.
168 * These should probably go in an external header to allow other extensions
169 * to use them, but then, we really should be using C++ too. ;-)
170 */
171
172 #define PyST_EXPR 1
173 #define PyST_SUITE 2
174
175
176 /* These are the internal objects and definitions required to implement the
177 * ST type. Most of the internal names are more reminiscent of the 'old'
178 * naming style, but the code uses the new naming convention.
179 */
180
181 static PyObject*
182 parser_error = 0;
183
184
185 typedef struct {
186 PyObject_HEAD /* standard object header */
187 node* st_node; /* the node* returned by the parser */
188 int st_type; /* EXPR or SUITE ? */
189 PyCompilerFlags st_flags; /* Parser and compiler flags */
190 } PyST_Object;
191
192
193 static void parser_free(PyST_Object *st);
194 static PyObject* parser_sizeof(PyST_Object *, void *);
195 static PyObject* parser_richcompare(PyObject *left, PyObject *right, int op);
196 static PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *);
197 static PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *);
198 static PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *);
199 static PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *);
200 static PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *);
201
202 #define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
203
204 static PyMethodDef parser_methods[] = {
205 {"compile", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE,
206 PyDoc_STR("Compile this ST object into a code object.")},
207 {"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE,
208 PyDoc_STR("Determines if this ST object was created from an expression.")},
209 {"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE,
210 PyDoc_STR("Determines if this ST object was created from a suite.")},
211 {"tolist", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE,
212 PyDoc_STR("Creates a list-tree representation of this ST.")},
213 {"totuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE,
214 PyDoc_STR("Creates a tuple-tree representation of this ST.")},
215 {"__sizeof__", (PyCFunction)parser_sizeof, METH_NOARGS,
216 PyDoc_STR("Returns size in memory, in bytes.")},
217 {NULL, NULL, 0, NULL}
218 };
219
220 static
221 PyTypeObject PyST_Type = {
222 PyVarObject_HEAD_INIT(NULL, 0)
223 "parser.st", /* tp_name */
224 (int) sizeof(PyST_Object), /* tp_basicsize */
225 0, /* tp_itemsize */
226 (destructor)parser_free, /* tp_dealloc */
227 0, /* tp_vectorcall_offset */
228 0, /* tp_getattr */
229 0, /* tp_setattr */
230 0, /* tp_as_async */
231 0, /* tp_repr */
232 0, /* tp_as_number */
233 0, /* tp_as_sequence */
234 0, /* tp_as_mapping */
235 0, /* tp_hash */
236 0, /* tp_call */
237 0, /* tp_str */
238 0, /* tp_getattro */
239 0, /* tp_setattro */
240
241 /* Functions to access object as input/output buffer */
242 0, /* tp_as_buffer */
243
244 Py_TPFLAGS_DEFAULT, /* tp_flags */
245
246 /* __doc__ */
247 "Intermediate representation of a Python parse tree.",
248 0, /* tp_traverse */
249 0, /* tp_clear */
250 parser_richcompare, /* tp_richcompare */
251 0, /* tp_weaklistoffset */
252 0, /* tp_iter */
253 0, /* tp_iternext */
254 parser_methods, /* tp_methods */
255 }; /* PyST_Type */
256
257
258 /* PyST_Type isn't subclassable, so just check ob_type */
259 #define PyST_Object_Check(v) Py_IS_TYPE(v, &PyST_Type)
260
261 static int
parser_compare_nodes(node * left,node * right)262 parser_compare_nodes(node *left, node *right)
263 {
264 int j;
265
266 if (TYPE(left) < TYPE(right))
267 return (-1);
268
269 if (TYPE(right) < TYPE(left))
270 return (1);
271
272 if (ISTERMINAL(TYPE(left)))
273 return (strcmp(STR(left), STR(right)));
274
275 if (NCH(left) < NCH(right))
276 return (-1);
277
278 if (NCH(right) < NCH(left))
279 return (1);
280
281 for (j = 0; j < NCH(left); ++j) {
282 int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
283
284 if (v != 0)
285 return (v);
286 }
287 return (0);
288 }
289
290 /* parser_richcompare(PyObject* left, PyObject* right, int op)
291 *
292 * Comparison function used by the Python operators ==, !=, <, >, <=, >=
293 * This really just wraps a call to parser_compare_nodes() with some easy
294 * checks and protection code.
295 *
296 */
297
298 static PyObject *
parser_richcompare(PyObject * left,PyObject * right,int op)299 parser_richcompare(PyObject *left, PyObject *right, int op)
300 {
301 int result;
302
303 /* neither argument should be NULL, unless something's gone wrong */
304 if (left == NULL || right == NULL) {
305 PyErr_BadInternalCall();
306 return NULL;
307 }
308
309 /* both arguments should be instances of PyST_Object */
310 if (!PyST_Object_Check(left) || !PyST_Object_Check(right)) {
311 Py_RETURN_NOTIMPLEMENTED;
312 }
313
314 if (left == right)
315 /* if arguments are identical, they're equal */
316 result = 0;
317 else
318 result = parser_compare_nodes(((PyST_Object *)left)->st_node,
319 ((PyST_Object *)right)->st_node);
320
321 Py_RETURN_RICHCOMPARE(result, 0, op);
322 }
323
324 /* parser_newstobject(node* st)
325 *
326 * Allocates a new Python object representing an ST. This is simply the
327 * 'wrapper' object that holds a node* and allows it to be passed around in
328 * Python code.
329 *
330 */
331 static PyObject*
parser_newstobject(node * st,int type)332 parser_newstobject(node *st, int type)
333 {
334 PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type);
335
336 if (o != 0) {
337 o->st_node = st;
338 o->st_type = type;
339 o->st_flags = _PyCompilerFlags_INIT;
340 }
341 else {
342 PyNode_Free(st);
343 }
344 return ((PyObject*)o);
345 }
346
347
348 /* void parser_free(PyST_Object* st)
349 *
350 * This is called by a del statement that reduces the reference count to 0.
351 *
352 */
353 static void
parser_free(PyST_Object * st)354 parser_free(PyST_Object *st)
355 {
356 PyNode_Free(st->st_node);
357 PyObject_Del(st);
358 }
359
360 static PyObject *
parser_sizeof(PyST_Object * st,void * unused)361 parser_sizeof(PyST_Object *st, void *unused)
362 {
363 Py_ssize_t res;
364
365 res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node);
366 return PyLong_FromSsize_t(res);
367 }
368
369
370 /* parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw)
371 *
372 * This provides conversion from a node* to a tuple object that can be
373 * returned to the Python-level caller. The ST object is not modified.
374 *
375 */
376 static PyObject*
parser_st2tuple(PyST_Object * self,PyObject * args,PyObject * kw)377 parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw)
378 {
379 int line_info = 0;
380 int col_info = 0;
381 PyObject *res = 0;
382 int ok;
383
384 static char *keywords[] = {"st", "line_info", "col_info", NULL};
385
386 if (self == NULL || PyModule_Check(self)) {
387 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords,
388 &PyST_Type, &self, &line_info,
389 &col_info);
390 }
391 else
392 ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1],
393 &line_info, &col_info);
394 if (ok != 0) {
395 /*
396 * Convert ST into a tuple representation. Use Guido's function,
397 * since it's known to work already.
398 */
399 res = node2tuple(((PyST_Object*)self)->st_node,
400 PyTuple_New, PyTuple_SetItem, line_info, col_info);
401 }
402 return (res);
403 }
404
405
406 /* parser_st2list(PyObject* self, PyObject* args, PyObject* kw)
407 *
408 * This provides conversion from a node* to a list object that can be
409 * returned to the Python-level caller. The ST object is not modified.
410 *
411 */
412 static PyObject*
parser_st2list(PyST_Object * self,PyObject * args,PyObject * kw)413 parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw)
414 {
415 int line_info = 0;
416 int col_info = 0;
417 PyObject *res = 0;
418 int ok;
419
420 static char *keywords[] = {"st", "line_info", "col_info", NULL};
421
422 if (self == NULL || PyModule_Check(self))
423 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords,
424 &PyST_Type, &self, &line_info,
425 &col_info);
426 else
427 ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1],
428 &line_info, &col_info);
429 if (ok) {
430 /*
431 * Convert ST into a tuple representation. Use Guido's function,
432 * since it's known to work already.
433 */
434 res = node2tuple(self->st_node,
435 PyList_New, PyList_SetItem, line_info, col_info);
436 }
437 return (res);
438 }
439
440
441 /* parser_compilest(PyObject* self, PyObject* args)
442 *
443 * This function creates code objects from the parse tree represented by
444 * the passed-in data object. An optional file name is passed in as well.
445 *
446 */
447 static PyObject*
parser_compilest(PyST_Object * self,PyObject * args,PyObject * kw)448 parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw)
449 {
450 PyObject* res = NULL;
451 PyArena* arena = NULL;
452 mod_ty mod;
453 PyObject* filename = NULL;
454 int ok;
455
456 static char *keywords[] = {"st", "filename", NULL};
457
458 if (self == NULL || PyModule_Check(self))
459 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords,
460 &PyST_Type, &self,
461 PyUnicode_FSDecoder, &filename);
462 else
463 ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1],
464 PyUnicode_FSDecoder, &filename);
465 if (!ok)
466 goto error;
467
468 if (filename == NULL) {
469 filename = PyUnicode_FromString("<syntax-tree>");
470 if (filename == NULL)
471 goto error;
472 }
473
474 arena = PyArena_New();
475 if (!arena)
476 goto error;
477
478 mod = PyAST_FromNodeObject(self->st_node, &self->st_flags,
479 filename, arena);
480 if (!mod)
481 goto error;
482
483 res = (PyObject *)PyAST_CompileObject(mod, filename,
484 &self->st_flags, -1, arena);
485 error:
486 Py_XDECREF(filename);
487 if (arena != NULL)
488 PyArena_Free(arena);
489 return res;
490 }
491
492
493 /* PyObject* parser_isexpr(PyObject* self, PyObject* args)
494 * PyObject* parser_issuite(PyObject* self, PyObject* args)
495 *
496 * Checks the passed-in ST object to determine if it is an expression or
497 * a statement suite, respectively. The return is a Python truth value.
498 *
499 */
500 static PyObject*
parser_isexpr(PyST_Object * self,PyObject * args,PyObject * kw)501 parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw)
502 {
503 PyObject* res = 0;
504 int ok;
505
506 static char *keywords[] = {"st", NULL};
507
508 if (self == NULL || PyModule_Check(self))
509 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
510 &PyST_Type, &self);
511 else
512 ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
513
514 if (ok) {
515 /* Check to see if the ST represents an expression or not. */
516 res = (self->st_type == PyST_EXPR) ? Py_True : Py_False;
517 Py_INCREF(res);
518 }
519 return (res);
520 }
521
522
523 static PyObject*
parser_issuite(PyST_Object * self,PyObject * args,PyObject * kw)524 parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw)
525 {
526 PyObject* res = 0;
527 int ok;
528
529 static char *keywords[] = {"st", NULL};
530
531 if (self == NULL || PyModule_Check(self))
532 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
533 &PyST_Type, &self);
534 else
535 ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
536
537 if (ok) {
538 /* Check to see if the ST represents an expression or not. */
539 res = (self->st_type == PyST_EXPR) ? Py_False : Py_True;
540 Py_INCREF(res);
541 }
542 return (res);
543 }
544
545
546 /* err_string(const char* message)
547 *
548 * Sets the error string for an exception of type ParserError.
549 *
550 */
551 static void
err_string(const char * message)552 err_string(const char *message)
553 {
554 PyErr_SetString(parser_error, message);
555 }
556
557
558 /* PyObject* parser_do_parse(PyObject* args, int type)
559 *
560 * Internal function to actually execute the parse and return the result if
561 * successful or set an exception if not.
562 *
563 */
564 static PyObject*
parser_do_parse(PyObject * args,PyObject * kw,const char * argspec,int type)565 parser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type)
566 {
567 char* string = 0;
568 PyObject* res = 0;
569 int flags = 0;
570 perrdetail err;
571
572 static char *keywords[] = {"source", NULL};
573
574 if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) {
575 node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL,
576 &_PyParser_Grammar,
577 (type == PyST_EXPR)
578 ? eval_input : file_input,
579 &err, &flags);
580
581 if (n) {
582 res = parser_newstobject(n, type);
583 if (res) {
584 ((PyST_Object *)res)->st_flags.cf_flags = flags & PyCF_MASK;
585 ((PyST_Object *)res)->st_flags.cf_feature_version = PY_MINOR_VERSION;
586 }
587 }
588 else {
589 PyParser_SetError(&err);
590 }
591 PyParser_ClearError(&err);
592 }
593 return (res);
594 }
595
596
597 /* PyObject* parser_expr(PyObject* self, PyObject* args)
598 * PyObject* parser_suite(PyObject* self, PyObject* args)
599 *
600 * External interfaces to the parser itself. Which is called determines if
601 * the parser attempts to recognize an expression ('eval' form) or statement
602 * suite ('exec' form). The real work is done by parser_do_parse() above.
603 *
604 */
605 static PyObject*
parser_expr(PyST_Object * self,PyObject * args,PyObject * kw)606 parser_expr(PyST_Object *self, PyObject *args, PyObject *kw)
607 {
608 NOTE(ARGUNUSED(self))
609 return (parser_do_parse(args, kw, "s:expr", PyST_EXPR));
610 }
611
612
613 static PyObject*
parser_suite(PyST_Object * self,PyObject * args,PyObject * kw)614 parser_suite(PyST_Object *self, PyObject *args, PyObject *kw)
615 {
616 NOTE(ARGUNUSED(self))
617 return (parser_do_parse(args, kw, "s:suite", PyST_SUITE));
618 }
619
620
621
622 /* This is the messy part of the code. Conversion from a tuple to an ST
623 * object requires that the input tuple be valid without having to rely on
624 * catching an exception from the compiler. This is done to allow the
625 * compiler itself to remain fast, since most of its input will come from
626 * the parser directly, and therefore be known to be syntactically correct.
627 * This validation is done to ensure that we don't core dump the compile
628 * phase, returning an exception instead.
629 *
630 * Two aspects can be broken out in this code: creating a node tree from
631 * the tuple passed in, and verifying that it is indeed valid. It may be
632 * advantageous to expand the number of ST types to include funcdefs and
633 * lambdadefs to take advantage of the optimizer, recognizing those STs
634 * here. They are not necessary, and not quite as useful in a raw form.
635 * For now, let's get expressions and suites working reliably.
636 */
637
638
639 static node* build_node_tree(PyObject *tuple);
640
641 static int
validate_node(node * tree)642 validate_node(node *tree)
643 {
644 int type = TYPE(tree);
645 int nch = NCH(tree);
646 state *dfa_state;
647 int pos, arc;
648
649 assert(ISNONTERMINAL(type));
650 type -= NT_OFFSET;
651 if (type >= _PyParser_Grammar.g_ndfas) {
652 PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
653 return 0;
654 }
655 const dfa *nt_dfa = &_PyParser_Grammar.g_dfa[type];
656 REQ(tree, nt_dfa->d_type);
657
658 /* Run the DFA for this nonterminal. */
659 dfa_state = nt_dfa->d_state;
660 for (pos = 0; pos < nch; ++pos) {
661 node *ch = CHILD(tree, pos);
662 int ch_type = TYPE(ch);
663 if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas)
664 || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS))
665 || (ch_type < 0)
666 ) {
667 PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type);
668 return 0;
669 }
670 if (ch_type == suite && TYPE(tree) == funcdef) {
671 /* This is the opposite hack of what we do in parser.c
672 (search for func_body_suite), except we don't ever
673 support type comments here. */
674 ch_type = func_body_suite;
675 }
676 for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
677 short a_label = dfa_state->s_arc[arc].a_lbl;
678 assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
679
680 const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
681 if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type)
682 && ((ch->n_str == NULL) || (label_str == NULL)
683 || (strcmp(ch->n_str, label_str) == 0))
684 ) {
685 /* The child is acceptable; if non-terminal, validate it recursively. */
686 if (ISNONTERMINAL(ch_type) && !validate_node(ch))
687 return 0;
688
689 /* Update the state, and move on to the next child. */
690 dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
691 goto arc_found;
692 }
693 }
694 /* What would this state have accepted? */
695 {
696 short a_label = dfa_state->s_arc->a_lbl;
697 if (!a_label) /* Wouldn't accept any more children */
698 goto illegal_num_children;
699
700 int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
701 const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
702
703 if (ISNONTERMINAL(next_type)) {
704 PyErr_Format(parser_error, "Expected %s, got %s.",
705 _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name,
706 ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] :
707 _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name);
708 }
709 else if (expected_str != NULL) {
710 PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",
711 expected_str);
712 }
713 else {
714 PyErr_Format(parser_error, "Illegal terminal: expected %s.",
715 _PyParser_TokenNames[next_type]);
716 }
717 return 0;
718 }
719
720 arc_found:
721 continue;
722 }
723 /* Are we in a final state? If so, return 1 for successful validation. */
724 for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
725 if (!dfa_state->s_arc[arc].a_lbl) {
726 return 1;
727 }
728 }
729
730 illegal_num_children:
731 PyErr_Format(parser_error,
732 "Illegal number of children for %s node.", nt_dfa->d_name);
733 return 0;
734 }
735
736 /* PyObject* parser_tuple2st(PyObject* self, PyObject* args)
737 *
738 * This is the public function, called from the Python code. It receives a
739 * single tuple object from the caller, and creates an ST object if the
740 * tuple can be validated. It does this by checking the first code of the
741 * tuple, and, if acceptable, builds the internal representation. If this
742 * step succeeds, the internal representation is validated as fully as
743 * possible with the recursive validate_node() routine defined above.
744 *
745 * This function must be changed if support is to be added for PyST_FRAGMENT
746 * ST objects.
747 *
748 */
749 static PyObject*
parser_tuple2st(PyST_Object * self,PyObject * args,PyObject * kw)750 parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
751 {
752 NOTE(ARGUNUSED(self))
753 PyObject *st = 0;
754 PyObject *tuple;
755 node *tree;
756
757 static char *keywords[] = {"sequence", NULL};
758
759 if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords,
760 &tuple))
761 return (0);
762 if (!PySequence_Check(tuple)) {
763 PyErr_SetString(PyExc_ValueError,
764 "sequence2st() requires a single sequence argument");
765 return (0);
766 }
767 /*
768 * Convert the tree to the internal form before checking it.
769 */
770 tree = build_node_tree(tuple);
771 if (tree != 0) {
772 node *validation_root = NULL;
773 int tree_type = 0;
774 switch (TYPE(tree)) {
775 case eval_input:
776 /* Might be an eval form. */
777 tree_type = PyST_EXPR;
778 validation_root = tree;
779 break;
780 case encoding_decl:
781 /* This looks like an encoding_decl so far. */
782 if (NCH(tree) == 1) {
783 tree_type = PyST_SUITE;
784 validation_root = CHILD(tree, 0);
785 }
786 else {
787 err_string("Error Parsing encoding_decl");
788 }
789 break;
790 case file_input:
791 /* This looks like an exec form so far. */
792 tree_type = PyST_SUITE;
793 validation_root = tree;
794 break;
795 default:
796 /* This is a fragment, at best. */
797 err_string("parse tree does not use a valid start symbol");
798 }
799
800 if (validation_root != NULL && validate_node(validation_root))
801 st = parser_newstobject(tree, tree_type);
802 else
803 PyNode_Free(tree);
804 }
805 /* Make sure we raise an exception on all errors. We should never
806 * get this, but we'd do well to be sure something is done.
807 */
808 if (st == NULL && !PyErr_Occurred())
809 err_string("unspecified ST error occurred");
810
811 return st;
812 }
813
814
815 /* node* build_node_children()
816 *
817 * Iterate across the children of the current non-terminal node and build
818 * their structures. If successful, return the root of this portion of
819 * the tree, otherwise, 0. Any required exception will be specified already,
820 * and no memory will have been deallocated.
821 *
822 */
823 static node*
build_node_children(PyObject * tuple,node * root,int * line_num)824 build_node_children(PyObject *tuple, node *root, int *line_num)
825 {
826 Py_ssize_t len = PyObject_Size(tuple);
827 Py_ssize_t i;
828 int err;
829
830 if (len < 0) {
831 return NULL;
832 }
833 for (i = 1; i < len; ++i) {
834 /* elem must always be a sequence, however simple */
835 PyObject* elem = PySequence_GetItem(tuple, i);
836 int ok = elem != NULL;
837 int type = 0;
838 char *strn = 0;
839
840 if (ok)
841 ok = PySequence_Check(elem);
842 if (ok) {
843 PyObject *temp = PySequence_GetItem(elem, 0);
844 if (temp == NULL)
845 ok = 0;
846 else {
847 ok = PyLong_Check(temp);
848 if (ok) {
849 type = _PyLong_AsInt(temp);
850 if (type == -1 && PyErr_Occurred()) {
851 Py_DECREF(temp);
852 Py_DECREF(elem);
853 return NULL;
854 }
855 }
856 Py_DECREF(temp);
857 }
858 }
859 if (!ok) {
860 PyObject *err = Py_BuildValue("Os", elem,
861 "Illegal node construct.");
862 PyErr_SetObject(parser_error, err);
863 Py_XDECREF(err);
864 Py_XDECREF(elem);
865 return NULL;
866 }
867 if (ISTERMINAL(type)) {
868 Py_ssize_t len = PyObject_Size(elem);
869 PyObject *temp;
870 const char *temp_str;
871
872 if ((len != 2) && (len != 3)) {
873 err_string("terminal nodes must have 2 or 3 entries");
874 Py_DECREF(elem);
875 return NULL;
876 }
877 temp = PySequence_GetItem(elem, 1);
878 if (temp == NULL) {
879 Py_DECREF(elem);
880 return NULL;
881 }
882 if (!PyUnicode_Check(temp)) {
883 PyErr_Format(parser_error,
884 "second item in terminal node must be a string,"
885 " found %s",
886 Py_TYPE(temp)->tp_name);
887 Py_DECREF(temp);
888 Py_DECREF(elem);
889 return NULL;
890 }
891 if (len == 3) {
892 PyObject *o = PySequence_GetItem(elem, 2);
893 if (o == NULL) {
894 Py_DECREF(temp);
895 Py_DECREF(elem);
896 return NULL;
897 }
898 if (PyLong_Check(o)) {
899 int num = _PyLong_AsInt(o);
900 if (num == -1 && PyErr_Occurred()) {
901 Py_DECREF(o);
902 Py_DECREF(temp);
903 Py_DECREF(elem);
904 return NULL;
905 }
906 *line_num = num;
907 }
908 else {
909 PyErr_Format(parser_error,
910 "third item in terminal node must be an"
911 " integer, found %s",
912 Py_TYPE(temp)->tp_name);
913 Py_DECREF(o);
914 Py_DECREF(temp);
915 Py_DECREF(elem);
916 return NULL;
917 }
918 Py_DECREF(o);
919 }
920 temp_str = PyUnicode_AsUTF8AndSize(temp, &len);
921 if (temp_str == NULL) {
922 Py_DECREF(temp);
923 Py_DECREF(elem);
924 return NULL;
925 }
926 strn = (char *)PyObject_MALLOC(len + 1);
927 if (strn == NULL) {
928 Py_DECREF(temp);
929 Py_DECREF(elem);
930 PyErr_NoMemory();
931 return NULL;
932 }
933 (void) memcpy(strn, temp_str, len + 1);
934 Py_DECREF(temp);
935 }
936 else if (!ISNONTERMINAL(type)) {
937 /*
938 * It has to be one or the other; this is an error.
939 * Raise an exception.
940 */
941 PyObject *err = Py_BuildValue("Os", elem, "unknown node type.");
942 PyErr_SetObject(parser_error, err);
943 Py_XDECREF(err);
944 Py_DECREF(elem);
945 return NULL;
946 }
947 err = PyNode_AddChild(root, type, strn, *line_num, 0, *line_num, 0);
948 if (err == E_NOMEM) {
949 Py_DECREF(elem);
950 PyObject_FREE(strn);
951 PyErr_NoMemory();
952 return NULL;
953 }
954 if (err == E_OVERFLOW) {
955 Py_DECREF(elem);
956 PyObject_FREE(strn);
957 PyErr_SetString(PyExc_ValueError,
958 "unsupported number of child nodes");
959 return NULL;
960 }
961
962 if (ISNONTERMINAL(type)) {
963 node* new_child = CHILD(root, i - 1);
964
965 if (new_child != build_node_children(elem, new_child, line_num)) {
966 Py_DECREF(elem);
967 return NULL;
968 }
969 }
970 else if (type == NEWLINE) { /* It's true: we increment the */
971 ++(*line_num); /* line number *after* the newline! */
972 }
973 Py_DECREF(elem);
974 }
975 return root;
976 }
977
978
979 static node*
build_node_tree(PyObject * tuple)980 build_node_tree(PyObject *tuple)
981 {
982 node* res = 0;
983 PyObject *temp = PySequence_GetItem(tuple, 0);
984 long num = -1;
985
986 if (temp != NULL)
987 num = PyLong_AsLong(temp);
988 Py_XDECREF(temp);
989 if (ISTERMINAL(num)) {
990 /*
991 * The tuple is simple, but it doesn't start with a start symbol.
992 * Raise an exception now and be done with it.
993 */
994 tuple = Py_BuildValue("Os", tuple,
995 "Illegal syntax-tree; cannot start with terminal symbol.");
996 PyErr_SetObject(parser_error, tuple);
997 Py_XDECREF(tuple);
998 }
999 else if (ISNONTERMINAL(num)) {
1000 /*
1001 * Not efficient, but that can be handled later.
1002 */
1003 int line_num = 0;
1004 PyObject *encoding = NULL;
1005
1006 if (num == encoding_decl) {
1007 encoding = PySequence_GetItem(tuple, 2);
1008 if (encoding == NULL) {
1009 PyErr_SetString(parser_error, "missed encoding");
1010 return NULL;
1011 }
1012 if (!PyUnicode_Check(encoding)) {
1013 PyErr_Format(parser_error,
1014 "encoding must be a string, found %.200s",
1015 Py_TYPE(encoding)->tp_name);
1016 Py_DECREF(encoding);
1017 return NULL;
1018 }
1019 /* tuple isn't borrowed anymore here, need to DECREF */
1020 tuple = PySequence_GetSlice(tuple, 0, 2);
1021 if (tuple == NULL) {
1022 Py_DECREF(encoding);
1023 return NULL;
1024 }
1025 }
1026 res = PyNode_New(num);
1027 if (res != NULL) {
1028 if (res != build_node_children(tuple, res, &line_num)) {
1029 PyNode_Free(res);
1030 res = NULL;
1031 }
1032 if (res && encoding) {
1033 Py_ssize_t len;
1034 const char *temp;
1035 temp = PyUnicode_AsUTF8AndSize(encoding, &len);
1036 if (temp == NULL) {
1037 PyNode_Free(res);
1038 Py_DECREF(encoding);
1039 Py_DECREF(tuple);
1040 return NULL;
1041 }
1042 res->n_str = (char *)PyObject_MALLOC(len + 1);
1043 if (res->n_str == NULL) {
1044 PyNode_Free(res);
1045 Py_DECREF(encoding);
1046 Py_DECREF(tuple);
1047 PyErr_NoMemory();
1048 return NULL;
1049 }
1050 (void) memcpy(res->n_str, temp, len + 1);
1051 }
1052 }
1053 if (encoding != NULL) {
1054 Py_DECREF(encoding);
1055 Py_DECREF(tuple);
1056 }
1057 }
1058 else {
1059 /* The tuple is illegal -- if the number is neither TERMINAL nor
1060 * NONTERMINAL, we can't use it. Not sure the implementation
1061 * allows this condition, but the API doesn't preclude it.
1062 */
1063 PyObject *err = Py_BuildValue("Os", tuple,
1064 "Illegal component tuple.");
1065 PyErr_SetObject(parser_error, err);
1066 Py_XDECREF(err);
1067 }
1068
1069 return (res);
1070 }
1071
1072
1073 static PyObject*
1074 pickle_constructor = NULL;
1075
1076
1077 static PyObject*
parser__pickler(PyObject * self,PyObject * args)1078 parser__pickler(PyObject *self, PyObject *args)
1079 {
1080 NOTE(ARGUNUSED(self))
1081 PyObject *result = NULL;
1082 PyObject *st = NULL;
1083
1084 if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) {
1085 PyObject *newargs;
1086 PyObject *tuple;
1087
1088 if ((newargs = PyTuple_Pack(2, st, Py_True)) == NULL)
1089 return NULL;
1090 tuple = parser_st2tuple((PyST_Object*)NULL, newargs, NULL);
1091 if (tuple != NULL) {
1092 result = Py_BuildValue("O(O)", pickle_constructor, tuple);
1093 Py_DECREF(tuple);
1094 }
1095 Py_DECREF(newargs);
1096 }
1097
1098 return (result);
1099 }
1100
1101
1102 /* Functions exported by this module. Most of this should probably
1103 * be converted into an ST object with methods, but that is better
1104 * done directly in Python, allowing subclasses to be created directly.
1105 * We'd really have to write a wrapper around it all anyway to allow
1106 * inheritance.
1107 */
1108 static PyMethodDef parser_functions[] = {
1109 {"compilest", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE,
1110 PyDoc_STR("Compiles an ST object into a code object.")},
1111 {"expr", (PyCFunction)(void(*)(void))parser_expr, PUBLIC_METHOD_TYPE,
1112 PyDoc_STR("Creates an ST object from an expression.")},
1113 {"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE,
1114 PyDoc_STR("Determines if an ST object was created from an expression.")},
1115 {"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE,
1116 PyDoc_STR("Determines if an ST object was created from a suite.")},
1117 {"suite", (PyCFunction)(void(*)(void))parser_suite, PUBLIC_METHOD_TYPE,
1118 PyDoc_STR("Creates an ST object from a suite.")},
1119 {"sequence2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE,
1120 PyDoc_STR("Creates an ST object from a tree representation.")},
1121 {"st2tuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE,
1122 PyDoc_STR("Creates a tuple-tree representation of an ST.")},
1123 {"st2list", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE,
1124 PyDoc_STR("Creates a list-tree representation of an ST.")},
1125 {"tuple2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE,
1126 PyDoc_STR("Creates an ST object from a tree representation.")},
1127
1128 /* private stuff: support pickle module */
1129 {"_pickler", (PyCFunction)parser__pickler, METH_VARARGS,
1130 PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")},
1131
1132 {NULL, NULL, 0, NULL}
1133 };
1134
1135
1136
1137 static struct PyModuleDef parsermodule = {
1138 PyModuleDef_HEAD_INIT,
1139 "parser",
1140 NULL,
1141 -1,
1142 parser_functions,
1143 NULL,
1144 NULL,
1145 NULL,
1146 NULL
1147 };
1148
1149 PyMODINIT_FUNC PyInit_parser(void); /* supply a prototype */
1150
1151 PyMODINIT_FUNC
PyInit_parser(void)1152 PyInit_parser(void)
1153 {
1154 PyObject *module, *copyreg;
1155
1156 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1157 "The parser module is deprecated and will be removed "
1158 "in future versions of Python", 7) != 0) {
1159 return NULL;
1160 }
1161
1162 if (PyType_Ready(&PyST_Type) < 0)
1163 return NULL;
1164 module = PyModule_Create(&parsermodule);
1165 if (module == NULL)
1166 return NULL;
1167
1168 if (parser_error == 0)
1169 parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
1170
1171 if (parser_error == 0)
1172 return NULL;
1173 /* CAUTION: The code next used to skip bumping the refcount on
1174 * parser_error. That's a disaster if PyInit_parser() gets called more
1175 * than once. By incref'ing, we ensure that each module dict that
1176 * gets created owns its reference to the shared parser_error object,
1177 * and the file static parser_error vrbl owns a reference too.
1178 */
1179 Py_INCREF(parser_error);
1180 if (PyModule_AddObject(module, "ParserError", parser_error) != 0)
1181 return NULL;
1182
1183 Py_INCREF(&PyST_Type);
1184 PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type);
1185
1186 PyModule_AddStringConstant(module, "__copyright__",
1187 parser_copyright_string);
1188 PyModule_AddStringConstant(module, "__doc__",
1189 parser_doc_string);
1190 PyModule_AddStringConstant(module, "__version__",
1191 parser_version_string);
1192
1193 /* Register to support pickling.
1194 * If this fails, the import of this module will fail because an
1195 * exception will be raised here; should we clear the exception?
1196 */
1197 copyreg = PyImport_ImportModuleNoBlock("copyreg");
1198 if (copyreg != NULL) {
1199 PyObject *func, *pickler;
1200 _Py_IDENTIFIER(pickle);
1201 _Py_IDENTIFIER(sequence2st);
1202 _Py_IDENTIFIER(_pickler);
1203
1204 func = _PyObject_GetAttrId(copyreg, &PyId_pickle);
1205 pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st);
1206 pickler = _PyObject_GetAttrId(module, &PyId__pickler);
1207 Py_XINCREF(pickle_constructor);
1208 if ((func != NULL) && (pickle_constructor != NULL)
1209 && (pickler != NULL)) {
1210 PyObject *res;
1211
1212 res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler,
1213 pickle_constructor, NULL);
1214 Py_XDECREF(res);
1215 }
1216 Py_XDECREF(func);
1217 Py_XDECREF(pickle_constructor);
1218 Py_XDECREF(pickler);
1219 Py_DECREF(copyreg);
1220 }
1221 return module;
1222 }
1223