1 /* parsermodule.c 2 * 3 * Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic 4 * Institute and State University, Blacksburg, Virginia, USA. 5 * Portions copyright 1991-1995 by Stichting Mathematisch Centrum, 6 * Amsterdam, The Netherlands. Copying is permitted under the terms 7 * associated with the main Python distribution, with the additional 8 * restriction that this additional notice be included and maintained 9 * on all distributed copies. 10 * 11 * This module serves to replace the original parser module written 12 * by Guido. The functionality is not matched precisely, but the 13 * original may be implemented on top of this. This is desirable 14 * since the source of the text to be parsed is now divorced from 15 * this interface. 16 * 17 * Unlike the prior interface, the ability to give a parse tree 18 * produced by Python code as a tuple to the compiler is enabled by 19 * this module. See the documentation for more details. 20 * 21 * I've added some annotations that help with the lint code-checking 22 * program, but they're not complete by a long shot. The real errors 23 * that lint detects are gone, but there are still warnings with 24 * Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations 25 * look like "NOTE(...)". 26 * 27 */ 28 29 #include "Python.h" /* general Python API */ 30 #include "Python-ast.h" /* mod_ty */ 31 #undef Yield /* undefine macro conflicting with <winbase.h> */ 32 #include "ast.h" 33 #include "graminit.h" /* symbols defined in the grammar */ 34 #include "node.h" /* internal parser structure */ 35 #include "errcode.h" /* error codes for PyNode_*() */ 36 #include "token.h" /* token definitions */ 37 /* ISTERMINAL() / ISNONTERMINAL() */ 38 #include "grammar.h" 39 #include "parsetok.h" 40 41 extern grammar _PyParser_Grammar; /* From graminit.c */ 42 43 #ifdef lint 44 #include <note.h> 45 #else 46 #define NOTE(x) 47 #endif 48 49 /* String constants used to initialize module attributes. 50 * 51 */ 52 static const char parser_copyright_string[] = 53 "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\ 54 University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\ 55 Virginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\n\ 56 Centrum, Amsterdam, The Netherlands."; 57 58 59 PyDoc_STRVAR(parser_doc_string, 60 "This is an interface to Python's internal parser."); 61 62 static const char parser_version_string[] = "0.5"; 63 64 65 typedef PyObject* (*SeqMaker) (Py_ssize_t length); 66 typedef int (*SeqInserter) (PyObject* sequence, 67 Py_ssize_t index, 68 PyObject* element); 69 70 /* The function below is copyrighted by Stichting Mathematisch Centrum. The 71 * original copyright statement is included below, and continues to apply 72 * in full to the function immediately following. All other material is 73 * original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic 74 * Institute and State University. Changes were made to comply with the 75 * new naming conventions. Added arguments to provide support for creating 76 * lists as well as tuples, and optionally including the line numbers. 77 */ 78 79 80 static PyObject* node2tuple(node * n,SeqMaker mkseq,SeqInserter addelem,int lineno,int col_offset)81 node2tuple(node *n, /* node to convert */ 82 SeqMaker mkseq, /* create sequence */ 83 SeqInserter addelem, /* func. to add elem. in seq. */ 84 int lineno, /* include line numbers? */ 85 int col_offset) /* include column offsets? */ 86 { 87 PyObject *result = NULL, *w; 88 89 if (n == NULL) { 90 Py_RETURN_NONE; 91 } 92 93 if (ISNONTERMINAL(TYPE(n))) { 94 int i; 95 96 result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl)); 97 if (result == NULL) 98 goto error; 99 100 w = PyLong_FromLong(TYPE(n)); 101 if (w == NULL) 102 goto error; 103 (void) addelem(result, 0, w); 104 105 for (i = 0; i < NCH(n); i++) { 106 w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset); 107 if (w == NULL) 108 goto error; 109 (void) addelem(result, i+1, w); 110 } 111 112 if (TYPE(n) == encoding_decl) { 113 w = PyUnicode_FromString(STR(n)); 114 if (w == NULL) 115 goto error; 116 (void) addelem(result, i+1, w); 117 } 118 } 119 else if (ISTERMINAL(TYPE(n))) { 120 result = mkseq(2 + lineno + col_offset); 121 if (result == NULL) 122 goto error; 123 124 w = PyLong_FromLong(TYPE(n)); 125 if (w == NULL) 126 goto error; 127 (void) addelem(result, 0, w); 128 129 w = PyUnicode_FromString(STR(n)); 130 if (w == NULL) 131 goto error; 132 (void) addelem(result, 1, w); 133 134 if (lineno) { 135 w = PyLong_FromLong(n->n_lineno); 136 if (w == NULL) 137 goto error; 138 (void) addelem(result, 2, w); 139 } 140 141 if (col_offset) { 142 w = PyLong_FromLong(n->n_col_offset); 143 if (w == NULL) 144 goto error; 145 (void) addelem(result, 2 + lineno, w); 146 } 147 } 148 else { 149 PyErr_SetString(PyExc_SystemError, 150 "unrecognized parse tree node type"); 151 return ((PyObject*) NULL); 152 } 153 return result; 154 155 error: 156 Py_XDECREF(result); 157 return NULL; 158 } 159 /* 160 * End of material copyrighted by Stichting Mathematisch Centrum. 161 */ 162 163 164 165 /* There are two types of intermediate objects we're interested in: 166 * 'eval' and 'exec' types. These constants can be used in the st_type 167 * field of the object type to identify which any given object represents. 168 * These should probably go in an external header to allow other extensions 169 * to use them, but then, we really should be using C++ too. ;-) 170 */ 171 172 #define PyST_EXPR 1 173 #define PyST_SUITE 2 174 175 176 /* These are the internal objects and definitions required to implement the 177 * ST type. Most of the internal names are more reminiscent of the 'old' 178 * naming style, but the code uses the new naming convention. 179 */ 180 181 static PyObject* 182 parser_error = 0; 183 184 185 typedef struct { 186 PyObject_HEAD /* standard object header */ 187 node* st_node; /* the node* returned by the parser */ 188 int st_type; /* EXPR or SUITE ? */ 189 PyCompilerFlags st_flags; /* Parser and compiler flags */ 190 } PyST_Object; 191 192 193 static void parser_free(PyST_Object *st); 194 static PyObject* parser_sizeof(PyST_Object *, void *); 195 static PyObject* parser_richcompare(PyObject *left, PyObject *right, int op); 196 static PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *); 197 static PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *); 198 static PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *); 199 static PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *); 200 static PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *); 201 202 #define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS) 203 204 static PyMethodDef parser_methods[] = { 205 {"compile", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE, 206 PyDoc_STR("Compile this ST object into a code object.")}, 207 {"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE, 208 PyDoc_STR("Determines if this ST object was created from an expression.")}, 209 {"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE, 210 PyDoc_STR("Determines if this ST object was created from a suite.")}, 211 {"tolist", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE, 212 PyDoc_STR("Creates a list-tree representation of this ST.")}, 213 {"totuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE, 214 PyDoc_STR("Creates a tuple-tree representation of this ST.")}, 215 {"__sizeof__", (PyCFunction)parser_sizeof, METH_NOARGS, 216 PyDoc_STR("Returns size in memory, in bytes.")}, 217 {NULL, NULL, 0, NULL} 218 }; 219 220 static 221 PyTypeObject PyST_Type = { 222 PyVarObject_HEAD_INIT(NULL, 0) 223 "parser.st", /* tp_name */ 224 (int) sizeof(PyST_Object), /* tp_basicsize */ 225 0, /* tp_itemsize */ 226 (destructor)parser_free, /* tp_dealloc */ 227 0, /* tp_vectorcall_offset */ 228 0, /* tp_getattr */ 229 0, /* tp_setattr */ 230 0, /* tp_as_async */ 231 0, /* tp_repr */ 232 0, /* tp_as_number */ 233 0, /* tp_as_sequence */ 234 0, /* tp_as_mapping */ 235 0, /* tp_hash */ 236 0, /* tp_call */ 237 0, /* tp_str */ 238 0, /* tp_getattro */ 239 0, /* tp_setattro */ 240 241 /* Functions to access object as input/output buffer */ 242 0, /* tp_as_buffer */ 243 244 Py_TPFLAGS_DEFAULT, /* tp_flags */ 245 246 /* __doc__ */ 247 "Intermediate representation of a Python parse tree.", 248 0, /* tp_traverse */ 249 0, /* tp_clear */ 250 parser_richcompare, /* tp_richcompare */ 251 0, /* tp_weaklistoffset */ 252 0, /* tp_iter */ 253 0, /* tp_iternext */ 254 parser_methods, /* tp_methods */ 255 }; /* PyST_Type */ 256 257 258 /* PyST_Type isn't subclassable, so just check ob_type */ 259 #define PyST_Object_Check(v) Py_IS_TYPE(v, &PyST_Type) 260 261 static int parser_compare_nodes(node * left,node * right)262 parser_compare_nodes(node *left, node *right) 263 { 264 int j; 265 266 if (TYPE(left) < TYPE(right)) 267 return (-1); 268 269 if (TYPE(right) < TYPE(left)) 270 return (1); 271 272 if (ISTERMINAL(TYPE(left))) 273 return (strcmp(STR(left), STR(right))); 274 275 if (NCH(left) < NCH(right)) 276 return (-1); 277 278 if (NCH(right) < NCH(left)) 279 return (1); 280 281 for (j = 0; j < NCH(left); ++j) { 282 int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j)); 283 284 if (v != 0) 285 return (v); 286 } 287 return (0); 288 } 289 290 /* parser_richcompare(PyObject* left, PyObject* right, int op) 291 * 292 * Comparison function used by the Python operators ==, !=, <, >, <=, >= 293 * This really just wraps a call to parser_compare_nodes() with some easy 294 * checks and protection code. 295 * 296 */ 297 298 static PyObject * parser_richcompare(PyObject * left,PyObject * right,int op)299 parser_richcompare(PyObject *left, PyObject *right, int op) 300 { 301 int result; 302 303 /* neither argument should be NULL, unless something's gone wrong */ 304 if (left == NULL || right == NULL) { 305 PyErr_BadInternalCall(); 306 return NULL; 307 } 308 309 /* both arguments should be instances of PyST_Object */ 310 if (!PyST_Object_Check(left) || !PyST_Object_Check(right)) { 311 Py_RETURN_NOTIMPLEMENTED; 312 } 313 314 if (left == right) 315 /* if arguments are identical, they're equal */ 316 result = 0; 317 else 318 result = parser_compare_nodes(((PyST_Object *)left)->st_node, 319 ((PyST_Object *)right)->st_node); 320 321 Py_RETURN_RICHCOMPARE(result, 0, op); 322 } 323 324 /* parser_newstobject(node* st) 325 * 326 * Allocates a new Python object representing an ST. This is simply the 327 * 'wrapper' object that holds a node* and allows it to be passed around in 328 * Python code. 329 * 330 */ 331 static PyObject* parser_newstobject(node * st,int type)332 parser_newstobject(node *st, int type) 333 { 334 PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type); 335 336 if (o != 0) { 337 o->st_node = st; 338 o->st_type = type; 339 o->st_flags = _PyCompilerFlags_INIT; 340 } 341 else { 342 PyNode_Free(st); 343 } 344 return ((PyObject*)o); 345 } 346 347 348 /* void parser_free(PyST_Object* st) 349 * 350 * This is called by a del statement that reduces the reference count to 0. 351 * 352 */ 353 static void parser_free(PyST_Object * st)354 parser_free(PyST_Object *st) 355 { 356 PyNode_Free(st->st_node); 357 PyObject_Del(st); 358 } 359 360 static PyObject * parser_sizeof(PyST_Object * st,void * unused)361 parser_sizeof(PyST_Object *st, void *unused) 362 { 363 Py_ssize_t res; 364 365 res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node); 366 return PyLong_FromSsize_t(res); 367 } 368 369 370 /* parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw) 371 * 372 * This provides conversion from a node* to a tuple object that can be 373 * returned to the Python-level caller. The ST object is not modified. 374 * 375 */ 376 static PyObject* parser_st2tuple(PyST_Object * self,PyObject * args,PyObject * kw)377 parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw) 378 { 379 int line_info = 0; 380 int col_info = 0; 381 PyObject *res = 0; 382 int ok; 383 384 static char *keywords[] = {"st", "line_info", "col_info", NULL}; 385 386 if (self == NULL || PyModule_Check(self)) { 387 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords, 388 &PyST_Type, &self, &line_info, 389 &col_info); 390 } 391 else 392 ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1], 393 &line_info, &col_info); 394 if (ok != 0) { 395 /* 396 * Convert ST into a tuple representation. Use Guido's function, 397 * since it's known to work already. 398 */ 399 res = node2tuple(((PyST_Object*)self)->st_node, 400 PyTuple_New, PyTuple_SetItem, line_info, col_info); 401 } 402 return (res); 403 } 404 405 406 /* parser_st2list(PyObject* self, PyObject* args, PyObject* kw) 407 * 408 * This provides conversion from a node* to a list object that can be 409 * returned to the Python-level caller. The ST object is not modified. 410 * 411 */ 412 static PyObject* parser_st2list(PyST_Object * self,PyObject * args,PyObject * kw)413 parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw) 414 { 415 int line_info = 0; 416 int col_info = 0; 417 PyObject *res = 0; 418 int ok; 419 420 static char *keywords[] = {"st", "line_info", "col_info", NULL}; 421 422 if (self == NULL || PyModule_Check(self)) 423 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords, 424 &PyST_Type, &self, &line_info, 425 &col_info); 426 else 427 ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1], 428 &line_info, &col_info); 429 if (ok) { 430 /* 431 * Convert ST into a tuple representation. Use Guido's function, 432 * since it's known to work already. 433 */ 434 res = node2tuple(self->st_node, 435 PyList_New, PyList_SetItem, line_info, col_info); 436 } 437 return (res); 438 } 439 440 441 /* parser_compilest(PyObject* self, PyObject* args) 442 * 443 * This function creates code objects from the parse tree represented by 444 * the passed-in data object. An optional file name is passed in as well. 445 * 446 */ 447 static PyObject* parser_compilest(PyST_Object * self,PyObject * args,PyObject * kw)448 parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw) 449 { 450 PyObject* res = NULL; 451 PyArena* arena = NULL; 452 mod_ty mod; 453 PyObject* filename = NULL; 454 int ok; 455 456 static char *keywords[] = {"st", "filename", NULL}; 457 458 if (self == NULL || PyModule_Check(self)) 459 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords, 460 &PyST_Type, &self, 461 PyUnicode_FSDecoder, &filename); 462 else 463 ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1], 464 PyUnicode_FSDecoder, &filename); 465 if (!ok) 466 goto error; 467 468 if (filename == NULL) { 469 filename = PyUnicode_FromString("<syntax-tree>"); 470 if (filename == NULL) 471 goto error; 472 } 473 474 arena = PyArena_New(); 475 if (!arena) 476 goto error; 477 478 mod = PyAST_FromNodeObject(self->st_node, &self->st_flags, 479 filename, arena); 480 if (!mod) 481 goto error; 482 483 res = (PyObject *)PyAST_CompileObject(mod, filename, 484 &self->st_flags, -1, arena); 485 error: 486 Py_XDECREF(filename); 487 if (arena != NULL) 488 PyArena_Free(arena); 489 return res; 490 } 491 492 493 /* PyObject* parser_isexpr(PyObject* self, PyObject* args) 494 * PyObject* parser_issuite(PyObject* self, PyObject* args) 495 * 496 * Checks the passed-in ST object to determine if it is an expression or 497 * a statement suite, respectively. The return is a Python truth value. 498 * 499 */ 500 static PyObject* parser_isexpr(PyST_Object * self,PyObject * args,PyObject * kw)501 parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw) 502 { 503 PyObject* res = 0; 504 int ok; 505 506 static char *keywords[] = {"st", NULL}; 507 508 if (self == NULL || PyModule_Check(self)) 509 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords, 510 &PyST_Type, &self); 511 else 512 ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]); 513 514 if (ok) { 515 /* Check to see if the ST represents an expression or not. */ 516 res = (self->st_type == PyST_EXPR) ? Py_True : Py_False; 517 Py_INCREF(res); 518 } 519 return (res); 520 } 521 522 523 static PyObject* parser_issuite(PyST_Object * self,PyObject * args,PyObject * kw)524 parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw) 525 { 526 PyObject* res = 0; 527 int ok; 528 529 static char *keywords[] = {"st", NULL}; 530 531 if (self == NULL || PyModule_Check(self)) 532 ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords, 533 &PyST_Type, &self); 534 else 535 ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]); 536 537 if (ok) { 538 /* Check to see if the ST represents an expression or not. */ 539 res = (self->st_type == PyST_EXPR) ? Py_False : Py_True; 540 Py_INCREF(res); 541 } 542 return (res); 543 } 544 545 546 /* err_string(const char* message) 547 * 548 * Sets the error string for an exception of type ParserError. 549 * 550 */ 551 static void err_string(const char * message)552 err_string(const char *message) 553 { 554 PyErr_SetString(parser_error, message); 555 } 556 557 558 /* PyObject* parser_do_parse(PyObject* args, int type) 559 * 560 * Internal function to actually execute the parse and return the result if 561 * successful or set an exception if not. 562 * 563 */ 564 static PyObject* parser_do_parse(PyObject * args,PyObject * kw,const char * argspec,int type)565 parser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type) 566 { 567 char* string = 0; 568 PyObject* res = 0; 569 int flags = 0; 570 perrdetail err; 571 572 static char *keywords[] = {"source", NULL}; 573 574 if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) { 575 node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL, 576 &_PyParser_Grammar, 577 (type == PyST_EXPR) 578 ? eval_input : file_input, 579 &err, &flags); 580 581 if (n) { 582 res = parser_newstobject(n, type); 583 if (res) { 584 ((PyST_Object *)res)->st_flags.cf_flags = flags & PyCF_MASK; 585 ((PyST_Object *)res)->st_flags.cf_feature_version = PY_MINOR_VERSION; 586 } 587 } 588 else { 589 PyParser_SetError(&err); 590 } 591 PyParser_ClearError(&err); 592 } 593 return (res); 594 } 595 596 597 /* PyObject* parser_expr(PyObject* self, PyObject* args) 598 * PyObject* parser_suite(PyObject* self, PyObject* args) 599 * 600 * External interfaces to the parser itself. Which is called determines if 601 * the parser attempts to recognize an expression ('eval' form) or statement 602 * suite ('exec' form). The real work is done by parser_do_parse() above. 603 * 604 */ 605 static PyObject* parser_expr(PyST_Object * self,PyObject * args,PyObject * kw)606 parser_expr(PyST_Object *self, PyObject *args, PyObject *kw) 607 { 608 NOTE(ARGUNUSED(self)) 609 return (parser_do_parse(args, kw, "s:expr", PyST_EXPR)); 610 } 611 612 613 static PyObject* parser_suite(PyST_Object * self,PyObject * args,PyObject * kw)614 parser_suite(PyST_Object *self, PyObject *args, PyObject *kw) 615 { 616 NOTE(ARGUNUSED(self)) 617 return (parser_do_parse(args, kw, "s:suite", PyST_SUITE)); 618 } 619 620 621 622 /* This is the messy part of the code. Conversion from a tuple to an ST 623 * object requires that the input tuple be valid without having to rely on 624 * catching an exception from the compiler. This is done to allow the 625 * compiler itself to remain fast, since most of its input will come from 626 * the parser directly, and therefore be known to be syntactically correct. 627 * This validation is done to ensure that we don't core dump the compile 628 * phase, returning an exception instead. 629 * 630 * Two aspects can be broken out in this code: creating a node tree from 631 * the tuple passed in, and verifying that it is indeed valid. It may be 632 * advantageous to expand the number of ST types to include funcdefs and 633 * lambdadefs to take advantage of the optimizer, recognizing those STs 634 * here. They are not necessary, and not quite as useful in a raw form. 635 * For now, let's get expressions and suites working reliably. 636 */ 637 638 639 static node* build_node_tree(PyObject *tuple); 640 641 static int validate_node(node * tree)642 validate_node(node *tree) 643 { 644 int type = TYPE(tree); 645 int nch = NCH(tree); 646 state *dfa_state; 647 int pos, arc; 648 649 assert(ISNONTERMINAL(type)); 650 type -= NT_OFFSET; 651 if (type >= _PyParser_Grammar.g_ndfas) { 652 PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree)); 653 return 0; 654 } 655 const dfa *nt_dfa = &_PyParser_Grammar.g_dfa[type]; 656 REQ(tree, nt_dfa->d_type); 657 658 /* Run the DFA for this nonterminal. */ 659 dfa_state = nt_dfa->d_state; 660 for (pos = 0; pos < nch; ++pos) { 661 node *ch = CHILD(tree, pos); 662 int ch_type = TYPE(ch); 663 if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas) 664 || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS)) 665 || (ch_type < 0) 666 ) { 667 PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type); 668 return 0; 669 } 670 if (ch_type == suite && TYPE(tree) == funcdef) { 671 /* This is the opposite hack of what we do in parser.c 672 (search for func_body_suite), except we don't ever 673 support type comments here. */ 674 ch_type = func_body_suite; 675 } 676 for (arc = 0; arc < dfa_state->s_narcs; ++arc) { 677 short a_label = dfa_state->s_arc[arc].a_lbl; 678 assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); 679 680 const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; 681 if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) 682 && ((ch->n_str == NULL) || (label_str == NULL) 683 || (strcmp(ch->n_str, label_str) == 0)) 684 ) { 685 /* The child is acceptable; if non-terminal, validate it recursively. */ 686 if (ISNONTERMINAL(ch_type) && !validate_node(ch)) 687 return 0; 688 689 /* Update the state, and move on to the next child. */ 690 dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow]; 691 goto arc_found; 692 } 693 } 694 /* What would this state have accepted? */ 695 { 696 short a_label = dfa_state->s_arc->a_lbl; 697 if (!a_label) /* Wouldn't accept any more children */ 698 goto illegal_num_children; 699 700 int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type; 701 const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; 702 703 if (ISNONTERMINAL(next_type)) { 704 PyErr_Format(parser_error, "Expected %s, got %s.", 705 _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name, 706 ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] : 707 _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name); 708 } 709 else if (expected_str != NULL) { 710 PyErr_Format(parser_error, "Illegal terminal: expected '%s'.", 711 expected_str); 712 } 713 else { 714 PyErr_Format(parser_error, "Illegal terminal: expected %s.", 715 _PyParser_TokenNames[next_type]); 716 } 717 return 0; 718 } 719 720 arc_found: 721 continue; 722 } 723 /* Are we in a final state? If so, return 1 for successful validation. */ 724 for (arc = 0; arc < dfa_state->s_narcs; ++arc) { 725 if (!dfa_state->s_arc[arc].a_lbl) { 726 return 1; 727 } 728 } 729 730 illegal_num_children: 731 PyErr_Format(parser_error, 732 "Illegal number of children for %s node.", nt_dfa->d_name); 733 return 0; 734 } 735 736 /* PyObject* parser_tuple2st(PyObject* self, PyObject* args) 737 * 738 * This is the public function, called from the Python code. It receives a 739 * single tuple object from the caller, and creates an ST object if the 740 * tuple can be validated. It does this by checking the first code of the 741 * tuple, and, if acceptable, builds the internal representation. If this 742 * step succeeds, the internal representation is validated as fully as 743 * possible with the recursive validate_node() routine defined above. 744 * 745 * This function must be changed if support is to be added for PyST_FRAGMENT 746 * ST objects. 747 * 748 */ 749 static PyObject* parser_tuple2st(PyST_Object * self,PyObject * args,PyObject * kw)750 parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw) 751 { 752 NOTE(ARGUNUSED(self)) 753 PyObject *st = 0; 754 PyObject *tuple; 755 node *tree; 756 757 static char *keywords[] = {"sequence", NULL}; 758 759 if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords, 760 &tuple)) 761 return (0); 762 if (!PySequence_Check(tuple)) { 763 PyErr_SetString(PyExc_ValueError, 764 "sequence2st() requires a single sequence argument"); 765 return (0); 766 } 767 /* 768 * Convert the tree to the internal form before checking it. 769 */ 770 tree = build_node_tree(tuple); 771 if (tree != 0) { 772 node *validation_root = NULL; 773 int tree_type = 0; 774 switch (TYPE(tree)) { 775 case eval_input: 776 /* Might be an eval form. */ 777 tree_type = PyST_EXPR; 778 validation_root = tree; 779 break; 780 case encoding_decl: 781 /* This looks like an encoding_decl so far. */ 782 if (NCH(tree) == 1) { 783 tree_type = PyST_SUITE; 784 validation_root = CHILD(tree, 0); 785 } 786 else { 787 err_string("Error Parsing encoding_decl"); 788 } 789 break; 790 case file_input: 791 /* This looks like an exec form so far. */ 792 tree_type = PyST_SUITE; 793 validation_root = tree; 794 break; 795 default: 796 /* This is a fragment, at best. */ 797 err_string("parse tree does not use a valid start symbol"); 798 } 799 800 if (validation_root != NULL && validate_node(validation_root)) 801 st = parser_newstobject(tree, tree_type); 802 else 803 PyNode_Free(tree); 804 } 805 /* Make sure we raise an exception on all errors. We should never 806 * get this, but we'd do well to be sure something is done. 807 */ 808 if (st == NULL && !PyErr_Occurred()) 809 err_string("unspecified ST error occurred"); 810 811 return st; 812 } 813 814 815 /* node* build_node_children() 816 * 817 * Iterate across the children of the current non-terminal node and build 818 * their structures. If successful, return the root of this portion of 819 * the tree, otherwise, 0. Any required exception will be specified already, 820 * and no memory will have been deallocated. 821 * 822 */ 823 static node* build_node_children(PyObject * tuple,node * root,int * line_num)824 build_node_children(PyObject *tuple, node *root, int *line_num) 825 { 826 Py_ssize_t len = PyObject_Size(tuple); 827 Py_ssize_t i; 828 int err; 829 830 if (len < 0) { 831 return NULL; 832 } 833 for (i = 1; i < len; ++i) { 834 /* elem must always be a sequence, however simple */ 835 PyObject* elem = PySequence_GetItem(tuple, i); 836 int ok = elem != NULL; 837 int type = 0; 838 char *strn = 0; 839 840 if (ok) 841 ok = PySequence_Check(elem); 842 if (ok) { 843 PyObject *temp = PySequence_GetItem(elem, 0); 844 if (temp == NULL) 845 ok = 0; 846 else { 847 ok = PyLong_Check(temp); 848 if (ok) { 849 type = _PyLong_AsInt(temp); 850 if (type == -1 && PyErr_Occurred()) { 851 Py_DECREF(temp); 852 Py_DECREF(elem); 853 return NULL; 854 } 855 } 856 Py_DECREF(temp); 857 } 858 } 859 if (!ok) { 860 PyObject *err = Py_BuildValue("Os", elem, 861 "Illegal node construct."); 862 PyErr_SetObject(parser_error, err); 863 Py_XDECREF(err); 864 Py_XDECREF(elem); 865 return NULL; 866 } 867 if (ISTERMINAL(type)) { 868 Py_ssize_t len = PyObject_Size(elem); 869 PyObject *temp; 870 const char *temp_str; 871 872 if ((len != 2) && (len != 3)) { 873 err_string("terminal nodes must have 2 or 3 entries"); 874 Py_DECREF(elem); 875 return NULL; 876 } 877 temp = PySequence_GetItem(elem, 1); 878 if (temp == NULL) { 879 Py_DECREF(elem); 880 return NULL; 881 } 882 if (!PyUnicode_Check(temp)) { 883 PyErr_Format(parser_error, 884 "second item in terminal node must be a string," 885 " found %s", 886 Py_TYPE(temp)->tp_name); 887 Py_DECREF(temp); 888 Py_DECREF(elem); 889 return NULL; 890 } 891 if (len == 3) { 892 PyObject *o = PySequence_GetItem(elem, 2); 893 if (o == NULL) { 894 Py_DECREF(temp); 895 Py_DECREF(elem); 896 return NULL; 897 } 898 if (PyLong_Check(o)) { 899 int num = _PyLong_AsInt(o); 900 if (num == -1 && PyErr_Occurred()) { 901 Py_DECREF(o); 902 Py_DECREF(temp); 903 Py_DECREF(elem); 904 return NULL; 905 } 906 *line_num = num; 907 } 908 else { 909 PyErr_Format(parser_error, 910 "third item in terminal node must be an" 911 " integer, found %s", 912 Py_TYPE(temp)->tp_name); 913 Py_DECREF(o); 914 Py_DECREF(temp); 915 Py_DECREF(elem); 916 return NULL; 917 } 918 Py_DECREF(o); 919 } 920 temp_str = PyUnicode_AsUTF8AndSize(temp, &len); 921 if (temp_str == NULL) { 922 Py_DECREF(temp); 923 Py_DECREF(elem); 924 return NULL; 925 } 926 strn = (char *)PyObject_MALLOC(len + 1); 927 if (strn == NULL) { 928 Py_DECREF(temp); 929 Py_DECREF(elem); 930 PyErr_NoMemory(); 931 return NULL; 932 } 933 (void) memcpy(strn, temp_str, len + 1); 934 Py_DECREF(temp); 935 } 936 else if (!ISNONTERMINAL(type)) { 937 /* 938 * It has to be one or the other; this is an error. 939 * Raise an exception. 940 */ 941 PyObject *err = Py_BuildValue("Os", elem, "unknown node type."); 942 PyErr_SetObject(parser_error, err); 943 Py_XDECREF(err); 944 Py_DECREF(elem); 945 return NULL; 946 } 947 err = PyNode_AddChild(root, type, strn, *line_num, 0, *line_num, 0); 948 if (err == E_NOMEM) { 949 Py_DECREF(elem); 950 PyObject_FREE(strn); 951 PyErr_NoMemory(); 952 return NULL; 953 } 954 if (err == E_OVERFLOW) { 955 Py_DECREF(elem); 956 PyObject_FREE(strn); 957 PyErr_SetString(PyExc_ValueError, 958 "unsupported number of child nodes"); 959 return NULL; 960 } 961 962 if (ISNONTERMINAL(type)) { 963 node* new_child = CHILD(root, i - 1); 964 965 if (new_child != build_node_children(elem, new_child, line_num)) { 966 Py_DECREF(elem); 967 return NULL; 968 } 969 } 970 else if (type == NEWLINE) { /* It's true: we increment the */ 971 ++(*line_num); /* line number *after* the newline! */ 972 } 973 Py_DECREF(elem); 974 } 975 return root; 976 } 977 978 979 static node* build_node_tree(PyObject * tuple)980 build_node_tree(PyObject *tuple) 981 { 982 node* res = 0; 983 PyObject *temp = PySequence_GetItem(tuple, 0); 984 long num = -1; 985 986 if (temp != NULL) 987 num = PyLong_AsLong(temp); 988 Py_XDECREF(temp); 989 if (ISTERMINAL(num)) { 990 /* 991 * The tuple is simple, but it doesn't start with a start symbol. 992 * Raise an exception now and be done with it. 993 */ 994 tuple = Py_BuildValue("Os", tuple, 995 "Illegal syntax-tree; cannot start with terminal symbol."); 996 PyErr_SetObject(parser_error, tuple); 997 Py_XDECREF(tuple); 998 } 999 else if (ISNONTERMINAL(num)) { 1000 /* 1001 * Not efficient, but that can be handled later. 1002 */ 1003 int line_num = 0; 1004 PyObject *encoding = NULL; 1005 1006 if (num == encoding_decl) { 1007 encoding = PySequence_GetItem(tuple, 2); 1008 if (encoding == NULL) { 1009 PyErr_SetString(parser_error, "missed encoding"); 1010 return NULL; 1011 } 1012 if (!PyUnicode_Check(encoding)) { 1013 PyErr_Format(parser_error, 1014 "encoding must be a string, found %.200s", 1015 Py_TYPE(encoding)->tp_name); 1016 Py_DECREF(encoding); 1017 return NULL; 1018 } 1019 /* tuple isn't borrowed anymore here, need to DECREF */ 1020 tuple = PySequence_GetSlice(tuple, 0, 2); 1021 if (tuple == NULL) { 1022 Py_DECREF(encoding); 1023 return NULL; 1024 } 1025 } 1026 res = PyNode_New(num); 1027 if (res != NULL) { 1028 if (res != build_node_children(tuple, res, &line_num)) { 1029 PyNode_Free(res); 1030 res = NULL; 1031 } 1032 if (res && encoding) { 1033 Py_ssize_t len; 1034 const char *temp; 1035 temp = PyUnicode_AsUTF8AndSize(encoding, &len); 1036 if (temp == NULL) { 1037 PyNode_Free(res); 1038 Py_DECREF(encoding); 1039 Py_DECREF(tuple); 1040 return NULL; 1041 } 1042 res->n_str = (char *)PyObject_MALLOC(len + 1); 1043 if (res->n_str == NULL) { 1044 PyNode_Free(res); 1045 Py_DECREF(encoding); 1046 Py_DECREF(tuple); 1047 PyErr_NoMemory(); 1048 return NULL; 1049 } 1050 (void) memcpy(res->n_str, temp, len + 1); 1051 } 1052 } 1053 if (encoding != NULL) { 1054 Py_DECREF(encoding); 1055 Py_DECREF(tuple); 1056 } 1057 } 1058 else { 1059 /* The tuple is illegal -- if the number is neither TERMINAL nor 1060 * NONTERMINAL, we can't use it. Not sure the implementation 1061 * allows this condition, but the API doesn't preclude it. 1062 */ 1063 PyObject *err = Py_BuildValue("Os", tuple, 1064 "Illegal component tuple."); 1065 PyErr_SetObject(parser_error, err); 1066 Py_XDECREF(err); 1067 } 1068 1069 return (res); 1070 } 1071 1072 1073 static PyObject* 1074 pickle_constructor = NULL; 1075 1076 1077 static PyObject* parser__pickler(PyObject * self,PyObject * args)1078 parser__pickler(PyObject *self, PyObject *args) 1079 { 1080 NOTE(ARGUNUSED(self)) 1081 PyObject *result = NULL; 1082 PyObject *st = NULL; 1083 1084 if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) { 1085 PyObject *newargs; 1086 PyObject *tuple; 1087 1088 if ((newargs = PyTuple_Pack(2, st, Py_True)) == NULL) 1089 return NULL; 1090 tuple = parser_st2tuple((PyST_Object*)NULL, newargs, NULL); 1091 if (tuple != NULL) { 1092 result = Py_BuildValue("O(O)", pickle_constructor, tuple); 1093 Py_DECREF(tuple); 1094 } 1095 Py_DECREF(newargs); 1096 } 1097 1098 return (result); 1099 } 1100 1101 1102 /* Functions exported by this module. Most of this should probably 1103 * be converted into an ST object with methods, but that is better 1104 * done directly in Python, allowing subclasses to be created directly. 1105 * We'd really have to write a wrapper around it all anyway to allow 1106 * inheritance. 1107 */ 1108 static PyMethodDef parser_functions[] = { 1109 {"compilest", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE, 1110 PyDoc_STR("Compiles an ST object into a code object.")}, 1111 {"expr", (PyCFunction)(void(*)(void))parser_expr, PUBLIC_METHOD_TYPE, 1112 PyDoc_STR("Creates an ST object from an expression.")}, 1113 {"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE, 1114 PyDoc_STR("Determines if an ST object was created from an expression.")}, 1115 {"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE, 1116 PyDoc_STR("Determines if an ST object was created from a suite.")}, 1117 {"suite", (PyCFunction)(void(*)(void))parser_suite, PUBLIC_METHOD_TYPE, 1118 PyDoc_STR("Creates an ST object from a suite.")}, 1119 {"sequence2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE, 1120 PyDoc_STR("Creates an ST object from a tree representation.")}, 1121 {"st2tuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE, 1122 PyDoc_STR("Creates a tuple-tree representation of an ST.")}, 1123 {"st2list", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE, 1124 PyDoc_STR("Creates a list-tree representation of an ST.")}, 1125 {"tuple2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE, 1126 PyDoc_STR("Creates an ST object from a tree representation.")}, 1127 1128 /* private stuff: support pickle module */ 1129 {"_pickler", (PyCFunction)parser__pickler, METH_VARARGS, 1130 PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")}, 1131 1132 {NULL, NULL, 0, NULL} 1133 }; 1134 1135 1136 1137 static struct PyModuleDef parsermodule = { 1138 PyModuleDef_HEAD_INIT, 1139 "parser", 1140 NULL, 1141 -1, 1142 parser_functions, 1143 NULL, 1144 NULL, 1145 NULL, 1146 NULL 1147 }; 1148 1149 PyMODINIT_FUNC PyInit_parser(void); /* supply a prototype */ 1150 1151 PyMODINIT_FUNC PyInit_parser(void)1152 PyInit_parser(void) 1153 { 1154 PyObject *module, *copyreg; 1155 1156 if (PyErr_WarnEx(PyExc_DeprecationWarning, 1157 "The parser module is deprecated and will be removed " 1158 "in future versions of Python", 7) != 0) { 1159 return NULL; 1160 } 1161 1162 if (PyType_Ready(&PyST_Type) < 0) 1163 return NULL; 1164 module = PyModule_Create(&parsermodule); 1165 if (module == NULL) 1166 return NULL; 1167 1168 if (parser_error == 0) 1169 parser_error = PyErr_NewException("parser.ParserError", NULL, NULL); 1170 1171 if (parser_error == 0) 1172 return NULL; 1173 /* CAUTION: The code next used to skip bumping the refcount on 1174 * parser_error. That's a disaster if PyInit_parser() gets called more 1175 * than once. By incref'ing, we ensure that each module dict that 1176 * gets created owns its reference to the shared parser_error object, 1177 * and the file static parser_error vrbl owns a reference too. 1178 */ 1179 Py_INCREF(parser_error); 1180 if (PyModule_AddObject(module, "ParserError", parser_error) != 0) 1181 return NULL; 1182 1183 Py_INCREF(&PyST_Type); 1184 PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type); 1185 1186 PyModule_AddStringConstant(module, "__copyright__", 1187 parser_copyright_string); 1188 PyModule_AddStringConstant(module, "__doc__", 1189 parser_doc_string); 1190 PyModule_AddStringConstant(module, "__version__", 1191 parser_version_string); 1192 1193 /* Register to support pickling. 1194 * If this fails, the import of this module will fail because an 1195 * exception will be raised here; should we clear the exception? 1196 */ 1197 copyreg = PyImport_ImportModuleNoBlock("copyreg"); 1198 if (copyreg != NULL) { 1199 PyObject *func, *pickler; 1200 _Py_IDENTIFIER(pickle); 1201 _Py_IDENTIFIER(sequence2st); 1202 _Py_IDENTIFIER(_pickler); 1203 1204 func = _PyObject_GetAttrId(copyreg, &PyId_pickle); 1205 pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st); 1206 pickler = _PyObject_GetAttrId(module, &PyId__pickler); 1207 Py_XINCREF(pickle_constructor); 1208 if ((func != NULL) && (pickle_constructor != NULL) 1209 && (pickler != NULL)) { 1210 PyObject *res; 1211 1212 res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler, 1213 pickle_constructor, NULL); 1214 Py_XDECREF(res); 1215 } 1216 Py_XDECREF(func); 1217 Py_XDECREF(pickle_constructor); 1218 Py_XDECREF(pickler); 1219 Py_DECREF(copyreg); 1220 } 1221 return module; 1222 } 1223