• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*  parsermodule.c
2   *
3   *  Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
4   *  Institute and State University, Blacksburg, Virginia, USA.
5   *  Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
6   *  Amsterdam, The Netherlands.  Copying is permitted under the terms
7   *  associated with the main Python distribution, with the additional
8   *  restriction that this additional notice be included and maintained
9   *  on all distributed copies.
10   *
11   *  This module serves to replace the original parser module written
12   *  by Guido.  The functionality is not matched precisely, but the
13   *  original may be implemented on top of this.  This is desirable
14   *  since the source of the text to be parsed is now divorced from
15   *  this interface.
16   *
17   *  Unlike the prior interface, the ability to give a parse tree
18   *  produced by Python code as a tuple to the compiler is enabled by
19   *  this module.  See the documentation for more details.
20   *
21   *  I've added some annotations that help with the lint code-checking
22   *  program, but they're not complete by a long shot.  The real errors
23   *  that lint detects are gone, but there are still warnings with
24   *  Py_[X]DECREF() and Py_[X]INCREF() macros.  The lint annotations
25   *  look like "NOTE(...)".
26   *
27   */
28  
29  #include "Python.h"                     /* general Python API             */
30  #include "Python-ast.h"                 /* mod_ty */
31  #undef Yield   /* undefine macro conflicting with <winbase.h> */
32  #include "ast.h"
33  #include "graminit.h"                   /* symbols defined in the grammar */
34  #include "node.h"                       /* internal parser structure      */
35  #include "errcode.h"                    /* error codes for PyNode_*()     */
36  #include "token.h"                      /* token definitions              */
37                                          /* ISTERMINAL() / ISNONTERMINAL() */
38  #include "grammar.h"
39  #include "parsetok.h"
40  
41  extern grammar _PyParser_Grammar; /* From graminit.c */
42  
43  #ifdef lint
44  #include <note.h>
45  #else
46  #define NOTE(x)
47  #endif
48  
49  /*  String constants used to initialize module attributes.
50   *
51   */
52  static const char parser_copyright_string[] =
53  "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
54  University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
55  Virginia, USA.  Portions copyright 1991-1995 by Stichting Mathematisch\n\
56  Centrum, Amsterdam, The Netherlands.";
57  
58  
59  PyDoc_STRVAR(parser_doc_string,
60  "This is an interface to Python's internal parser.");
61  
62  static const char parser_version_string[] = "0.5";
63  
64  
65  typedef PyObject* (*SeqMaker) (Py_ssize_t length);
66  typedef int (*SeqInserter) (PyObject* sequence,
67                              Py_ssize_t index,
68                              PyObject* element);
69  
70  /*  The function below is copyrighted by Stichting Mathematisch Centrum.  The
71   *  original copyright statement is included below, and continues to apply
72   *  in full to the function immediately following.  All other material is
73   *  original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
74   *  Institute and State University.  Changes were made to comply with the
75   *  new naming conventions.  Added arguments to provide support for creating
76   *  lists as well as tuples, and optionally including the line numbers.
77   */
78  
79  
80  static PyObject*
node2tuple(node * n,SeqMaker mkseq,SeqInserter addelem,int lineno,int col_offset)81  node2tuple(node *n,                     /* node to convert               */
82             SeqMaker mkseq,              /* create sequence               */
83             SeqInserter addelem,         /* func. to add elem. in seq.    */
84             int lineno,                  /* include line numbers?         */
85             int col_offset)              /* include column offsets?       */
86  {
87      PyObject *result = NULL, *w;
88  
89      if (n == NULL) {
90          Py_RETURN_NONE;
91      }
92  
93      if (ISNONTERMINAL(TYPE(n))) {
94          int i;
95  
96          result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl));
97          if (result == NULL)
98              goto error;
99  
100          w = PyLong_FromLong(TYPE(n));
101          if (w == NULL)
102              goto error;
103          (void) addelem(result, 0, w);
104  
105          for (i = 0; i < NCH(n); i++) {
106              w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset);
107              if (w == NULL)
108                  goto error;
109              (void) addelem(result, i+1, w);
110          }
111  
112          if (TYPE(n) == encoding_decl) {
113              w = PyUnicode_FromString(STR(n));
114              if (w == NULL)
115                  goto error;
116              (void) addelem(result, i+1, w);
117          }
118      }
119      else if (ISTERMINAL(TYPE(n))) {
120          result = mkseq(2 + lineno + col_offset);
121          if (result == NULL)
122              goto error;
123  
124          w = PyLong_FromLong(TYPE(n));
125          if (w == NULL)
126              goto error;
127          (void) addelem(result, 0, w);
128  
129          w = PyUnicode_FromString(STR(n));
130          if (w == NULL)
131              goto error;
132          (void) addelem(result, 1, w);
133  
134          if (lineno) {
135              w = PyLong_FromLong(n->n_lineno);
136              if (w == NULL)
137                  goto error;
138              (void) addelem(result, 2, w);
139          }
140  
141          if (col_offset) {
142              w = PyLong_FromLong(n->n_col_offset);
143              if (w == NULL)
144                  goto error;
145              (void) addelem(result, 2 + lineno, w);
146          }
147      }
148      else {
149          PyErr_SetString(PyExc_SystemError,
150                          "unrecognized parse tree node type");
151          return ((PyObject*) NULL);
152      }
153      return result;
154  
155  error:
156      Py_XDECREF(result);
157      return NULL;
158  }
159  /*
160   *  End of material copyrighted by Stichting Mathematisch Centrum.
161   */
162  
163  
164  
165  /*  There are two types of intermediate objects we're interested in:
166   *  'eval' and 'exec' types.  These constants can be used in the st_type
167   *  field of the object type to identify which any given object represents.
168   *  These should probably go in an external header to allow other extensions
169   *  to use them, but then, we really should be using C++ too.  ;-)
170   */
171  
172  #define PyST_EXPR  1
173  #define PyST_SUITE 2
174  
175  
176  /*  These are the internal objects and definitions required to implement the
177   *  ST type.  Most of the internal names are more reminiscent of the 'old'
178   *  naming style, but the code uses the new naming convention.
179   */
180  
181  static PyObject*
182  parser_error = 0;
183  
184  
185  typedef struct {
186      PyObject_HEAD                       /* standard object header           */
187      node* st_node;                      /* the node* returned by the parser */
188      int   st_type;                      /* EXPR or SUITE ?                  */
189      PyCompilerFlags st_flags;           /* Parser and compiler flags        */
190  } PyST_Object;
191  
192  
193  static void parser_free(PyST_Object *st);
194  static PyObject* parser_sizeof(PyST_Object *, void *);
195  static PyObject* parser_richcompare(PyObject *left, PyObject *right, int op);
196  static PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *);
197  static PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *);
198  static PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *);
199  static PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *);
200  static PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *);
201  
202  #define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
203  
204  static PyMethodDef parser_methods[] = {
205      {"compile",         (PyCFunction)(void(*)(void))parser_compilest,  PUBLIC_METHOD_TYPE,
206          PyDoc_STR("Compile this ST object into a code object.")},
207      {"isexpr",          (PyCFunction)(void(*)(void))parser_isexpr,     PUBLIC_METHOD_TYPE,
208          PyDoc_STR("Determines if this ST object was created from an expression.")},
209      {"issuite",         (PyCFunction)(void(*)(void))parser_issuite,    PUBLIC_METHOD_TYPE,
210          PyDoc_STR("Determines if this ST object was created from a suite.")},
211      {"tolist",          (PyCFunction)(void(*)(void))parser_st2list,    PUBLIC_METHOD_TYPE,
212          PyDoc_STR("Creates a list-tree representation of this ST.")},
213      {"totuple",         (PyCFunction)(void(*)(void))parser_st2tuple,   PUBLIC_METHOD_TYPE,
214          PyDoc_STR("Creates a tuple-tree representation of this ST.")},
215      {"__sizeof__",      (PyCFunction)parser_sizeof,     METH_NOARGS,
216          PyDoc_STR("Returns size in memory, in bytes.")},
217      {NULL, NULL, 0, NULL}
218  };
219  
220  static
221  PyTypeObject PyST_Type = {
222      PyVarObject_HEAD_INIT(NULL, 0)
223      "parser.st",                        /* tp_name              */
224      (int) sizeof(PyST_Object),          /* tp_basicsize         */
225      0,                                  /* tp_itemsize          */
226      (destructor)parser_free,            /* tp_dealloc           */
227      0,                                  /* tp_vectorcall_offset */
228      0,                                  /* tp_getattr           */
229      0,                                  /* tp_setattr           */
230      0,                                  /* tp_as_async          */
231      0,                                  /* tp_repr              */
232      0,                                  /* tp_as_number         */
233      0,                                  /* tp_as_sequence       */
234      0,                                  /* tp_as_mapping        */
235      0,                                  /* tp_hash              */
236      0,                                  /* tp_call              */
237      0,                                  /* tp_str               */
238      0,                                  /* tp_getattro          */
239      0,                                  /* tp_setattro          */
240  
241      /* Functions to access object as input/output buffer */
242      0,                                  /* tp_as_buffer         */
243  
244      Py_TPFLAGS_DEFAULT,                 /* tp_flags             */
245  
246      /* __doc__ */
247      "Intermediate representation of a Python parse tree.",
248      0,                                  /* tp_traverse */
249      0,                                  /* tp_clear */
250      parser_richcompare,                 /* tp_richcompare */
251      0,                                  /* tp_weaklistoffset */
252      0,                                  /* tp_iter */
253      0,                                  /* tp_iternext */
254      parser_methods,                     /* tp_methods */
255  };  /* PyST_Type */
256  
257  
258  /* PyST_Type isn't subclassable, so just check ob_type */
259  #define PyST_Object_Check(v) Py_IS_TYPE(v, &PyST_Type)
260  
261  static int
parser_compare_nodes(node * left,node * right)262  parser_compare_nodes(node *left, node *right)
263  {
264      int j;
265  
266      if (TYPE(left) < TYPE(right))
267          return (-1);
268  
269      if (TYPE(right) < TYPE(left))
270          return (1);
271  
272      if (ISTERMINAL(TYPE(left)))
273          return (strcmp(STR(left), STR(right)));
274  
275      if (NCH(left) < NCH(right))
276          return (-1);
277  
278      if (NCH(right) < NCH(left))
279          return (1);
280  
281      for (j = 0; j < NCH(left); ++j) {
282          int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
283  
284          if (v != 0)
285              return (v);
286      }
287      return (0);
288  }
289  
290  /*  parser_richcompare(PyObject* left, PyObject* right, int op)
291   *
292   *  Comparison function used by the Python operators ==, !=, <, >, <=, >=
293   *  This really just wraps a call to parser_compare_nodes() with some easy
294   *  checks and protection code.
295   *
296   */
297  
298  static PyObject *
parser_richcompare(PyObject * left,PyObject * right,int op)299  parser_richcompare(PyObject *left, PyObject *right, int op)
300  {
301      int result;
302  
303      /* neither argument should be NULL, unless something's gone wrong */
304      if (left == NULL || right == NULL) {
305          PyErr_BadInternalCall();
306          return NULL;
307      }
308  
309      /* both arguments should be instances of PyST_Object */
310      if (!PyST_Object_Check(left) || !PyST_Object_Check(right)) {
311          Py_RETURN_NOTIMPLEMENTED;
312      }
313  
314      if (left == right)
315          /* if arguments are identical, they're equal */
316          result = 0;
317      else
318          result = parser_compare_nodes(((PyST_Object *)left)->st_node,
319                                        ((PyST_Object *)right)->st_node);
320  
321      Py_RETURN_RICHCOMPARE(result, 0, op);
322  }
323  
324  /*  parser_newstobject(node* st)
325   *
326   *  Allocates a new Python object representing an ST.  This is simply the
327   *  'wrapper' object that holds a node* and allows it to be passed around in
328   *  Python code.
329   *
330   */
331  static PyObject*
parser_newstobject(node * st,int type)332  parser_newstobject(node *st, int type)
333  {
334      PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type);
335  
336      if (o != 0) {
337          o->st_node = st;
338          o->st_type = type;
339          o->st_flags = _PyCompilerFlags_INIT;
340      }
341      else {
342          PyNode_Free(st);
343      }
344      return ((PyObject*)o);
345  }
346  
347  
348  /*  void parser_free(PyST_Object* st)
349   *
350   *  This is called by a del statement that reduces the reference count to 0.
351   *
352   */
353  static void
parser_free(PyST_Object * st)354  parser_free(PyST_Object *st)
355  {
356      PyNode_Free(st->st_node);
357      PyObject_Del(st);
358  }
359  
360  static PyObject *
parser_sizeof(PyST_Object * st,void * unused)361  parser_sizeof(PyST_Object *st, void *unused)
362  {
363      Py_ssize_t res;
364  
365      res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node);
366      return PyLong_FromSsize_t(res);
367  }
368  
369  
370  /*  parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw)
371   *
372   *  This provides conversion from a node* to a tuple object that can be
373   *  returned to the Python-level caller.  The ST object is not modified.
374   *
375   */
376  static PyObject*
parser_st2tuple(PyST_Object * self,PyObject * args,PyObject * kw)377  parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw)
378  {
379      int line_info = 0;
380      int col_info = 0;
381      PyObject *res = 0;
382      int ok;
383  
384      static char *keywords[] = {"st", "line_info", "col_info", NULL};
385  
386      if (self == NULL || PyModule_Check(self)) {
387          ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords,
388                                           &PyST_Type, &self, &line_info,
389                                           &col_info);
390      }
391      else
392          ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1],
393                                           &line_info, &col_info);
394      if (ok != 0) {
395          /*
396           *  Convert ST into a tuple representation.  Use Guido's function,
397           *  since it's known to work already.
398           */
399          res = node2tuple(((PyST_Object*)self)->st_node,
400                           PyTuple_New, PyTuple_SetItem, line_info, col_info);
401      }
402      return (res);
403  }
404  
405  
406  /*  parser_st2list(PyObject* self, PyObject* args, PyObject* kw)
407   *
408   *  This provides conversion from a node* to a list object that can be
409   *  returned to the Python-level caller.  The ST object is not modified.
410   *
411   */
412  static PyObject*
parser_st2list(PyST_Object * self,PyObject * args,PyObject * kw)413  parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw)
414  {
415      int line_info = 0;
416      int col_info = 0;
417      PyObject *res = 0;
418      int ok;
419  
420      static char *keywords[] = {"st", "line_info", "col_info", NULL};
421  
422      if (self == NULL || PyModule_Check(self))
423          ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords,
424                                           &PyST_Type, &self, &line_info,
425                                           &col_info);
426      else
427          ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1],
428                                           &line_info, &col_info);
429      if (ok) {
430          /*
431           *  Convert ST into a tuple representation.  Use Guido's function,
432           *  since it's known to work already.
433           */
434          res = node2tuple(self->st_node,
435                           PyList_New, PyList_SetItem, line_info, col_info);
436      }
437      return (res);
438  }
439  
440  
441  /*  parser_compilest(PyObject* self, PyObject* args)
442   *
443   *  This function creates code objects from the parse tree represented by
444   *  the passed-in data object.  An optional file name is passed in as well.
445   *
446   */
447  static PyObject*
parser_compilest(PyST_Object * self,PyObject * args,PyObject * kw)448  parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw)
449  {
450      PyObject*     res = NULL;
451      PyArena*      arena = NULL;
452      mod_ty        mod;
453      PyObject*     filename = NULL;
454      int ok;
455  
456      static char *keywords[] = {"st", "filename", NULL};
457  
458      if (self == NULL || PyModule_Check(self))
459          ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords,
460                                           &PyST_Type, &self,
461                                           PyUnicode_FSDecoder, &filename);
462      else
463          ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1],
464                                           PyUnicode_FSDecoder, &filename);
465      if (!ok)
466          goto error;
467  
468      if (filename == NULL) {
469          filename = PyUnicode_FromString("<syntax-tree>");
470          if (filename == NULL)
471              goto error;
472      }
473  
474      arena = PyArena_New();
475      if (!arena)
476          goto error;
477  
478      mod = PyAST_FromNodeObject(self->st_node, &self->st_flags,
479                                 filename, arena);
480      if (!mod)
481          goto error;
482  
483      res = (PyObject *)PyAST_CompileObject(mod, filename,
484                                            &self->st_flags, -1, arena);
485  error:
486      Py_XDECREF(filename);
487      if (arena != NULL)
488          PyArena_Free(arena);
489      return res;
490  }
491  
492  
493  /*  PyObject* parser_isexpr(PyObject* self, PyObject* args)
494   *  PyObject* parser_issuite(PyObject* self, PyObject* args)
495   *
496   *  Checks the passed-in ST object to determine if it is an expression or
497   *  a statement suite, respectively.  The return is a Python truth value.
498   *
499   */
500  static PyObject*
parser_isexpr(PyST_Object * self,PyObject * args,PyObject * kw)501  parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw)
502  {
503      PyObject* res = 0;
504      int ok;
505  
506      static char *keywords[] = {"st", NULL};
507  
508      if (self == NULL || PyModule_Check(self))
509          ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
510                                           &PyST_Type, &self);
511      else
512          ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
513  
514      if (ok) {
515          /* Check to see if the ST represents an expression or not. */
516          res = (self->st_type == PyST_EXPR) ? Py_True : Py_False;
517          Py_INCREF(res);
518      }
519      return (res);
520  }
521  
522  
523  static PyObject*
parser_issuite(PyST_Object * self,PyObject * args,PyObject * kw)524  parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw)
525  {
526      PyObject* res = 0;
527      int ok;
528  
529      static char *keywords[] = {"st", NULL};
530  
531      if (self == NULL || PyModule_Check(self))
532          ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
533                                           &PyST_Type, &self);
534      else
535          ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
536  
537      if (ok) {
538          /* Check to see if the ST represents an expression or not. */
539          res = (self->st_type == PyST_EXPR) ? Py_False : Py_True;
540          Py_INCREF(res);
541      }
542      return (res);
543  }
544  
545  
546  /*  err_string(const char* message)
547   *
548   *  Sets the error string for an exception of type ParserError.
549   *
550   */
551  static void
err_string(const char * message)552  err_string(const char *message)
553  {
554      PyErr_SetString(parser_error, message);
555  }
556  
557  
558  /*  PyObject* parser_do_parse(PyObject* args, int type)
559   *
560   *  Internal function to actually execute the parse and return the result if
561   *  successful or set an exception if not.
562   *
563   */
564  static PyObject*
parser_do_parse(PyObject * args,PyObject * kw,const char * argspec,int type)565  parser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type)
566  {
567      char*     string = 0;
568      PyObject* res    = 0;
569      int flags        = 0;
570      perrdetail err;
571  
572      static char *keywords[] = {"source", NULL};
573  
574      if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) {
575          node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL,
576                                                         &_PyParser_Grammar,
577                                                        (type == PyST_EXPR)
578                                                        ? eval_input : file_input,
579                                                        &err, &flags);
580  
581          if (n) {
582              res = parser_newstobject(n, type);
583              if (res) {
584                  ((PyST_Object *)res)->st_flags.cf_flags = flags & PyCF_MASK;
585                  ((PyST_Object *)res)->st_flags.cf_feature_version = PY_MINOR_VERSION;
586              }
587          }
588          else {
589              PyParser_SetError(&err);
590          }
591          PyParser_ClearError(&err);
592      }
593      return (res);
594  }
595  
596  
597  /*  PyObject* parser_expr(PyObject* self, PyObject* args)
598   *  PyObject* parser_suite(PyObject* self, PyObject* args)
599   *
600   *  External interfaces to the parser itself.  Which is called determines if
601   *  the parser attempts to recognize an expression ('eval' form) or statement
602   *  suite ('exec' form).  The real work is done by parser_do_parse() above.
603   *
604   */
605  static PyObject*
parser_expr(PyST_Object * self,PyObject * args,PyObject * kw)606  parser_expr(PyST_Object *self, PyObject *args, PyObject *kw)
607  {
608      NOTE(ARGUNUSED(self))
609      return (parser_do_parse(args, kw, "s:expr", PyST_EXPR));
610  }
611  
612  
613  static PyObject*
parser_suite(PyST_Object * self,PyObject * args,PyObject * kw)614  parser_suite(PyST_Object *self, PyObject *args, PyObject *kw)
615  {
616      NOTE(ARGUNUSED(self))
617      return (parser_do_parse(args, kw, "s:suite", PyST_SUITE));
618  }
619  
620  
621  
622  /*  This is the messy part of the code.  Conversion from a tuple to an ST
623   *  object requires that the input tuple be valid without having to rely on
624   *  catching an exception from the compiler.  This is done to allow the
625   *  compiler itself to remain fast, since most of its input will come from
626   *  the parser directly, and therefore be known to be syntactically correct.
627   *  This validation is done to ensure that we don't core dump the compile
628   *  phase, returning an exception instead.
629   *
630   *  Two aspects can be broken out in this code:  creating a node tree from
631   *  the tuple passed in, and verifying that it is indeed valid.  It may be
632   *  advantageous to expand the number of ST types to include funcdefs and
633   *  lambdadefs to take advantage of the optimizer, recognizing those STs
634   *  here.  They are not necessary, and not quite as useful in a raw form.
635   *  For now, let's get expressions and suites working reliably.
636   */
637  
638  
639  static node* build_node_tree(PyObject *tuple);
640  
641  static int
validate_node(node * tree)642  validate_node(node *tree)
643  {
644      int type = TYPE(tree);
645      int nch = NCH(tree);
646      state *dfa_state;
647      int pos, arc;
648  
649      assert(ISNONTERMINAL(type));
650      type -= NT_OFFSET;
651      if (type >= _PyParser_Grammar.g_ndfas) {
652          PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
653          return 0;
654      }
655      const dfa *nt_dfa = &_PyParser_Grammar.g_dfa[type];
656      REQ(tree, nt_dfa->d_type);
657  
658      /* Run the DFA for this nonterminal. */
659      dfa_state = nt_dfa->d_state;
660      for (pos = 0; pos < nch; ++pos) {
661          node *ch = CHILD(tree, pos);
662          int ch_type = TYPE(ch);
663          if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas)
664              || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS))
665              || (ch_type < 0)
666             ) {
667              PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type);
668              return 0;
669          }
670          if (ch_type == suite && TYPE(tree) == funcdef) {
671              /* This is the opposite hack of what we do in parser.c
672                 (search for func_body_suite), except we don't ever
673                 support type comments here. */
674              ch_type = func_body_suite;
675          }
676          for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
677              short a_label = dfa_state->s_arc[arc].a_lbl;
678              assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
679  
680              const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
681              if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type)
682                  && ((ch->n_str == NULL) || (label_str == NULL)
683                       || (strcmp(ch->n_str, label_str) == 0))
684                 ) {
685                  /* The child is acceptable; if non-terminal, validate it recursively. */
686                  if (ISNONTERMINAL(ch_type) && !validate_node(ch))
687                      return 0;
688  
689                  /* Update the state, and move on to the next child. */
690                  dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
691                  goto arc_found;
692              }
693          }
694          /* What would this state have accepted? */
695          {
696              short a_label = dfa_state->s_arc->a_lbl;
697              if (!a_label) /* Wouldn't accept any more children */
698                  goto illegal_num_children;
699  
700              int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
701              const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
702  
703              if (ISNONTERMINAL(next_type)) {
704                  PyErr_Format(parser_error, "Expected %s, got %s.",
705                               _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name,
706                               ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] :
707                               _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name);
708              }
709              else if (expected_str != NULL) {
710                  PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",
711                               expected_str);
712              }
713              else {
714                  PyErr_Format(parser_error, "Illegal terminal: expected %s.",
715                               _PyParser_TokenNames[next_type]);
716              }
717              return 0;
718          }
719  
720  arc_found:
721          continue;
722      }
723      /* Are we in a final state? If so, return 1 for successful validation. */
724      for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
725          if (!dfa_state->s_arc[arc].a_lbl) {
726              return 1;
727          }
728      }
729  
730  illegal_num_children:
731      PyErr_Format(parser_error,
732                   "Illegal number of children for %s node.", nt_dfa->d_name);
733      return 0;
734  }
735  
736  /*  PyObject* parser_tuple2st(PyObject* self, PyObject* args)
737   *
738   *  This is the public function, called from the Python code.  It receives a
739   *  single tuple object from the caller, and creates an ST object if the
740   *  tuple can be validated.  It does this by checking the first code of the
741   *  tuple, and, if acceptable, builds the internal representation.  If this
742   *  step succeeds, the internal representation is validated as fully as
743   *  possible with the recursive validate_node() routine defined above.
744   *
745   *  This function must be changed if support is to be added for PyST_FRAGMENT
746   *  ST objects.
747   *
748   */
749  static PyObject*
parser_tuple2st(PyST_Object * self,PyObject * args,PyObject * kw)750  parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
751  {
752      NOTE(ARGUNUSED(self))
753      PyObject *st = 0;
754      PyObject *tuple;
755      node *tree;
756  
757      static char *keywords[] = {"sequence", NULL};
758  
759      if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords,
760                                       &tuple))
761          return (0);
762      if (!PySequence_Check(tuple)) {
763          PyErr_SetString(PyExc_ValueError,
764                          "sequence2st() requires a single sequence argument");
765          return (0);
766      }
767      /*
768       *  Convert the tree to the internal form before checking it.
769       */
770      tree = build_node_tree(tuple);
771      if (tree != 0) {
772          node *validation_root = NULL;
773          int tree_type = 0;
774          switch (TYPE(tree)) {
775          case eval_input:
776              /*  Might be an eval form.  */
777              tree_type = PyST_EXPR;
778              validation_root = tree;
779              break;
780          case encoding_decl:
781              /* This looks like an encoding_decl so far. */
782              if (NCH(tree) == 1) {
783                  tree_type = PyST_SUITE;
784                  validation_root = CHILD(tree, 0);
785              }
786              else {
787                  err_string("Error Parsing encoding_decl");
788              }
789              break;
790          case file_input:
791              /*  This looks like an exec form so far.  */
792              tree_type = PyST_SUITE;
793              validation_root = tree;
794              break;
795          default:
796              /*  This is a fragment, at best. */
797              err_string("parse tree does not use a valid start symbol");
798          }
799  
800          if (validation_root != NULL && validate_node(validation_root))
801              st = parser_newstobject(tree, tree_type);
802          else
803              PyNode_Free(tree);
804      }
805      /*  Make sure we raise an exception on all errors.  We should never
806       *  get this, but we'd do well to be sure something is done.
807       */
808      if (st == NULL && !PyErr_Occurred())
809          err_string("unspecified ST error occurred");
810  
811      return st;
812  }
813  
814  
815  /*  node* build_node_children()
816   *
817   *  Iterate across the children of the current non-terminal node and build
818   *  their structures.  If successful, return the root of this portion of
819   *  the tree, otherwise, 0.  Any required exception will be specified already,
820   *  and no memory will have been deallocated.
821   *
822   */
823  static node*
build_node_children(PyObject * tuple,node * root,int * line_num)824  build_node_children(PyObject *tuple, node *root, int *line_num)
825  {
826      Py_ssize_t len = PyObject_Size(tuple);
827      Py_ssize_t i;
828      int  err;
829  
830      if (len < 0) {
831          return NULL;
832      }
833      for (i = 1; i < len; ++i) {
834          /* elem must always be a sequence, however simple */
835          PyObject* elem = PySequence_GetItem(tuple, i);
836          int ok = elem != NULL;
837          int type = 0;
838          char *strn = 0;
839  
840          if (ok)
841              ok = PySequence_Check(elem);
842          if (ok) {
843              PyObject *temp = PySequence_GetItem(elem, 0);
844              if (temp == NULL)
845                  ok = 0;
846              else {
847                  ok = PyLong_Check(temp);
848                  if (ok) {
849                      type = _PyLong_AsInt(temp);
850                      if (type == -1 && PyErr_Occurred()) {
851                          Py_DECREF(temp);
852                          Py_DECREF(elem);
853                          return NULL;
854                      }
855                  }
856                  Py_DECREF(temp);
857              }
858          }
859          if (!ok) {
860              PyObject *err = Py_BuildValue("Os", elem,
861                                            "Illegal node construct.");
862              PyErr_SetObject(parser_error, err);
863              Py_XDECREF(err);
864              Py_XDECREF(elem);
865              return NULL;
866          }
867          if (ISTERMINAL(type)) {
868              Py_ssize_t len = PyObject_Size(elem);
869              PyObject *temp;
870              const char *temp_str;
871  
872              if ((len != 2) && (len != 3)) {
873                  err_string("terminal nodes must have 2 or 3 entries");
874                  Py_DECREF(elem);
875                  return NULL;
876              }
877              temp = PySequence_GetItem(elem, 1);
878              if (temp == NULL) {
879                  Py_DECREF(elem);
880                  return NULL;
881              }
882              if (!PyUnicode_Check(temp)) {
883                  PyErr_Format(parser_error,
884                               "second item in terminal node must be a string,"
885                               " found %s",
886                               Py_TYPE(temp)->tp_name);
887                  Py_DECREF(temp);
888                  Py_DECREF(elem);
889                  return NULL;
890              }
891              if (len == 3) {
892                  PyObject *o = PySequence_GetItem(elem, 2);
893                  if (o == NULL) {
894                      Py_DECREF(temp);
895                      Py_DECREF(elem);
896                      return NULL;
897                  }
898                  if (PyLong_Check(o)) {
899                      int num = _PyLong_AsInt(o);
900                      if (num == -1 && PyErr_Occurred()) {
901                          Py_DECREF(o);
902                          Py_DECREF(temp);
903                          Py_DECREF(elem);
904                          return NULL;
905                      }
906                      *line_num = num;
907                  }
908                  else {
909                      PyErr_Format(parser_error,
910                                   "third item in terminal node must be an"
911                                   " integer, found %s",
912                                   Py_TYPE(temp)->tp_name);
913                      Py_DECREF(o);
914                      Py_DECREF(temp);
915                      Py_DECREF(elem);
916                      return NULL;
917                  }
918                  Py_DECREF(o);
919              }
920              temp_str = PyUnicode_AsUTF8AndSize(temp, &len);
921              if (temp_str == NULL) {
922                  Py_DECREF(temp);
923                  Py_DECREF(elem);
924                  return NULL;
925              }
926              strn = (char *)PyObject_MALLOC(len + 1);
927              if (strn == NULL) {
928                  Py_DECREF(temp);
929                  Py_DECREF(elem);
930                  PyErr_NoMemory();
931                  return NULL;
932              }
933              (void) memcpy(strn, temp_str, len + 1);
934              Py_DECREF(temp);
935          }
936          else if (!ISNONTERMINAL(type)) {
937              /*
938               *  It has to be one or the other; this is an error.
939               *  Raise an exception.
940               */
941              PyObject *err = Py_BuildValue("Os", elem, "unknown node type.");
942              PyErr_SetObject(parser_error, err);
943              Py_XDECREF(err);
944              Py_DECREF(elem);
945              return NULL;
946          }
947          err = PyNode_AddChild(root, type, strn, *line_num, 0, *line_num, 0);
948          if (err == E_NOMEM) {
949              Py_DECREF(elem);
950              PyObject_FREE(strn);
951              PyErr_NoMemory();
952              return NULL;
953          }
954          if (err == E_OVERFLOW) {
955              Py_DECREF(elem);
956              PyObject_FREE(strn);
957              PyErr_SetString(PyExc_ValueError,
958                              "unsupported number of child nodes");
959              return NULL;
960          }
961  
962          if (ISNONTERMINAL(type)) {
963              node* new_child = CHILD(root, i - 1);
964  
965              if (new_child != build_node_children(elem, new_child, line_num)) {
966                  Py_DECREF(elem);
967                  return NULL;
968              }
969          }
970          else if (type == NEWLINE) {     /* It's true:  we increment the     */
971              ++(*line_num);              /* line number *after* the newline! */
972          }
973          Py_DECREF(elem);
974      }
975      return root;
976  }
977  
978  
979  static node*
build_node_tree(PyObject * tuple)980  build_node_tree(PyObject *tuple)
981  {
982      node* res = 0;
983      PyObject *temp = PySequence_GetItem(tuple, 0);
984      long num = -1;
985  
986      if (temp != NULL)
987          num = PyLong_AsLong(temp);
988      Py_XDECREF(temp);
989      if (ISTERMINAL(num)) {
990          /*
991           *  The tuple is simple, but it doesn't start with a start symbol.
992           *  Raise an exception now and be done with it.
993           */
994          tuple = Py_BuildValue("Os", tuple,
995                      "Illegal syntax-tree; cannot start with terminal symbol.");
996          PyErr_SetObject(parser_error, tuple);
997          Py_XDECREF(tuple);
998      }
999      else if (ISNONTERMINAL(num)) {
1000          /*
1001           *  Not efficient, but that can be handled later.
1002           */
1003          int line_num = 0;
1004          PyObject *encoding = NULL;
1005  
1006          if (num == encoding_decl) {
1007              encoding = PySequence_GetItem(tuple, 2);
1008              if (encoding == NULL) {
1009                  PyErr_SetString(parser_error, "missed encoding");
1010                  return NULL;
1011              }
1012              if (!PyUnicode_Check(encoding)) {
1013                  PyErr_Format(parser_error,
1014                               "encoding must be a string, found %.200s",
1015                               Py_TYPE(encoding)->tp_name);
1016                  Py_DECREF(encoding);
1017                  return NULL;
1018              }
1019              /* tuple isn't borrowed anymore here, need to DECREF */
1020              tuple = PySequence_GetSlice(tuple, 0, 2);
1021              if (tuple == NULL) {
1022                  Py_DECREF(encoding);
1023                  return NULL;
1024              }
1025          }
1026          res = PyNode_New(num);
1027          if (res != NULL) {
1028              if (res != build_node_children(tuple, res, &line_num)) {
1029                  PyNode_Free(res);
1030                  res = NULL;
1031              }
1032              if (res && encoding) {
1033                  Py_ssize_t len;
1034                  const char *temp;
1035                  temp = PyUnicode_AsUTF8AndSize(encoding, &len);
1036                  if (temp == NULL) {
1037                      PyNode_Free(res);
1038                      Py_DECREF(encoding);
1039                      Py_DECREF(tuple);
1040                      return NULL;
1041                  }
1042                  res->n_str = (char *)PyObject_MALLOC(len + 1);
1043                  if (res->n_str == NULL) {
1044                      PyNode_Free(res);
1045                      Py_DECREF(encoding);
1046                      Py_DECREF(tuple);
1047                      PyErr_NoMemory();
1048                      return NULL;
1049                  }
1050                  (void) memcpy(res->n_str, temp, len + 1);
1051              }
1052          }
1053          if (encoding != NULL) {
1054              Py_DECREF(encoding);
1055              Py_DECREF(tuple);
1056          }
1057      }
1058      else {
1059          /*  The tuple is illegal -- if the number is neither TERMINAL nor
1060           *  NONTERMINAL, we can't use it.  Not sure the implementation
1061           *  allows this condition, but the API doesn't preclude it.
1062           */
1063          PyObject *err = Py_BuildValue("Os", tuple,
1064                                        "Illegal component tuple.");
1065          PyErr_SetObject(parser_error, err);
1066          Py_XDECREF(err);
1067      }
1068  
1069      return (res);
1070  }
1071  
1072  
1073  static PyObject*
1074  pickle_constructor = NULL;
1075  
1076  
1077  static PyObject*
parser__pickler(PyObject * self,PyObject * args)1078  parser__pickler(PyObject *self, PyObject *args)
1079  {
1080      NOTE(ARGUNUSED(self))
1081      PyObject *result = NULL;
1082      PyObject *st = NULL;
1083  
1084      if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) {
1085          PyObject *newargs;
1086          PyObject *tuple;
1087  
1088          if ((newargs = PyTuple_Pack(2, st, Py_True)) == NULL)
1089              return NULL;
1090          tuple = parser_st2tuple((PyST_Object*)NULL, newargs, NULL);
1091          if (tuple != NULL) {
1092              result = Py_BuildValue("O(O)", pickle_constructor, tuple);
1093              Py_DECREF(tuple);
1094          }
1095          Py_DECREF(newargs);
1096      }
1097  
1098      return (result);
1099  }
1100  
1101  
1102  /*  Functions exported by this module.  Most of this should probably
1103   *  be converted into an ST object with methods, but that is better
1104   *  done directly in Python, allowing subclasses to be created directly.
1105   *  We'd really have to write a wrapper around it all anyway to allow
1106   *  inheritance.
1107   */
1108  static PyMethodDef parser_functions[] =  {
1109      {"compilest",      (PyCFunction)(void(*)(void))parser_compilest,  PUBLIC_METHOD_TYPE,
1110          PyDoc_STR("Compiles an ST object into a code object.")},
1111      {"expr",            (PyCFunction)(void(*)(void))parser_expr,      PUBLIC_METHOD_TYPE,
1112          PyDoc_STR("Creates an ST object from an expression.")},
1113      {"isexpr",          (PyCFunction)(void(*)(void))parser_isexpr,    PUBLIC_METHOD_TYPE,
1114          PyDoc_STR("Determines if an ST object was created from an expression.")},
1115      {"issuite",         (PyCFunction)(void(*)(void))parser_issuite,   PUBLIC_METHOD_TYPE,
1116          PyDoc_STR("Determines if an ST object was created from a suite.")},
1117      {"suite",           (PyCFunction)(void(*)(void))parser_suite,     PUBLIC_METHOD_TYPE,
1118          PyDoc_STR("Creates an ST object from a suite.")},
1119      {"sequence2st",     (PyCFunction)(void(*)(void))parser_tuple2st,  PUBLIC_METHOD_TYPE,
1120          PyDoc_STR("Creates an ST object from a tree representation.")},
1121      {"st2tuple",        (PyCFunction)(void(*)(void))parser_st2tuple,  PUBLIC_METHOD_TYPE,
1122          PyDoc_STR("Creates a tuple-tree representation of an ST.")},
1123      {"st2list",         (PyCFunction)(void(*)(void))parser_st2list,   PUBLIC_METHOD_TYPE,
1124          PyDoc_STR("Creates a list-tree representation of an ST.")},
1125      {"tuple2st",        (PyCFunction)(void(*)(void))parser_tuple2st,  PUBLIC_METHOD_TYPE,
1126          PyDoc_STR("Creates an ST object from a tree representation.")},
1127  
1128      /* private stuff: support pickle module */
1129      {"_pickler",        (PyCFunction)parser__pickler,  METH_VARARGS,
1130          PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")},
1131  
1132      {NULL, NULL, 0, NULL}
1133      };
1134  
1135  
1136  
1137  static struct PyModuleDef parsermodule = {
1138          PyModuleDef_HEAD_INIT,
1139          "parser",
1140          NULL,
1141          -1,
1142          parser_functions,
1143          NULL,
1144          NULL,
1145          NULL,
1146          NULL
1147  };
1148  
1149  PyMODINIT_FUNC PyInit_parser(void);  /* supply a prototype */
1150  
1151  PyMODINIT_FUNC
PyInit_parser(void)1152  PyInit_parser(void)
1153  {
1154      PyObject *module, *copyreg;
1155  
1156      if (PyErr_WarnEx(PyExc_DeprecationWarning,
1157              "The parser module is deprecated and will be removed "
1158              "in future versions of Python", 7) != 0) {
1159          return NULL;
1160      }
1161  
1162      if (PyType_Ready(&PyST_Type) < 0)
1163          return NULL;
1164      module = PyModule_Create(&parsermodule);
1165      if (module == NULL)
1166          return NULL;
1167  
1168      if (parser_error == 0)
1169          parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
1170  
1171      if (parser_error == 0)
1172          return NULL;
1173      /* CAUTION:  The code next used to skip bumping the refcount on
1174       * parser_error.  That's a disaster if PyInit_parser() gets called more
1175       * than once.  By incref'ing, we ensure that each module dict that
1176       * gets created owns its reference to the shared parser_error object,
1177       * and the file static parser_error vrbl owns a reference too.
1178       */
1179      Py_INCREF(parser_error);
1180      if (PyModule_AddObject(module, "ParserError", parser_error) != 0)
1181          return NULL;
1182  
1183      Py_INCREF(&PyST_Type);
1184      PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type);
1185  
1186      PyModule_AddStringConstant(module, "__copyright__",
1187                                 parser_copyright_string);
1188      PyModule_AddStringConstant(module, "__doc__",
1189                                 parser_doc_string);
1190      PyModule_AddStringConstant(module, "__version__",
1191                                 parser_version_string);
1192  
1193      /* Register to support pickling.
1194       * If this fails, the import of this module will fail because an
1195       * exception will be raised here; should we clear the exception?
1196       */
1197      copyreg = PyImport_ImportModuleNoBlock("copyreg");
1198      if (copyreg != NULL) {
1199          PyObject *func, *pickler;
1200          _Py_IDENTIFIER(pickle);
1201          _Py_IDENTIFIER(sequence2st);
1202          _Py_IDENTIFIER(_pickler);
1203  
1204          func = _PyObject_GetAttrId(copyreg, &PyId_pickle);
1205          pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st);
1206          pickler = _PyObject_GetAttrId(module, &PyId__pickler);
1207          Py_XINCREF(pickle_constructor);
1208          if ((func != NULL) && (pickle_constructor != NULL)
1209              && (pickler != NULL)) {
1210              PyObject *res;
1211  
1212              res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler,
1213                                                 pickle_constructor, NULL);
1214              Py_XDECREF(res);
1215          }
1216          Py_XDECREF(func);
1217          Py_XDECREF(pickle_constructor);
1218          Py_XDECREF(pickler);
1219          Py_DECREF(copyreg);
1220      }
1221      return module;
1222  }
1223