• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""This module defines the data structures used to represent a grammar.
5
6These are a bit arcane because they are derived from the data
7structures used by Python's 'pgen' parser generator.
8
9There's also a table here mapping operators to their names in the
10token module; the Python tokenize module reports all operators as the
11fallback token code OP, but the parser needs the actual token code.
12
13"""
14
15# Python imports
16import pickle
17
18# Local imports
19from . import token
20
21
22class Grammar(object):
23    """Pgen parsing tables conversion class.
24
25    Once initialized, this class supplies the grammar tables for the
26    parsing engine implemented by parse.py.  The parsing engine
27    accesses the instance variables directly.  The class here does not
28    provide initialization of the tables; several subclasses exist to
29    do this (see the conv and pgen modules).
30
31    The load() method reads the tables from a pickle file, which is
32    much faster than the other ways offered by subclasses.  The pickle
33    file is written by calling dump() (after loading the grammar
34    tables using a subclass).  The report() method prints a readable
35    representation of the tables to stdout, for debugging.
36
37    The instance variables are as follows:
38
39    symbol2number -- a dict mapping symbol names to numbers.  Symbol
40                     numbers are always 256 or higher, to distinguish
41                     them from token numbers, which are between 0 and
42                     255 (inclusive).
43
44    number2symbol -- a dict mapping numbers to symbol names;
45                     these two are each other's inverse.
46
47    states        -- a list of DFAs, where each DFA is a list of
48                     states, each state is a list of arcs, and each
49                     arc is a (i, j) pair where i is a label and j is
50                     a state number.  The DFA number is the index into
51                     this list.  (This name is slightly confusing.)
52                     Final states are represented by a special arc of
53                     the form (0, j) where j is its own state number.
54
55    dfas          -- a dict mapping symbol numbers to (DFA, first)
56                     pairs, where DFA is an item from the states list
57                     above, and first is a set of tokens that can
58                     begin this grammar rule (represented by a dict
59                     whose values are always 1).
60
61    labels        -- a list of (x, y) pairs where x is either a token
62                     number or a symbol number, and y is either None
63                     or a string; the strings are keywords.  The label
64                     number is the index in this list; label numbers
65                     are used to mark state transitions (arcs) in the
66                     DFAs.
67
68    start         -- the number of the grammar's start symbol.
69
70    keywords      -- a dict mapping keyword strings to arc labels.
71
72    tokens        -- a dict mapping token numbers to arc labels.
73
74    """
75
76    def __init__(self):
77        self.symbol2number = {}
78        self.number2symbol = {}
79        self.states = []
80        self.dfas = {}
81        self.labels = [(0, "EMPTY")]
82        self.keywords = {}
83        self.tokens = {}
84        self.symbol2label = {}
85        self.start = 256
86
87    def dump(self, filename):
88        """Dump the grammar tables to a pickle file."""
89        with open(filename, "wb") as f:
90            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
91
92    def load(self, filename):
93        """Load the grammar tables from a pickle file."""
94        with open(filename, "rb") as f:
95            d = pickle.load(f)
96        self.__dict__.update(d)
97
98    def loads(self, pkl):
99        """Load the grammar tables from a pickle bytes object."""
100        self.__dict__.update(pickle.loads(pkl))
101
102    def copy(self):
103        """
104        Copy the grammar.
105        """
106        new = self.__class__()
107        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
108                          "tokens", "symbol2label"):
109            setattr(new, dict_attr, getattr(self, dict_attr).copy())
110        new.labels = self.labels[:]
111        new.states = self.states[:]
112        new.start = self.start
113        return new
114
115    def report(self):
116        """Dump the grammar tables to standard output, for debugging."""
117        from pprint import pprint
118        print("s2n")
119        pprint(self.symbol2number)
120        print("n2s")
121        pprint(self.number2symbol)
122        print("states")
123        pprint(self.states)
124        print("dfas")
125        pprint(self.dfas)
126        print("labels")
127        pprint(self.labels)
128        print("start", self.start)
129
130
131# Map from operator to number (since tokenize doesn't do this)
132
133opmap_raw = """
134( LPAR
135) RPAR
136[ LSQB
137] RSQB
138: COLON
139, COMMA
140; SEMI
141+ PLUS
142- MINUS
143* STAR
144/ SLASH
145| VBAR
146& AMPER
147< LESS
148> GREATER
149= EQUAL
150. DOT
151% PERCENT
152` BACKQUOTE
153{ LBRACE
154} RBRACE
155@ AT
156@= ATEQUAL
157== EQEQUAL
158!= NOTEQUAL
159<> NOTEQUAL
160<= LESSEQUAL
161>= GREATEREQUAL
162~ TILDE
163^ CIRCUMFLEX
164<< LEFTSHIFT
165>> RIGHTSHIFT
166** DOUBLESTAR
167+= PLUSEQUAL
168-= MINEQUAL
169*= STAREQUAL
170/= SLASHEQUAL
171%= PERCENTEQUAL
172&= AMPEREQUAL
173|= VBAREQUAL
174^= CIRCUMFLEXEQUAL
175<<= LEFTSHIFTEQUAL
176>>= RIGHTSHIFTEQUAL
177**= DOUBLESTAREQUAL
178// DOUBLESLASH
179//= DOUBLESLASHEQUAL
180-> RARROW
181:= COLONEQUAL
182"""
183
184opmap = {}
185for line in opmap_raw.splitlines():
186    if line:
187        op, name = line.split()
188        opmap[op] = getattr(token, name)
189