# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import io
import logging
import os
import pkgutil
import sys

# Pgen imports
from . import grammar, parse, token, tokenize, pgen


class Driver(object):
    """Drive the parser: feed it a token stream and collect the tree.

    Wraps parse.Parser, handling token-to-grammar-symbol mapping and the
    computation of each token's "prefix" (preceding whitespace/comments).
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Initialize the driver.

        grammar -- a Grammar instance (see load_grammar()).
        convert -- optional node-conversion callback, passed straight
                   through to parse.Parser.
        logger  -- logger for debug output; defaults to the root logger.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        tokens -- an iterable of 5-tuples as produced by
                  tokenize.generate_tokens().

        Raises parse.ParseError if the token stream ends before the
        grammar's start symbol is complete.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        # tok_type (not "type": avoid shadowing the builtin) and friends
        # retain the last-seen token for the ParseError raised at EOF.
        tok_type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            tok_type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # Gap between tracked position and this token: the skipped
                # text (newlines and/or intra-line whitespace) becomes part
                # of the prefix attached to the next significant token.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are not passed to the
                # parser; they accumulate into the prefix instead.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if tok_type == token.OP:
                # Map the generic OP token to the specific grammar symbol
                # for this operator (e.g. "+" -> PLUS).
                tok_type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[tok_type], value, prefix)
            if p.addtoken(tok_type, value, (prefix, start)):
                # addtoken() returns True once the start symbol is accepted.
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   tok_type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)


def _generate_pickle_name(gt):
    """Return the pickle-cache filename for grammar file *gt*.

    The full interpreter version is embedded so caches built by different
    Python versions do not collide (e.g. "Grammar3.8.10.final.0.pickle").
    A ".txt" extension on *gt* is dropped first.
    """
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"


def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    gt     -- path to the grammar text file.
    gp     -- path to the grammar pickle; derived from *gt* if None.
    save   -- if true, write a freshly generated grammar back to *gp*
              (failure to write is logged, not raised).
    force  -- if true, regenerate even when the pickle looks up to date.
    logger -- logger for progress messages; defaults to the root logger.
    """
    if logger is None:
        logger = logging.getLogger()
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                # Best effort: a read-only install dir shouldn't be fatal.
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def load_packaged_grammar(package, grammar_source):
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        return load_grammar(grammar_source)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
    data = pkgutil.get_data(package, pickled_name)
    g = grammar.Grammar()
    g.loads(data)
    return g


def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True

if __name__ == "__main__":
    sys.exit(int(not main()))