#!/usr/bin/env python3.8

"""pegen -- PEG Generator.

Search the web for PEG Parsers for reference.
"""

import argparse
import sys
import time
import token
import traceback
from typing import Tuple

from pegen.grammar import Grammar
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.tokenizer import Tokenizer
from pegen.validator import validate_grammar


def _verbosity_flags(verbose: int) -> Tuple[bool, bool]:
    """Map the -v count to (verbose_tokenizer, verbose_parser) flags.

    -vv traces the parser, -vvv traces the tokenizer, and -vvvv (or more)
    traces both.  Shared by both target-language build paths so the two
    stay in sync.
    """
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    return verbose_tokenizer, verbose_parser


def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the C parser and generator from the parsed CLI arguments.

    On failure, prints a short error (or the full traceback with -v) and
    exits with status 1.
    """
    from pegen.build import build_c_parser_and_generator

    verbose_tokenizer, verbose_parser = _verbosity_flags(args.verbose)
    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            verbose_tokenizer,
            verbose_parser,
            args.verbose,
            # Optimized builds strip asserts from the compiled extension.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if args.verbose:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)


def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the Python parser and generator from the parsed CLI arguments.

    On failure, prints a short error (or the full traceback with -v) and
    exits with status 1.
    """
    from pegen.build import build_python_parser_and_generator

    verbose_tokenizer, verbose_parser = _verbosity_flags(args.verbose)
    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            verbose_tokenizer,
            verbose_parser,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if args.verbose:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)


argparser = argparse.ArgumentParser(
    prog="pegen", description="Experimental PEG-like parser generator"
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)


def main() -> None:
    """CLI entry point: parse arguments, build the parser, report stats."""
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    # set_defaults(func=...) only runs when a subcommand was chosen, so a
    # missing "func" means no target language was given on the command line.
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    t0 = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.time()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                # A strongly-connected component with more than one rule
                # means the rules are mutually (indirectly) left-recursive.
                print(
                    "  # Indirectly left-recursive; leaders:",
                    {name for name in scc if grammar.rules[name].leader},
                )
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        # Don't count the synthetic ENDMARKER line as a grammar line.
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")


if __name__ == "__main__":
    if sys.version_info < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
    main()