#!/usr/bin/env python3.8

"""pegen -- PEG Generator.

Search the web for PEG Parsers for reference.
"""

import argparse
import sys
import time
import token
import traceback

from typing import Tuple

from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator


def _report_build_failure(err: Exception, verbose: int) -> None:
    """Report a parser-build failure and exit(1).

    In verbose mode the exception is re-raised so the user gets the full
    traceback; otherwise only the exception itself (no traceback) is printed
    along with a hint about -v.
    """
    if verbose:
        raise err  # Show traceback
    # Passing None as the traceback prints just the exception, not the stack.
    traceback.print_exception(err.__class__, err, None)
    sys.stderr.write("For full traceback, use -v\n")
    sys.exit(1)


def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the C parser and generator described by *args*.

    Returns the (grammar, parser, tokenizer, generator) tuple on success;
    exits the process (or re-raises under -v) on failure.
    """
    # Deferred import: keeps 'pegen python' usable even if the C build
    # machinery has extra requirements.
    from pegen.build import build_c_parser_and_generator

    verbose = args.verbose
    # -vvv (and up) turns on tokenizer tracing; -vv or -vvvv turns on
    # parser tracing (a single -v only prints timing stats).
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            verbose_tokenizer,
            verbose_parser,
            args.verbose,
            # Optimized builds strip asserts from the generated extension.
            keep_asserts_in_extension=False if args.optimized else True,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        _report_build_failure(err, args.verbose)


def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the Python parser and generator described by *args*.

    Returns the (grammar, parser, tokenizer, generator) tuple on success;
    exits the process (or re-raises under -v) on failure.
    """
    # Deferred import: mirrors generate_c_code above.
    from pegen.build import build_python_parser_and_generator

    verbose = args.verbose
    # Same verbosity-level mapping as generate_c_code.
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            verbose_tokenizer,
            verbose_parser,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        _report_build_failure(err, args.verbose)


argparser = argparse.ArgumentParser(
    prog="pegen", description="Experimental PEG-like parser generator"
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)

python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)


def main() -> None:
    """Parse the command line, build the parser, and print diagnostics."""
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    # set_defaults(func=...) only runs when a subcommand was given; detect
    # a missing subcommand by the absence of 'func' on the namespace.
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    t0 = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.time()

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                # A multi-rule SCC means mutual (indirect) left recursion.
                print(
                    "  # Indirectly left-recursive; leaders:",
                    {name for name in scc if grammar.rules[name].leader},
                )
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        # ENDMARKER sits on a phantom extra line; don't count it.
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")


if __name__ == "__main__":
    if sys.version_info < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
    main()