#!/usr/bin/env python3.8

"""pegen -- PEG Generator.

Search the web for PEG Parsers for reference.
"""

import argparse
import sys
import time
import token
import traceback

from typing import Tuple

from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
from pegen.validator import validate_grammar


def _verbosity_flags(verbose: int) -> Tuple[bool, bool]:
    """Map the -v/--verbose count onto (verbose_tokenizer, verbose_parser).

    -vvv (3+) traces the tokenizer; -vv (exactly 2) or -vvvv (4+) traces
    the parser.
    """
    return verbose >= 3, verbose == 2 or verbose >= 4


def _fail_with_build_error(err: Exception, verbose: int) -> None:
    """Report a parser-build failure and exit(1).

    When *verbose* is nonzero, re-raise instead so the full traceback is
    shown.  Must be called from inside an ``except`` block: the bare
    ``raise`` re-raises the exception currently being handled.
    """
    if verbose:
        raise  # Show traceback
    # Print only the exception itself (no traceback) for a terse report.
    traceback.print_exception(err.__class__, err, None)
    sys.stderr.write("For full traceback, use -v\n")
    sys.exit(1)


def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the C parser and generator from the parsed CLI arguments.

    Returns the (grammar, parser, tokenizer, generator) tuple; on failure
    prints a diagnostic and exits (or re-raises when verbose).
    """
    # Imported lazily so the Python-only path does not pay for it.
    from pegen.build import build_c_parser_and_generator

    verbose_tokenizer, verbose_parser = _verbosity_flags(args.verbose)
    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            verbose_tokenizer,
            verbose_parser,
            args.verbose,
            # Optimized builds strip asserts from the compiled extension.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        _fail_with_build_error(err, args.verbose)


def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the Python parser and generator from the parsed CLI arguments.

    Returns the (grammar, parser, tokenizer, generator) tuple; on failure
    prints a diagnostic and exits (or re-raises when verbose).
    """
    # Imported lazily so the C-only path does not pay for it.
    from pegen.build import build_python_parser_and_generator

    verbose_tokenizer, verbose_parser = _verbosity_flags(args.verbose)
    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            verbose_tokenizer,
            verbose_parser,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        _fail_with_build_error(err, args.verbose)


# Top-level CLI: global flags plus one subcommand per target language.
argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions"
)

python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions"
)


def main() -> None:
    """Parse the command line, build the parser, and print diagnostics.

    Dispatches to the subcommand's ``func`` (set via ``set_defaults``),
    validates the resulting grammar, then prints the grammar and — under
    -v — the first-set graph, SCCs, timing, and cache statistics.
    """
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    # Namespace supports ``in``; absent when no subcommand was given.
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    t0 = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.time()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f" {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                # A multi-rule SCC means indirect left recursion.
                print(
                    " # Indirectly left-recursive; leaders:",
                    {name for name in scc if grammar.rules[name].leader},
                )
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    # Singleton SCC with a self-edge: direct left recursion.
                    print(" # Left-recursive")
                else:
                    print()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        # Don't count the synthetic end-of-file token's line.
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f" token array : {len(tokenizer._tokens):10}")
        print(f" cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")


if __name__ == "__main__":
    if sys.version_info < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
    main()