• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3.8
2
3"""pegen -- PEG Generator.
4
5Search the web for PEG Parsers for reference.
6"""
7
8import argparse
9import sys
10import time
11import token
12import traceback
13from typing import Tuple
14
15from pegen.grammar import Grammar
16from pegen.parser import Parser
17from pegen.parser_generator import ParserGenerator
18from pegen.tokenizer import Tokenizer
19from pegen.validator import validate_grammar
20
21
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C parser build for the parsed command-line *args*.

    The grammar and tokens filenames, the output path and the build
    switches found on *args* are handed to
    ``pegen.build.build_c_parser_and_generator``; its four results are
    returned as a ``(grammar, parser, tokenizer, generator)`` tuple.

    When the build raises and no ``-v`` was given, only the exception
    itself is printed (without a traceback), followed by a hint, and the
    process exits with status 1.  With ``-v`` the exception propagates.
    """
    from pegen.build import build_c_parser_and_generator

    verbosity = args.verbose
    # Level 3 and up switches on the tokenizer-verbose flag; level 2, and
    # again 4 and up, switches on the parser-verbose flag.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4
    try:
        grammar, parser, tokenizer, generator = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            verbosity,
            # Assertions are kept unless an optimized build was requested.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
    except Exception as exc:
        if not verbosity:
            # Quiet failure: exception type and message only.
            traceback.print_exception(type(exc), exc, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
    return grammar, parser, tokenizer, generator
49
50
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python parser build for the parsed command-line *args*.

    The grammar filename, the output path and the ``skip_actions`` switch
    found on *args* are handed to
    ``pegen.build.build_python_parser_and_generator``; its four results
    are returned as a ``(grammar, parser, tokenizer, generator)`` tuple.

    When the build raises and no ``-v`` was given, only the exception
    itself is printed (without a traceback), followed by a hint, and the
    process exits with status 1.  With ``-v`` the exception propagates.
    """
    from pegen.build import build_python_parser_and_generator

    verbosity = args.verbose
    # Level 3 and up switches on the tokenizer-verbose flag; level 2, and
    # again 4 and up, switches on the parser-verbose flag.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4
    try:
        grammar, parser, tokenizer, generator = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
    except Exception as exc:
        if not verbosity:
            # Quiet failure: exception type and message only.
            traceback.print_exception(type(exc), exc, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
    return grammar, parser, tokenizer, generator
74
75
# Command-line definition.  Global flags come first, followed by one
# sub-command per target language; each sub-command stores its generator
# function under ``func`` so the entry point can dispatch on it.
argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument(
    "-q",
    "--quiet",
    action="store_true",
    help="Don't print the parsed grammar",
)
argparser.add_argument(
    "-v", "--verbose", action="count", default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

# --- "c" sub-command -------------------------------------------------------
c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.c",
    help="Where to write the generated parser",
)
c_parser.add_argument(
    "--compile-extension", action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized",
    action="store_true",
    help="Compile the extension in optimized mode",
)
c_parser.add_argument(
    "--skip-actions", action="store_true",
    help="Suppress code emission for rule actions",
)
c_parser.set_defaults(func=generate_c_code)

# --- "python" sub-command --------------------------------------------------
python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions", action="store_true",
    help="Suppress code emission for rule actions",
)
python_parser.set_defaults(func=generate_python_code)

125
126
def main() -> None:
    """Parse the command line, run the selected generator and report.

    The sub-command chosen on the command line supplies ``args.func``;
    without one, ``argparser.error`` is invoked.  After generation the
    grammar is passed to ``validate_grammar``.  Unless ``-q`` is given the
    grammar is printed (its ``repr`` too under ``-v``).  Under ``-v`` the
    first-graph, its SCCs, timing figures and cache sizes are printed as
    well.
    """
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    started = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    finished = time.time()

    validate_grammar(grammar)

    if not args.quiet:
        # (heading, renderer) pairs; each renderer is applied only when its
        # section is printed, so output order matches section order.
        sections = [("Clean Grammar:", str)]
        if args.verbose:
            sections.insert(0, ("Raw Grammar:", repr))
        for heading, render in sections:
            print(heading)
            for text_line in render(grammar).splitlines():
                print(f"  {text_line}")

    # Everything below is diagnostic output reserved for -v.
    if not args.verbose:
        return

    print("First Graph:")
    for src, dsts in gen.first_graph.items():
        targets = ", ".join(dsts)
        print(f"  {src} -> {targets}")

    print("First SCCS:")
    for scc in gen.first_sccs:
        print(" ", scc, end="")
        if len(scc) > 1:
            leaders = {rule for rule in scc if grammar.rules[rule].leader}
            print("  # Indirectly left-recursive; leaders:", leaders)
            continue
        # Single-member component: left-recursive only if it lists itself
        # among its own first-graph targets.
        only = next(iter(scc))
        print("  # Left-recursive" if only in gen.first_graph[only] else "")

    elapsed = finished - started
    last_token = tokenizer.diagnose()
    line_count = last_token.end[0]
    if last_token.type == token.ENDMARKER:
        # Leave the ENDMARKER's line out of the count.
        line_count -= 1
    summary = f"Total time: {elapsed:.3f} sec; {line_count} lines"
    if elapsed:
        # The rate is omitted when the measured time is exactly zero.
        summary += f"; {line_count / elapsed:.0f} lines/sec"
    print(summary)

    print("Caches sizes:")
    print(f"  token array : {len(tokenizer._tokens):10}")
    print(f"        cache : {len(parser._cache):10}")
    if not print_memstats():
        print("(Can't find psutil; install it for memory stats.)")
185
186
if __name__ == "__main__":
    # Refuse to run on interpreters older than 3.8.
    if sys.version_info < (3, 8):
        sys.stderr.write("ERROR: using pegen requires at least Python 3.8!\n")
        raise SystemExit(1)
    main()
192