• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3.8
2
"""pegen -- PEG Generator.

Search the web for PEG Parsers for reference.
"""
7
8import argparse
9import sys
10import time
11import token
12import traceback
13
14from typing import Tuple
15
16from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
17from pegen.validator import validate_grammar
18
19
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C back end for the options in *args*.

    The parsed command-line options are forwarded to
    ``build_c_parser_and_generator`` and its four results are returned as a
    ``(grammar, parser, tokenizer, generator)`` tuple.

    If the build raises, the exception is re-raised when ``-v`` was given at
    least once.  Otherwise only the exception itself is printed (no stack),
    followed by a hint on stderr, and the process exits with status 1.
    """
    from pegen.build import build_c_parser_and_generator

    level = args.verbose
    # Tokenizer tracing starts at -vvv; parser tracing at exactly -vv or from -vvvv.
    trace_tokenizer = level >= 3
    trace_parser = level == 2 or level >= 4
    try:
        built = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            level,
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
        grammar, parser, tokenizer, generator = built
        return grammar, parser, tokenizer, generator
    except Exception as err:
        if not level:
            # Quiet failure mode: report the error without a stack trace.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
47
48
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python back end for the options in *args*.

    The parsed command-line options are forwarded to
    ``build_python_parser_and_generator`` and its four results are returned
    as a ``(grammar, parser, tokenizer, generator)`` tuple.

    If the build raises, the exception is re-raised when ``-v`` was given at
    least once.  Otherwise only the exception itself is printed (no stack),
    followed by a hint on stderr, and the process exits with status 1.
    """
    from pegen.build import build_python_parser_and_generator

    level = args.verbose
    # Tokenizer tracing starts at -vvv; parser tracing at exactly -vv or from -vvvv.
    trace_tokenizer = level >= 3
    trace_parser = level == 2 or level >= 4
    try:
        built = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
        grammar, parser, tokenizer, generator = built
        return grammar, parser, tokenizer, generator
    except Exception as err:
        if not level:
            # Quiet failure mode: report the error without a stack trace.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
72
73
# Top-level command line: global flags, then one sub-command per target language.
argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument(
    "-q",
    "--quiet",
    action="store_true",
    help="Don't print the parsed grammar",
)
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

# "c" sub-command: dispatches to generate_c_code via the ``func`` default.
c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.c",
    help="Where to write the generated parser",
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized",
    action="store_true",
    help="Compile the extension in optimized mode",
)
c_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)
c_parser.set_defaults(func=generate_c_code)

# "python" sub-command: dispatches to generate_python_code via the ``func`` default.
python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.py", help="Where to write the generated parser"
)
python_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)
python_parser.set_defaults(func=generate_python_code)
119
120
def main() -> None:
    """Command-line entry point.

    Parses ``sys.argv`` with the module-level ``argparser``, runs the
    generator selected by the sub-command, validates the resulting grammar,
    and prints the grammar unless ``-q`` was given.  With ``-v`` it also
    prints the raw grammar, the first graph and its SCCs, timing figures,
    cache sizes and (when available) memory statistics.
    """
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    # ``func`` is only set by a sub-command; without one there is nothing to run.
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    started = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    finished = time.time()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for text in repr(grammar).splitlines():
                print(" ", text)

        print("Clean Grammar:")
        for text in str(grammar).splitlines():
            print(" ", text)

    # Everything below is diagnostic output shown only with -v.
    if not args.verbose:
        return

    print("First Graph:")
    for source, targets in gen.first_graph.items():
        print(f"  {source} -> {', '.join(targets)}")
    print("First SCCS:")
    for component in gen.first_sccs:
        print(" ", component, end="")
        if len(component) > 1:
            leaders = {name for name in component if grammar.rules[name].leader}
            print("  # Indirectly left-recursive; leaders:", leaders)
        else:
            # A single-rule component is left-recursive only if it points at itself.
            name = next(iter(component))
            print("  # Left-recursive" if name in gen.first_graph[name] else "")

    elapsed = finished - started
    last_token = tokenizer.diagnose()
    line_count = last_token.end[0]
    if last_token.type == token.ENDMARKER:
        # Don't count the line reported by a trailing ENDMARKER token.
        line_count -= 1
    summary = f"Total time: {elapsed:.3f} sec; {line_count} lines"
    if elapsed:
        # Skip the rate when the measured time is zero (avoids dividing by zero).
        summary += f"; {line_count / elapsed:.0f} lines/sec"
    print(summary)
    print("Caches sizes:")
    print(f"  token array : {len(tokenizer._tokens):10}")
    print(f"        cache : {len(parser._cache):10}")
    if not print_memstats():
        print("(Can't find psutil; install it for memory stats.)")
179
180
if __name__ == "__main__":
    # Refuse to run on interpreters older than 3.8: report on stderr, exit status 1.
    if sys.version_info[:2] < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
    main()
186