import pathlib
import shutil
import tokenize
import sys
import sysconfig
import tempfile
import itertools

from typing import Optional, Tuple, List, IO, Set, Dict

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()


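# Example usage of get_extra_flags() (illustrative; the flag values are
# hypothetical and depend on how this Python was configured):
#
#   get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
#   # -> e.g. ["-Wall", "-O2", "-fno-semantic-interposition"]; returns [] when
#   #    either config variable is unavailable.
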
def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = True,  # Significant test_peg_generator speedup.
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module is written to the same directory as the generated source,
    with the same basename followed by the interpreter- and platform-specific
    suffix. For example, for the source mydir/parser.c, the extension built on a
    Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path is used as the temporary build directory
    of distutils (useful if you want the intermediate build artifacts placed in a
    directory that you manage, e.g. a temporary directory).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.command.clean import clean  # type: ignore
    from distutils.command.build_ext import build_ext  # type: ignore
    from distutils.tests.support import fixup_build_ext  # type: ignore

    if verbose:
        distutils.log.set_verbosity(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append('-D_Py_TEST_PEGEN')
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")
    extension = [
        Extension(
            extension_name,
            sources=[
                str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
                generated_source_path,
            ],
            include_dirs=[
                str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
                str(MOD_DIR.parent.parent.parent / "Parser"),
            ],
            extra_compile_args=extra_compile_args,
            extra_link_args=extra_link_args,
        )
    ]
    dist = Distribution({"name": extension_name, "ext_modules": extension})
    cmd = build_ext(dist)
    fixup_build_ext(cmd)
    cmd.inplace = True
    if build_dir:
        cmd.build_temp = build_dir
        cmd.build_lib = build_dir
    cmd.ensure_finalized()
    cmd.run()

    extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
    shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)

    cmd = clean(dist)
    cmd.finalize_options()
    cmd.run()

    return extension_path

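# Example usage of compile_c_extension() (an illustrative sketch; the path is
# hypothetical and the suffix depends on the platform and Python version):
#
#   so_path = compile_c_extension("mydir/parser.c", verbose=True)
#   # -> e.g. mydir/parser.cpython-38-darwin.so on macOS with CPython 3.8
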

def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer

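# Example usage of build_parser() (illustrative; the grammar path is hypothetical):
#
#   grammar, parser, tokenizer = build_parser("data/python.gram")
#   print(list(grammar.rules))  # names of the rules parsed from the grammar file
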

def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    all_tokens: Dict[int, str] = {}
    exact_tokens: Dict[str, int] = {}
    non_exact_tokens: Set[str] = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens

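# Example for generate_token_definitions() (illustrative): for a Tokens file
# containing the three lines
#
#   ENDMARKER
#   NAME
#   LPAR '('
#
# the result is ({0: "ENDMARKER", 1: "NAME", 2: "LPAR"}, {"(": 2},
# {"ENDMARKER", "NAME"}).
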

def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen

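# Example usage of build_c_generator() (illustrative; all paths are hypothetical):
#
#   gen = build_c_generator(
#       grammar, "data/python.gram", "Grammar/Tokens", "parser.c",
#       compile_extension=True,
#   )
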

def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen

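# Example usage of build_python_generator() (illustrative; paths are hypothetical):
#
#   gen = build_python_generator(grammar, "data/simple.gram", "parser.py")
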

def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen

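# Example usage of build_c_parser_and_generator() (illustrative; paths are
# hypothetical):
#
#   grammar, parser, tokenizer, gen = build_c_parser_and_generator(
#       "data/python.gram", "Grammar/Tokens", "parser.c", compile_extension=True
#   )
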

def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions)
    return grammar, parser, tokenizer, gen

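# Example usage of build_python_parser_and_generator() (illustrative; paths are
# hypothetical):
#
#   grammar, parser, tokenizer, gen = build_python_parser_and_generator(
#       "data/simple.gram", "parser.py"
#   )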