import pathlib
import shutil
import tokenize
import sysconfig
import tempfile
import itertools

from typing import Optional, Tuple, List, IO, Set, Dict

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()
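
# A minimal usage sketch: the pairs used in this module are
# ("CFLAGS", "PY_CFLAGS_NODIST") for compiling and
# ("LDFLAGS", "PY_LDFLAGS_NODIST") for linking. On builds where sysconfig
# does not define one of the variables, the helper returns an empty list.
#
#     extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
#     extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")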


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful if you want to control where the intermediate build
    files are placed).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.command.clean import clean  # type: ignore
    from distutils.command.build_ext import build_ext  # type: ignore
    from distutils.tests.support import fixup_build_ext  # type: ignore

    if verbose:
        distutils.log.set_verbosity(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN so that Parser/pegen.c does not call PyAST_Validate()
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    extension = [
        Extension(
            extension_name,
            sources=[
                str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
                generated_source_path,
            ],
            include_dirs=[
                str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
                str(MOD_DIR.parent.parent.parent / "Parser"),
            ],
            extra_compile_args=extra_compile_args,
            extra_link_args=extra_link_args,
        )
    ]
    dist = Distribution({"name": extension_name, "ext_modules": extension})
    cmd = build_ext(dist)
    fixup_build_ext(cmd)
    cmd.inplace = True
    if build_dir:
        cmd.build_temp = build_dir
        cmd.build_lib = build_dir
    cmd.ensure_finalized()
    cmd.run()

    extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
    shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)

    cmd = clean(dist)
    cmd.finalize_options()
    cmd.run()

    return extension_path
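
# A minimal usage sketch (the path below is hypothetical; this must run from
# within a CPython source checkout, since the extension links against sources
# under Python/ and Parser/ relative to MOD_DIR):
#
#     so_path = compile_c_extension("mydir/parser.c", verbose=True)
#     # e.g. mydir/parser.cpython-38-darwin.so on Darwin with Python 3.8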


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    """Parse a grammar file and return the grammar, the parser and the tokenizer used."""
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer
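
# A minimal usage sketch (the grammar path is hypothetical):
#
#     grammar, parser, tokenizer = build_parser("data/simple.gram")
#     for rule_name in grammar.rules:
#         print(rule_name)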


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    """Parse an open Tokens file into (all tokens, exact tokens, non-exact tokens)."""
    all_tokens: Dict[int, str] = {}
    exact_tokens: Dict[str, int] = {}
    non_exact_tokens: Set[str] = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens
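
# The parsing above implies the format of CPython's Grammar/Tokens file: one
# token per line, either "NAME" (non-exact token) or "NAME 'op'" (exact
# token). A minimal sketch with an in-memory stand-in for the file:
#
#     import io
#     toks = io.StringIO("ENDMARKER\nLPAR '('\nRPAR ')'\n")
#     all_tokens, exact, non_exact = generate_token_definitions(toks)
#     # all_tokens == {0: "ENDMARKER", 1: "LPAR", 2: "RPAR"}
#     # exact == {"(": 1, ")": 2}
#     # non_exact == {"ENDMARKER"}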


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Generate C parser code for *grammar* into *output_file*, optionally compiling it."""
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen
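
# A minimal usage sketch (paths are hypothetical):
#
#     grammar, _, _ = build_parser("data/simple.gram")
#     gen = build_c_generator(
#         grammar, "data/simple.gram", "Grammar/Tokens", "parser.c",
#         compile_extension=True,
#     )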


def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False
) -> ParserGenerator:
    """Generate Python parser code for *grammar* into *output_file*."""
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen
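
# A minimal usage sketch (paths are hypothetical; note that skip_actions is
# not yet honored here, per the TODO above):
#
#     grammar, _, _ = build_parser("data/simple.gram")
#     gen = build_python_generator(grammar, "data/simple.gram", "parse.py")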


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen
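
# A minimal end-to-end sketch for the C pipeline (paths are hypothetical):
#
#     grammar, parser, tokenizer, gen = build_c_parser_and_generator(
#         "data/simple.gram", "Grammar/Tokens", "parser.c",
#         compile_extension=True, verbose_c_extension=True,
#     )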


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions)
    return grammar, parser, tokenizer, gen
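
# A minimal end-to-end sketch for the Python pipeline (paths are hypothetical):
#
#     grammar, parser, tokenizer, gen = build_python_parser_and_generator(
#         "data/simple.gram", "parse.py"
#     )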