import pathlib
import shutil
import tokenize
import sysconfig
import tempfile
import itertools

from typing import Optional, Tuple, List, IO, Set, Dict

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
) -> str:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    on a darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful, for example, when you want the build files placed in
    a temporary directory).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.command.clean import clean  # type: ignore
    from distutils.command.build_ext import build_ext  # type: ignore
    from distutils.tests.support import fixup_build_ext  # type: ignore

    if verbose:
        distutils.log.set_verbosity(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    extension = [
        Extension(
            extension_name,
            sources=[
                str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
                generated_source_path,
            ],
            include_dirs=[
                str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
                str(MOD_DIR.parent.parent.parent / "Parser"),
            ],
            extra_compile_args=extra_compile_args,
            extra_link_args=extra_link_args,
        )
    ]
    dist = Distribution({"name": extension_name, "ext_modules": extension})
    cmd = build_ext(dist)
    fixup_build_ext(cmd)
    cmd.inplace = True
    if build_dir:
        cmd.build_temp = build_dir
        cmd.build_lib = build_dir
    cmd.ensure_finalized()
    cmd.run()

    # Place the compiled extension next to the generated source file.
    extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
    shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)

    cmd = clean(dist)
    cmd.finalize_options()
    cmd.run()

    return extension_path


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    all_tokens = {}
    exact_tokens = {}
    non_exact_tokens = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            # A bare name (e.g. "NAME") defines a non-exact token.
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            # A name plus a quoted operator (e.g. "LPAR '('") defines an exact token.
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions)
    return grammar, parser, tokenizer, gen
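

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It shows how
# build_python_parser_and_generator() chains build_parser() (tokenize and parse
# the grammar file) with build_python_generator() (emit the parser code). The
# paths "data/example.gram" and "example_parser.py" are hypothetical
# placeholders, not files shipped with pegen.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import sys

    grammar_path = sys.argv[1] if len(sys.argv) > 1 else "data/example.gram"  # hypothetical path
    parser_path = sys.argv[2] if len(sys.argv) > 2 else "example_parser.py"  # hypothetical path

    # Returns the parsed Grammar, the grammar-file Parser and Tokenizer, and the
    # PythonParserGenerator that wrote the output file.
    grammar, parser, tokenizer, gen = build_python_parser_and_generator(
        grammar_path,
        parser_path,
        verbose_tokenizer=False,
        verbose_parser=False,
    )
    print(f"Generated {parser_path} from {grammar_path}")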