import pathlib
import shutil
import tokenize
import sys
import sysconfig
import tempfile
import itertools

from typing import Optional, Tuple, List, IO, Set, Dict

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()
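
# For example (the values below are illustrative only; the actual flags depend on how this
# CPython was configured), a call such as
#
#     get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
#
# might return something like ["-Wsign-compare", "-g", "-O3"], and it returns [] whenever
# either sysconfig variable is unavailable.
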
48 """ 49 import distutils.log 50 from distutils.core import Distribution, Extension 51 from distutils.command.clean import clean # type: ignore 52 from distutils.command.build_ext import build_ext # type: ignore 53 from distutils.tests.support import fixup_build_ext # type: ignore 54 55 if verbose: 56 distutils.log.set_verbosity(distutils.log.DEBUG) 57 58 source_file_path = pathlib.Path(generated_source_path) 59 extension_name = source_file_path.stem 60 extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST") 61 extra_compile_args.append("-DPy_BUILD_CORE_MODULE") 62 # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c 63 extra_compile_args.append('-D_Py_TEST_PEGEN') 64 extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST") 65 if keep_asserts: 66 extra_compile_args.append("-UNDEBUG") 67 if disable_optimization: 68 if sys.platform == 'win32': 69 extra_compile_args.append("/Od") 70 extra_link_args.append("/LTCG:OFF") 71 else: 72 extra_compile_args.append("-O0") 73 if sysconfig.get_config_var("GNULD") == "yes": 74 extra_link_args.append("-fno-lto") 75 extension = [ 76 Extension( 77 extension_name, 78 sources=[ 79 str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"), 80 str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"), 81 str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"), 82 str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"), 83 str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"), 84 str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"), 85 generated_source_path, 86 ], 87 include_dirs=[ 88 str(MOD_DIR.parent.parent.parent / "Include" / "internal"), 89 str(MOD_DIR.parent.parent.parent / "Parser"), 90 ], 91 extra_compile_args=extra_compile_args, 92 extra_link_args=extra_link_args, 93 ) 94 ] 95 dist = Distribution({"name": extension_name, "ext_modules": extension}) 96 cmd = build_ext(dist) 97 fixup_build_ext(cmd) 98 cmd.inplace = True 99 if build_dir: 100 cmd.build_temp = build_dir 101 cmd.build_lib = build_dir 102 cmd.ensure_finalized() 103 cmd.run() 104 105 extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name) 106 shutil.move(cmd.get_ext_fullpath(extension_name), extension_path) 107 108 cmd = clean(dist) 109 cmd.finalize_options() 110 cmd.run() 111 112 return extension_path 113 114 115def build_parser( 116 grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False 117) -> Tuple[Grammar, Parser, Tokenizer]: 118 with open(grammar_file) as file: 119 tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer) 120 parser = GrammarParser(tokenizer, verbose=verbose_parser) 121 grammar = parser.start() 122 123 if not grammar: 124 raise parser.make_syntax_error(grammar_file) 125 126 return grammar, parser, tokenizer 127 128 129def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions: 130 all_tokens = {} 131 exact_tokens = {} 132 non_exact_tokens = set() 133 numbers = itertools.count(0) 134 135 for line in tokens: 136 line = line.strip() 137 138 if not line or line.startswith("#"): 139 continue 140 141 pieces = line.split() 142 index = next(numbers) 143 144 if len(pieces) == 1: 145 (token,) = pieces 146 non_exact_tokens.add(token) 147 all_tokens[index] = token 148 elif len(pieces) == 2: 149 token, op = pieces 150 exact_tokens[op.strip("'")] = index 151 all_tokens[index] = token 152 else: 153 raise ValueError(f"Unexpected line found in Tokens file: {line}") 154 155 return all_tokens, exact_tokens, non_exact_tokens 156 157 158def 

def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen
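
# A rough usage sketch (all file paths below are hypothetical): generate the C parser for a
# grammar and compile it into an importable extension module.
#
#     grammar, parser, tokenizer, gen = build_c_parser_and_generator(
#         "data/simple.gram",       # grammar file (hypothetical path)
#         "path/to/Grammar/Tokens", # the Tokens file shipped with CPython
#         "parser.c",               # where the generated C source is written
#         compile_extension=True,
#     )
#
# With compile_extension=True the extension is built in a temporary directory via
# compile_c_extension(), and the resulting module is moved next to parser.c.
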

def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions)
    return grammar, parser, tokenizer, gen
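
# A similar sketch for the pure-Python path (file names are hypothetical): no Tokens file or
# C compilation is involved; the generated parser is written directly as a Python module.
#
#     grammar, parser, tokenizer, gen = build_python_parser_and_generator(
#         "data/simple.gram",   # grammar file (hypothetical path)
#         "parse.py",           # generated Python parser module
#     )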