import pathlib
import shutil
import tokenize
import sysconfig
import tempfile
import itertools

from typing import Optional, Tuple, List, IO, Set, Dict

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (useful, for example, when the intermediate build artifacts should
    live in a temporary directory).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.command.clean import clean  # type: ignore
    from distutils.command.build_ext import build_ext  # type: ignore
    from distutils.tests.support import fixup_build_ext  # type: ignore

    if verbose:
        distutils.log.set_verbosity(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    extension = [
        Extension(
            extension_name,
            sources=[
                str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "pegen.c"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "parse_string.c"),
                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
                generated_source_path,
            ],
            include_dirs=[
                str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
                str(MOD_DIR.parent.parent.parent / "Parser"),
                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen"),
            ],
            extra_compile_args=extra_compile_args,
            extra_link_args=extra_link_args,
        )
    ]
    dist = Distribution({"name": extension_name, "ext_modules": extension})
    cmd = build_ext(dist)
    fixup_build_ext(cmd)
    cmd.inplace = True
    if build_dir:
        cmd.build_temp = build_dir
        cmd.build_lib = build_dir
    cmd.ensure_finalized()
    cmd.run()

    extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
    shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)

    cmd = clean(dist)
    cmd.finalize_options()
    cmd.run()

    return extension_path
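

# Example usage (a sketch, not part of the original module): compiling a
# generated parser source inside a throwaway build directory. The file name
# "parser.c" is hypothetical, and a CPython source checkout is assumed, since
# the extension links against files under Parser/ and Python/.
#
#     with tempfile.TemporaryDirectory() as build_dir:
#         so_path = compile_c_extension(
#             "parser.c", build_dir=build_dir, verbose=True, keep_asserts=False
#         )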


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    all_tokens = {}
    exact_tokens = {}
    non_exact_tokens = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens
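

# A sketch (not part of the original module) of the Tokens file format that
# generate_token_definitions() parses: a bare name declares a non-exact token,
# while a name followed by a quoted string declares an exact token. Indices are
# assigned in order of appearance; the sample data below is made up.
#
#     import io
#     sample = io.StringIO("ENDMARKER\nNAME\nLPAR '('\nRPAR ')'\n")
#     all_tokens, exact, non_exact = generate_token_definitions(sample)
#     # all_tokens == {0: "ENDMARKER", 1: "NAME", 2: "LPAR", 3: "RPAR"}
#     # exact == {"(": 2, ")": 3}
#     # non_exact == {"ENDMARKER", "NAME"}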


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions)
    return grammar, parser, tokenizer, gen
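

# End-to-end example (a sketch, not part of the original module; the grammar
# and output paths below are hypothetical). The Python variant needs only a
# grammar file, while the C variant additionally needs a Tokens file and can
# compile the generated source into an extension module:
#
#     grammar, parser, tokenizer, gen = build_python_parser_and_generator(
#         "data/simple.gram", "simple_parser.py", verbose_parser=True
#     )
#     grammar, parser, tokenizer, gen = build_c_parser_and_generator(
#         "data/simple.gram", "Grammar/Tokens", "simple_parser.c",
#         compile_extension=True,
#     )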