import importlib.util
import io
import os
import pathlib
import sys
import textwrap
import tokenize
import token

from typing import Any, cast, Dict, IO, Type, Final

from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

# Token tables passed to CParserGenerator below.
# ALL_TOKENS is the stdlib mapping of numeric token type -> token name.
ALL_TOKENS = token.tok_name
# EXACT_TOKENS is the stdlib mapping of exact token string -> numeric type.
EXACT_TOKENS = token.EXACT_TOKEN_TYPES  # type: ignore
# Names of every token type that is not the target of an exact token string.
NON_EXACT_TOKENS = {
    name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}


def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Generate a Python parser for *grammar* and return its class.

    The parser source is written by PythonParserGenerator into an in-memory
    buffer, executed in a fresh namespace, and the ``GeneratedParser`` name
    defined by that source is returned.
    """
    # Generate a parser.
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    genr.generate("<string>")

    # Load the generated parser class.
    ns: Dict[str, Any] = {}
    exec(out.getvalue(), ns)
    return ns["GeneratedParser"]


def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    """Run a parser on a file (stream) and return the result of ``start()``.

    NOTE: despite the ``IO[bytes]`` annotation, ``file.readline`` is handed to
    ``tokenize.generate_tokens``, which expects lines as ``str``;
    ``parse_string()`` in this module passes an ``io.StringIO``.

    Raises the exception built by ``parser.make_syntax_error()`` when
    ``parser.start()`` returns None.
    """
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore # typeshed issue #3515
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error()
    return result


def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    """Run the parser on a string.

    When *dedent* is true (the default) the source is first passed through
    ``textwrap.dedent`` so indented triple-quoted literals can be used as-is.
    """
    if dedent:
        source = textwrap.dedent(source)
    file = io.StringIO(source)
    return run_parser(file, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515


def make_parser(source: str) -> Type[Parser]:
    """Combine parse_string() and generate_parser().

    *source* is grammar text; it is parsed with GrammarParser and the
    resulting grammar is turned into a Python parser class.
    """
    grammar = parse_string(source, GrammarParser)
    return generate_parser(grammar)


def import_file(full_name: str, path: str) -> Any:
    """Import a python module from a path.

    Raises ImportError if no import spec (or no loader) can be determined
    for *path*; previously this surfaced as an AttributeError on None.
    """
    spec = importlib.util.spec_from_file_location(full_name, path)
    # spec_from_file_location() returns None when it cannot find a loader
    # for the file, and a spec may also carry no loader; fail explicitly.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot load module {full_name!r} from {path!r}", name=full_name, path=path
        )
    mod = importlib.util.module_from_spec(spec)

    # We assume the loader has an exec_module() method.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod


def generate_c_parser_source(grammar: Grammar) -> str:
    """Return the C parser source generated for *grammar* as a string."""
    out = io.StringIO()
    genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
    genr.generate("<string>")
    return out.getvalue()


def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
) -> Any:
    """Generate a parser c extension for the given grammar in the given path.

    Writes ``parse.c`` into *path* and calls compile_c_extension() with
    *path* as the build directory.

    NOTE: this function has no return statement, so it returns None; it does
    not hand back the built module.  Callers that need the extension must
    import it themselves after this call.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    compile_c_extension(str(source), build_dir=str(path))


def print_memstats() -> bool:
    """Print rss, vms and maxrss of the current process in MiB.

    Returns False (printing nothing) when psutil is not installed, True
    otherwise.
    """
    MiB: Final = 2 ** 20
    try:
        import psutil  # type: ignore
    except ImportError:
        return False
    print("Memory stats:")
    process = psutil.Process()
    meminfo = process.memory_info()
    res: Dict[str, float] = {}
    res["rss"] = meminfo.rss / MiB
    res["vms"] = meminfo.vms / MiB
    if sys.platform == "win32":
        res["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        rusage = resource.getrusage(resource.RUSAGE_SELF)
        # ru_maxrss is scaled to bytes before dividing by MiB: the factor is
        # 1 on darwin and 1024 elsewhere (Linux).
        if sys.platform == "darwin":
            factor = 1
        else:
            factor = 1024  # Linux
        res["maxrss"] = rusage.ru_maxrss * factor / MiB
    for key, value in res.items():
        print(f" {key:12.12s}: {value:10.0f} MiB")
    return True