import importlib.util
import io
import os
import pathlib
import sys
import textwrap
import token
import tokenize
from typing import IO, Any, Dict, Final, Optional, Type, cast

from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

ALL_TOKENS = token.tok_name
EXACT_TOKENS = token.EXACT_TOKEN_TYPES
NON_EXACT_TOKENS = {
    name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}


def generate_parser(grammar: Grammar) -> Type[Parser]:
    # Generate a parser.
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    genr.generate("<string>")

    # Load the generated parser class.
    ns: Dict[str, Any] = {}
    exec(out.getvalue(), ns)
    return ns["GeneratedParser"]


def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    # Run a parser on a file (stream).
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore[arg-type] # typeshed issue #3515
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error("invalid syntax")
    return result


def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    # Run the parser on a string.
    if dedent:
        source = textwrap.dedent(source)
    file = io.StringIO(source)
    return run_parser(file, parser_class, verbose=verbose)  # type: ignore[arg-type] # typeshed issue #3515


def make_parser(source: str) -> Type[Parser]:
    # Combine parse_string() and generate_parser().
    grammar = parse_string(source, GrammarParser)
    return generate_parser(grammar)


def import_file(full_name: str, path: str) -> Any:
    """Import a python module from a path"""

    spec = importlib.util.spec_from_file_location(full_name, path)
    assert spec is not None
    mod = importlib.util.module_from_spec(spec)

    # We assume this is not None and has an exec_module() method.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod


def generate_c_parser_source(grammar: Grammar) -> str:
    out = io.StringIO()
    genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
    genr.generate("<string>")
    return out.getvalue()


def generate_parser_c_extension(
    grammar: Grammar,
    path: pathlib.PurePath,
    debug: bool = False,
    library_dir: Optional[str] = None,
) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    compile_c_extension(
        str(source),
        build_dir=str(path),
        # Significant test_peg_generator speedups
        disable_optimization=True,
        library_dir=library_dir,
    )


def print_memstats() -> bool:
    MiB: Final = 2**20
    try:
        import psutil
    except ImportError:
        return False
    print("Memory stats:")
    process = psutil.Process()
    meminfo = process.memory_info()
    res = {}
    res["rss"] = meminfo.rss / MiB
    res["vms"] = meminfo.vms / MiB
    if sys.platform == "win32":
        res["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        rusage = resource.getrusage(resource.RUSAGE_SELF)
        if sys.platform == "darwin":
            factor = 1
        else:
            factor = 1024  # Linux
        res["maxrss"] = rusage.ru_maxrss * factor / MiB
    for key, value in res.items():
        print(f"  {key:12.12s}: {value:10.0f} MiB")
    return True
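

# A minimal usage sketch, not part of the module's API: it only demonstrates
# how make_parser() and parse_string() above fit together.  The grammar and
# the input string below are illustrative assumptions, not values shipped
# with pegen.
if __name__ == "__main__":
    demo_grammar = """
    start: sum NEWLINE
    sum: term '+' term | term
    term: NUMBER
    """
    # Build a throwaway Python parser class from the grammar text...
    demo_parser_class = make_parser(demo_grammar)
    # ...and run it on a small input; this prints the resulting parse tree.
    print(parse_string("1 + 2\n", demo_parser_class))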