import importlib.util
import io
import os
import pathlib
import sys
import textwrap
import tokenize
import token

from typing import Any, cast, Dict, IO, Type, Final

from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

ALL_TOKENS = token.tok_name
EXACT_TOKENS = token.EXACT_TOKEN_TYPES  # type: ignore
NON_EXACT_TOKENS = {
    name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}


def generate_parser(grammar: Grammar) -> Type[Parser]:
    # Generate a parser.
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    genr.generate("<string>")

    # Load the generated parser class.
    ns: Dict[str, Any] = {}
    exec(out.getvalue(), ns)
    return ns["GeneratedParser"]


def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    # Run a parser on a file (stream).
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore # typeshed issue #3515
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error()
    return result


def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    # Run the parser on a string.
    if dedent:
        source = textwrap.dedent(source)
    file = io.StringIO(source)
    return run_parser(file, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515


def make_parser(source: str) -> Type[Parser]:
    # Combine parse_string() and generate_parser().
    grammar = parse_string(source, GrammarParser)
    return generate_parser(grammar)


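# Illustrative usage sketch for the helpers above; the toy grammar and the
# "42\n" input are assumptions chosen for the example, not part of any grammar
# shipped with pegen.
def _example_make_parser_usage(verbose: bool = False) -> Any:
    toy_grammar = """
    start: term NEWLINE
    term: NUMBER
    """
    # make_parser() turns the grammar text into a generated parser class...
    toy_parser_class = make_parser(toy_grammar)
    # ...and parse_string() drives that class over an input string.
    return parse_string("42\n", toy_parser_class, verbose=verbose)

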
def import_file(full_name: str, path: str) -> Any:
    """Import a Python module from a path."""

    spec = importlib.util.spec_from_file_location(full_name, path)
    mod = importlib.util.module_from_spec(spec)

    # We assume this is not None and has an exec_module() method.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod


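# Example (a sketch; the module names and paths below are hypothetical):
#
#     parse_extension = import_file("parse", "/tmp/pegen_build/parse.cpython-39-x86_64-linux-gnu.so")
#     helpers = import_file("helpers", "/tmp/scratch/helpers.py")  # plain .py files work too

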
def generate_c_parser_source(grammar: Grammar) -> str:
    out = io.StringIO()
    genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
    genr.generate("<string>")
    return out.getvalue()


def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
) -> Any:
    """Generate a parser C extension for the given grammar in the given path.

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    extension_path = compile_c_extension(str(source), build_dir=str(path))
    # Import the freshly built extension so callers actually get the module
    # object promised by the docstring.
    return import_file("parse", extension_path)


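# Example (a sketch; ``grammar_source`` and ``empty_tmp_dir`` are placeholders,
# and a working C compiler is assumed):
#
#     grammar = parse_string(grammar_source, GrammarParser)
#     extension = generate_parser_c_extension(grammar, pathlib.Path(empty_tmp_dir))
#     # ``extension`` is a module exposing parse_string(), per the docstring above.

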
def print_memstats() -> bool:
    MiB: Final = 2 ** 20
    try:
        import psutil  # type: ignore
    except ImportError:
        return False
    print("Memory stats:")
    process = psutil.Process()
    meminfo = process.memory_info()
    res = {}
    res["rss"] = meminfo.rss / MiB
    res["vms"] = meminfo.vms / MiB
    if sys.platform == "win32":
        res["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        rusage = resource.getrusage(resource.RUSAGE_SELF)
        if sys.platform == "darwin":
            factor = 1
        else:
            factor = 1024  # Linux
        res["maxrss"] = rusage.ru_maxrss * factor / MiB
    for key, value in res.items():
        print(f"  {key:12.12s}: {value:10.0f} MiB")
    return True
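
# Typical usage (a sketch): call at the end of a benchmark or test run; the
# function returns False when psutil is not installed.
#
#     if not print_memstats():
#         print("(memory stats unavailable: psutil not installed)")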