• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import importlib.util
2import io
3import os
4import pathlib
5import sys
6import textwrap
7import token
8import tokenize
9from typing import IO, Any, Dict, Final, Optional, Type, cast
10
11from pegen.build import compile_c_extension
12from pegen.c_generator import CParserGenerator
13from pegen.grammar import Grammar
14from pegen.grammar_parser import GeneratedParser as GrammarParser
15from pegen.parser import Parser
16from pegen.python_generator import PythonParserGenerator
17from pegen.tokenizer import Tokenizer
18
# Token tables handed to CParserGenerator by the helpers in this module.
# ALL_TOKENS maps numeric token index -> token name.
ALL_TOKENS = token.tok_name
# EXACT_TOKENS is the stdlib table whose values are numeric token indexes.
EXACT_TOKENS = token.EXACT_TOKEN_TYPES
# Collect the exact-token indexes once; testing membership against
# ``EXACT_TOKENS.values()`` directly would rebuild and linearly scan that view
# for every entry of ``tok_name``.
_EXACT_TOKEN_IDS = frozenset(EXACT_TOKENS.values())
# Names of every token whose index is not one of the exact-token indexes.
NON_EXACT_TOKENS = {
    name for index, name in token.tok_name.items() if index not in _EXACT_TOKEN_IDS
}
24
25
def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Build a Python parser class for *grammar* without touching the disk.

    The PythonParserGenerator output is captured in an in-memory text buffer,
    executed in a fresh namespace, and the ``GeneratedParser`` object defined
    by that code is returned.
    """
    # Emit the parser source into a string buffer.
    buffer = io.StringIO()
    PythonParserGenerator(grammar, buffer).generate("<string>")

    # Execute the generated source and pick the parser class out of the
    # resulting namespace.
    namespace: Dict[str, Any] = {}
    exec(buffer.getvalue(), namespace)
    return namespace["GeneratedParser"]
36
37
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    """Parse the stream *file* with an instance of *parser_class*.

    The stream's ``readline`` feeds ``tokenize.generate_tokens``; the resulting
    tokens are wrapped in a Tokenizer and handed to the parser.  Whatever
    ``parser.start()`` produces is returned, unless it is ``None``, in which
    case the error built by ``parser.make_syntax_error("invalid syntax")`` is
    raised.
    """
    token_stream = tokenize.generate_tokens(file.readline)  # type: ignore[arg-type] # typeshed issue #3515
    parser = parser_class(Tokenizer(token_stream), verbose=verbose)
    tree = parser.start()
    if tree is not None:
        return tree
    raise parser.make_syntax_error("invalid syntax")
46
47
def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    """Parse the text *source* with *parser_class* and return the result.

    When *dedent* is true (the default) the text is first passed through
    ``textwrap.dedent``.  The text is then wrapped in an in-memory stream and
    delegated to ``run_parser()``, forwarding *verbose*.
    """
    text = textwrap.dedent(source) if dedent else source
    stream = io.StringIO(text)
    return run_parser(stream, parser_class, verbose=verbose)  # type: ignore[arg-type] # typeshed issue #3515
56
57
def make_parser(source: str) -> Type[Parser]:
    """Turn grammar text into a parser class.

    *source* is parsed with GrammarParser via ``parse_string()`` and the
    resulting grammar is fed to ``generate_parser()``.
    """
    return generate_parser(parse_string(source, GrammarParser))
62
63
def import_file(full_name: str, path: str) -> Any:
    """Import a Python module from a file path and return the module object.

    Args:
        full_name: Name given to the import spec (and so to the module).
        path: Location of the file to load.

    Returns:
        The executed module.  This function does not add it to
        ``sys.modules``.

    Raises:
        ImportError: If ``importlib`` cannot build an import spec for *path*
            (``spec_from_file_location`` returned ``None``).
    """
    spec = importlib.util.spec_from_file_location(full_name, path)
    if spec is None:
        # Raised explicitly instead of using ``assert`` so the check is not
        # stripped when Python runs with -O.
        raise ImportError(
            f"cannot create an import spec for {full_name!r} from {path!r}",
            name=full_name,
            path=path,
        )
    mod = importlib.util.module_from_spec(spec)

    # We assume the loader is not None and has an exec_module() method.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod
76
77
def generate_c_parser_source(grammar: Grammar) -> str:
    """Return the text CParserGenerator emits for *grammar*.

    The generator writes into an in-memory buffer using the module-level token
    tables, and the buffer's contents are returned as a string.
    """
    buffer = io.StringIO()
    generator = CParserGenerator(
        grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, buffer
    )
    generator.generate("<string>")
    return buffer.getvalue()
83
84
def generate_parser_c_extension(
    grammar: Grammar,
    path: pathlib.PurePath,
    debug: bool = False,
    library_dir: Optional[str] = None,
) -> Any:
    """Generate and compile a parser C extension for *grammar* inside *path*.

    The generated C source is written to ``path / "parse.c"`` and then passed
    to ``compile_c_extension()`` with *path* as the build directory.

    Args:
        grammar: Grammar to generate the parser from.
        path: Directory to write and build in; it must be empty.
        debug: Forwarded to CParserGenerator as ``debug``.
        library_dir: Forwarded to ``compile_c_extension()`` as ``library_dir``.

    Returns:
        None.  The function only writes and compiles the extension; it does
        not import or return the built module.
        TODO: if a module is ever returned, express its interface with a
        Protocol.

    Raises:
        AssertionError: If *path* is not an empty directory.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    # Raised explicitly rather than via ``assert`` so the guard is still
    # enforced under ``python -O``; the exception type is unchanged.
    if os.listdir(path):
        raise AssertionError(f"extension build directory must be empty: {path}")
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    compile_c_extension(
        str(source),
        build_dir=str(path),
        # Significant test_peg_generator speedups
        disable_optimization=True,
        library_dir=library_dir,
    )
114
115
def print_memstats() -> bool:
    """Print rss, vms and maxrss figures for the current process, in MiB.

    Returns False, printing nothing, when psutil cannot be imported; returns
    True after the report has been printed.
    """
    try:
        import psutil
    except ImportError:
        return False

    MiB: Final = 2**20
    print("Memory stats:")
    info = psutil.Process().memory_info()
    stats = {"rss": info.rss / MiB, "vms": info.vms / MiB}

    if sys.platform == "win32":
        peak = info.peak_wset
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        # ru_maxrss is used unscaled on darwin and multiplied by 1024
        # everywhere else (the Linux case).
        scale = 1 if sys.platform == "darwin" else 1024
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * scale
    stats["maxrss"] = peak / MiB

    for label, amount in stats.items():
        print(f"  {label:12.12s}: {amount:10.0f} MiB")
    return True
143