• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import annotations
2import collections
3import dataclasses as dc
4import re
5import shlex
6from typing import Any
7
8import libclinic
9from libclinic import fail, ClinicError
10from libclinic.language import Language
11from libclinic.function import (
12    Module, Class, Function)
13
14
15@dc.dataclass(slots=True, repr=False)
16class Block:
17    r"""
18    Represents a single block of text embedded in
19    another file.  If dsl_name is None, the block represents
20    verbatim text, raw original text from the file, in
21    which case "input" will be the only non-false member.
22    If dsl_name is not None, the block represents a Clinic
23    block.
24
25    input is always str, with embedded \n characters.
26    input represents the original text from the file;
27    if it's a Clinic block, it is the original text with
28    the body_prefix and redundant leading whitespace removed.
29
30    dsl_name is either str or None.  If str, it's the text
31    found on the start line of the block between the square
32    brackets.
33
34    signatures is a list.
35    It may only contain clinic.Module, clinic.Class, and
36    clinic.Function objects.  At the moment it should
37    contain at most one of each.
38
39    output is either str or None.  If str, it's the output
40    from this block, with embedded '\n' characters.
41
42    indent is a str.  It's the leading whitespace
43    that was found on every line of input.  (If body_prefix is
44    not empty, this is the indent *after* removing the
45    body_prefix.)
46
47    "indent" is different from the concept of "preindent"
48    (which is not stored as state on Block objects).
49    "preindent" is the whitespace that
50    was found in front of every line of input *before* the
51    "body_prefix" (see the Language object).  If body_prefix
52    is empty, preindent must always be empty too.
53
54    To illustrate the difference between "indent" and "preindent":
55
56    Assume that '_' represents whitespace.
57    If the block processed was in a Python file, and looked like this:
58      ____#/*[python]
59      ____#__for a in range(20):
60      ____#____print(a)
61      ____#[python]*/
62    "preindent" would be "____" and "indent" would be "__".
63
64    """
65    input: str
66    dsl_name: str | None = None
67    signatures: list[Module | Class | Function] = dc.field(default_factory=list)
68    output: Any = None  # TODO: Very dynamic; probably untypeable in its current form?
69    indent: str = ''
70
71    def __repr__(self) -> str:
72        dsl_name = self.dsl_name or "text"
73        def summarize(s: object) -> str:
74            s = repr(s)
75            if len(s) > 30:
76                return s[:26] + "..." + s[0]
77            return s
78        parts = (
79            repr(dsl_name),
80            f"input={summarize(self.input)}",
81            f"output={summarize(self.output)}"
82        )
83        return f"<clinic.Block {' '.join(parts)}>"
84
85
class BlockParser:
    """
    Block-oriented parser for Argument Clinic.
    Iterator, yields Block objects.

    Each call to next() returns either a verbatim block (text up to
    the next start line) or, if a start line has just been seen,
    the Clinic block introduced by that start line.
    """

    def __init__(
            self,
            input: str,
            language: Language,
            *,
            verify: bool = True
    ) -> None:
        r"""
        "input" should be a str object
        with embedded \n characters.

        "language" should be a Language object.

        "verify" controls whether parse_clinic_block() checks the
        checksum found on a checksum line against the output text
        that precedes it.
        """
        language.validate()

        # The lines are stored in reverse order so that the next line to
        # parse is always at the right-hand end of the deque: _line() pops
        # from there and parse_clinic_block() pushes lookahead lines back.
        self.input = collections.deque(reversed(input.splitlines(keepends=True)))
        self.block_start_line_number = self.line_number = 0

        self.language = language
        before, _, after = language.start_line.partition('{dsl_name}')
        assert _ == '{dsl_name}'
        self.find_start_re = libclinic.create_regex(before, after,
                                                    whole_line=False)
        self.start_re = libclinic.create_regex(before, after)
        self.verify = verify
        # Checksum regex cached for the most recently parsed dsl_name.
        self.last_checksum_re: re.Pattern[str] | None = None
        self.last_dsl_name: str | None = None
        # Set by parse_verbatim_block() when it stops at a start line;
        # tells __next__() that a Clinic block comes next.
        self.dsl_name: str | None = None
        self.first_block = True

    def __iter__(self) -> BlockParser:
        """Return the parser itself; it is its own iterator."""
        return self

    def __next__(self) -> Block:
        """
        Return the next Block, or raise StopIteration when no input is left.

        A ClinicError raised while parsing a Clinic block is re-raised
        after its "filename" and "lineno" attributes have been filled in
        from the language object and the current line number.
        """
        while True:
            if not self.input:
                raise StopIteration

            if self.dsl_name:
                try:
                    return_value = self.parse_clinic_block(self.dsl_name)
                except ClinicError as exc:
                    exc.filename = self.language.filename
                    exc.lineno = self.line_number
                    raise
                self.dsl_name = None
                self.first_block = False
                return return_value
            block = self.parse_verbatim_block()
            # Don't yield an empty verbatim block when the input starts
            # directly with a start line.
            if self.first_block and not block.input:
                continue
            self.first_block = False
            return block

    def is_start_line(self, line: str) -> str | None:
        """
        Return the DSL name if "line" is a block start line, else None.

        Leading whitespace on "line" is ignored.
        """
        match = self.start_re.match(line.lstrip())
        return match.group(1) if match else None

    def _line(self, lookahead: bool = False) -> str:
        """
        Pop and return the next input line, bumping the line counter.

        Unless "lookahead" is true, the line is also passed to the
        language's parse_line() method.
        """
        self.line_number += 1
        line = self.input.pop()
        if not lookahead:
            self.language.parse_line(line)
        return line

    def parse_verbatim_block(self) -> Block:
        """
        Consume lines up to and including the next start line.

        Returns a Block holding the text before the start line (or all
        remaining text if there is none).  When a start line is found,
        its DSL name is stored in self.dsl_name.
        """
        lines = []
        self.block_start_line_number = self.line_number

        while self.input:
            line = self._line()
            dsl_name = self.is_start_line(line)
            if dsl_name:
                self.dsl_name = dsl_name
                break
            lines.append(line)

        return Block("".join(lines))

    def parse_clinic_block(self, dsl_name: str) -> Block:
        """
        Parse the body of a Clinic block whose start line was just consumed.

        The body runs up to the stop line (or the next start line).
        After it, the parser looks ahead for a checksum line; if one
        is found, the text in between becomes the block's output and,
        when self.verify is true, its checksum is checked.  If no
        checksum line is found, the lookahead lines are pushed back
        and the block's output is None.

        Calls fail() on malformed stop lines, mangled checksum lines
        and checksum mismatches.
        """
        in_lines = []
        self.block_start_line_number = self.line_number + 1
        stop_line = self.language.stop_line.format(dsl_name=dsl_name)
        body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)

        def is_stop_line(line: str) -> bool:
            # make sure to recognize stop line even if it
            # doesn't end with EOL (it could be the very end of the file)
            if line.startswith(stop_line):
                remainder = line.removeprefix(stop_line)
                if remainder and not remainder.isspace():
                    fail(f"Garbage after stop line: {remainder!r}")
                return True
            else:
                # gh-92256: don't allow incorrectly formatted stop lines
                if line.lstrip().startswith(stop_line):
                    fail(f"Whitespace is not allowed before the stop line: {line!r}")
                return False

        # consume body of program
        while self.input:
            line = self._line()
            if is_stop_line(line) or self.is_start_line(line):
                break
            if body_prefix:
                line = line.lstrip()
                assert line.startswith(body_prefix)
                line = line.removeprefix(body_prefix)
            in_lines.append(line)

        # consume output and checksum line, if present.
        if self.last_dsl_name == dsl_name:
            # Same DSL as the previous block: reuse the compiled regex.
            checksum_re = self.last_checksum_re
        else:
            before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}')
            assert _ == '{arguments}'
            checksum_re = libclinic.create_regex(before, after, word=False)
            self.last_dsl_name = dsl_name
            self.last_checksum_re = checksum_re
        assert checksum_re is not None

        # scan forward for checksum line
        out_lines = []
        arguments = None
        while self.input:
            line = self._line(lookahead=True)
            match = checksum_re.match(line.lstrip())
            arguments = match.group(1) if match else None
            if arguments:
                break
            out_lines.append(line)
            # Reaching another start line means this block has no
            # checksum line of its own.
            if self.is_start_line(line):
                break

        if not arguments:
            # No checksum line: put the lookahead lines back.  Every
            # element of out_lines is already a single input line, so
            # they can be pushed back as they are (in reverse, because
            # the deque is consumed from the right).
            self.line_number -= len(out_lines)
            self.input.extend(reversed(out_lines))
            return Block("".join(in_lines), dsl_name, output=None)

        output = "".join(out_lines)

        # The checksum line carries shell-style "name=value" fields.
        d = {}
        for field in shlex.split(arguments):
            name, equals, value = field.partition('=')
            if not equals:
                fail(f"Mangled Argument Clinic marker line: {line!r}")
            d[name.strip()] = value.strip()

        if self.verify:
            # If the marker has an "input" field, the checksum of the
            # output is stored under "output"; otherwise it is stored
            # under "checksum".
            key = 'output' if 'input' in d else 'checksum'
            if key not in d:
                # Report a proper Clinic error rather than letting a bare
                # KeyError escape for a marker line lacking the field.
                fail(f"Mangled Argument Clinic marker line "
                     f"(no {key!r} field): {line!r}")
            checksum = d[key]

            computed = libclinic.compute_checksum(output, len(checksum))
            if checksum != computed:
                fail("Checksum mismatch! "
                     f"Expected {checksum!r}, computed {computed!r}. "
                     "Suggested fix: remove all generated code including "
                     "the end marker, or use the '-f' option.")

        return Block("".join(in_lines), dsl_name, output=output)
257