import contextlib
import re
from dataclasses import dataclass
from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union

from .specifiers import Specifier


@dataclass
class Token:
    name: str
    text: str
    position: int


class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: Tuple[int, int],
    ) -> None:
        self.span = span
        self.message = message
        self.source = source

        super().__init__()

    def __str__(self) -> str:
        marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
        return "\n    ".join([self.message, self.source, marker])

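# Illustration (hypothetical input, not exercised anywhere in this module) of
# how ``__str__`` renders an error: the message, the offending source text,
# and a tilde/caret marker aligned under the reported span.
#
#     >>> err = ParserSyntaxError(
#     ...     "Expected closing RIGHT_BRACKET",
#     ...     source="name[extra",
#     ...     span=(4, 10),
#     ... )
#     >>> print(err)
#     Expected closing RIGHT_BRACKET
#         name[extra
#             ~~~~~~^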

DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        re.VERBOSE,
    ),
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    "WS": r"[ \t]+",
    "END": r"$",
}

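# Illustration (hypothetical source string, not used anywhere in this module):
# values in the table above may be plain pattern strings or pre-compiled
# ``re.Pattern`` objects; ``Tokenizer.__init__`` passes each through
# ``re.compile``, which returns already-compiled patterns unchanged.
#
#     >>> source = 'python_version >= "3.8"'
#     >>> re.compile(DEFAULT_RULES["VARIABLE"]).match(source)[0]
#     'python_version'
#     >>> re.compile(DEFAULT_RULES["OP"]).match(source, 15)[0]
#     '>='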


class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: "Dict[str, Union[str, re.Pattern[str]]]",
    ) -> None:
        self.source = source
        self.rules: Dict[str, re.Pattern[str]] = {
            name: re.compile(pattern) for name, pattern in rules.items()
        }
        self.next_token: Optional[Token] = None
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        assert (
            self.next_token is None
        ), f"Cannot check for {name!r}, already have {self.next_token!r}"
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        The matched token is read (consumed) and returned.
        """
        if not self.check(name):
            raise self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it."""
        token = self.next_token
        assert token is not None

        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: Optional[int] = None,
        span_end: Optional[int] = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position."""
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        """Parse within an optional matched pair of tokens (e.g. brackets).

        If the opening token is present, it is consumed and the matching
        closing token is required once the enclosed block has been parsed;
        otherwise the block is parsed bare.
        """
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()
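

# Minimal usage sketch, kept out of import-time execution. The marker string
# and the particular sequence of expect/consume calls below are illustrative
# assumptions about how a caller might drive this class, not an excerpt from
# the real parser that imports it.
if __name__ == "__main__":
    # Tokenize a simple environment-marker expression by hand, the same way a
    # parser would: check/expect specific rule names at each position.
    demo = Tokenizer('os_name == "posix"', rules=DEFAULT_RULES)

    variable = demo.expect("VARIABLE", expected="a marker variable")
    demo.consume("WS")
    operator = demo.expect("OP", expected="a comparison operator")
    demo.consume("WS")
    value = demo.expect("QUOTED_STRING", expected="a quoted string")
    demo.expect("END", expected="end of input")

    print(variable)
    print(operator)
    print(value)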