"""Context-sensitive tokenization for dependency specifiers and environment markers."""

import contextlib
import re
from dataclasses import dataclass
from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union

from .specifiers import Specifier


@dataclass
class Token:
    """A single lexed token.

    Attributes:
        name: the rule name from the tokenizer's rule table (e.g. "OP", "WS").
        text: the exact substring of the source that matched.
        position: zero-based offset of the match in the source string.
    """

    name: str
    text: str
    position: int


class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: Tuple[int, int],
    ) -> None:
        self.span = span
        self.message = message
        self.source = source

        super().__init__()

    def __str__(self) -> str:
        # Render the error as three indented lines: the message, the full
        # source, and a caret marker ("~" under the offending span, "^" at
        # its end) aligned beneath the source.
        marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
        return "\n    ".join([self.message, self.source, marker])


# Token-name -> pattern table. Plain strings are compiled to patterns by
# Tokenizer.__init__; pre-compiled entries are multi-line VERBOSE regexes.
DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        re.VERBOSE,
    ),
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    "WS": r"[ \t]+",
    "END": r"$",
}


class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: "Dict[str, Union[str, re.Pattern[str]]]",
    ) -> None:
        self.source = source
        # Normalize the rule table: compile any plain-string patterns so every
        # rule is a re.Pattern (re.compile passes pre-compiled patterns through).
        self.rules: Dict[str, re.Pattern[str]] = {
            name: re.compile(pattern) for name, pattern in rules.items()
        }
        # Token loaded by a successful check() but not yet consumed by read().
        self.next_token: Optional[Token] = None
        # Current scan offset into self.source.
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        # A pending token means the caller forgot to read() after a prior
        # successful check() — that is a programming error, not a parse error.
        assert (
            self.next_token is None
        ), f"Cannot check for {name!r}, already have {self.next_token!r}"
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        # Anchored match at the current position only (no scanning ahead).
        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        The token *is* consumed on success and returned to the caller.
        """
        if not self.check(name):
            # raise_syntax_error is NoReturn; it raises ParserSyntaxError itself.
            self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it."""
        token = self.next_token
        assert token is not None

        # Advance past the consumed text and clear the one-token lookahead.
        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: Optional[int] = None,
        span_end: Optional[int] = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position.

        The span defaults to the zero-width range at the current position;
        either end may be overridden to highlight a wider region.
        """
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        """Parse an optionally-bracketed region.

        If `open_token` is present it is consumed, the body runs, and the
        matching `close_token` is then required (a syntax error anchored at
        the opening token is raised if it is missing). If `open_token` is
        absent, the body runs with no closing requirement.
        """
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()