import token
import tokenize
from typing import Iterator, List

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore


def shorttok(tok: tokenize.TokenInfo) -> str:
    """Format a token compactly as 'row.col: TYPE:string', padded/truncated to 25 chars."""
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    # All tokens consumed from the generator so far; backtracking via
    # mark()/reset() replays from this cache instead of re-tokenizing.
    _tokens: List[tokenize.TokenInfo]

    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        # The cache is "hit" only if the token was already fetched before this
        # call; compute that before peek() possibly pulls from the generator.
        cached = self._index != len(self._tokens)
        tok = self.peek()  # fills the cache if needed; skip rules live there
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # NL (non-logical newline) and COMMENT tokens are never
            # significant to the grammar, so they are filtered out here.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            # Whitespace-only ERRORTOKENs (e.g. stray form feeds) are
            # likewise dropped rather than surfaced to the parser.
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token seen, fetching one first if none are cached."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def mark(self) -> Mark:
        """Return an opaque position that reset() can later return to."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Rewind (or fast-forward) to a position previously returned by mark()."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print a one-line trace of the current position (verbose mode only)."""
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")