from fontTools.voltLib.error import VoltLibError


class Lexer(object):
    """Iterator that splits a text into NUMBER, STRING and NAME tokens.

    Iterating over a ``Lexer`` yields ``(token_type, token, location)``
    tuples, where ``location`` is ``(filename, line, column)`` with 1-based
    line and column numbers.  When no filename was given, ``"<volt>"`` is
    reported instead.

    Token rules, as implemented by ``next_``:

    * NUMBER -- a run of decimal digits, optionally preceded by ``-`` when
      the minus sign is immediately followed by a digit; the token value is
      an ``int``.
    * STRING -- text enclosed in double quotes on a single line; the token
      value is the text between the quotes (no escape processing is done).
    * NAME -- starts with an ASCII letter, period or underscore and
      continues with those characters or digits.
    * NEWLINE -- produced internally for LF, CR or CR LF so that line
      numbers can be tracked; ``__next__`` never returns it to the caller.

    Spaces and tabs between tokens are skipped.  Any other character raises
    ``VoltLibError``.
    """

    # Token type tags returned as the first element of each token tuple.
    NUMBER = "NUMBER"
    STRING = "STRING"
    NAME = "NAME"
    NEWLINE = "NEWLINE"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_DIGIT_ = "0123456789"
    CHAR_UC_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    CHAR_LC_LETTER_ = "abcdefghijklmnopqrstuvwxyz"
    CHAR_UNDERSCORE_ = "_"
    CHAR_PERIOD_ = "."
    CHAR_NAME_START_ = CHAR_UC_LETTER_ + CHAR_LC_LETTER_ + CHAR_PERIOD_ + \
        CHAR_UNDERSCORE_
    CHAR_NAME_CONTINUATION_ = CHAR_NAME_START_ + CHAR_DIGIT_

    def __init__(self, text, filename):
        """Prepare to tokenize ``text``.

        ``filename`` is only used for the locations reported with each
        token and in errors; a falsy value is reported as ``"<volt>"``.
        """
        self.filename_ = filename
        self.line_ = 1          # current 1-based line number
        self.pos_ = 0           # index of the next unread character
        self.line_start_ = 0    # index where the current line begins
        self.text_ = text
        self.text_length_ = len(text)

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next non-NEWLINE token as (type, value, location).

        Raises ``StopIteration`` at the end of the text and
        ``VoltLibError`` on malformed input (both propagate from
        ``next_``).
        """
        while True:
            token_type, token, location = self.next_()
            # NEWLINE tokens only exist for line bookkeeping; skip them.
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return (filename, line, column) for the current position."""
        column = self.pos_ - self.line_start_ + 1
        return (self.filename_ or "<volt>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, NEWLINE tokens included.

        Returns a ``(token_type, token, location)`` tuple; ``token`` is
        ``None`` for NEWLINE, an ``int`` for NUMBER and a ``str`` for
        STRING and NAME.

        Raises:
            StopIteration: when the end of the text has been reached.
            VoltLibError: on an unterminated string or on a character that
                cannot start any token.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        # One character of lookahead; None when cur_char is the last one.
        next_char = text[start + 1] if start + 1 < limit else None

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat CR LF as a single line break.
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == '"':
            self.pos_ += 1
            # Stop at the closing quote, or at a line break / end of text,
            # in which case the string is unterminated.
            self.scan_until_('"\r\n')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                return (Lexer.STRING, text[start + 1:self.pos_ - 1], location)
            else:
                raise VoltLibError("Expected '\"' to terminate string",
                                   location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            return (Lexer.NAME, token, location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        # The explicit None check matters: without it a "-" at the very end
        # of the text would evaluate ``None in "0123456789"`` and raise
        # TypeError instead of the VoltLibError below.
        if (cur_char == "-" and next_char is not None
                and next_char in Lexer.CHAR_DIGIT_):
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        raise VoltLibError("Unexpected character: '%s'" % cur_char,
                           location)

    def scan_over_(self, valid):
        """Advance the position past every character found in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance to the first character in ``stop_at`` or to the end."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p