from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
from fontTools.feaLib.location import FeatureLibLocation
import re
import os


class Lexer(object):
    """Tokenizer for a single feature-file text.

    Iterating over a Lexer yields ``(token_type, token, location)`` tuples,
    where ``token_type`` is one of the string constants below and
    ``location`` is a ``FeatureLibLocation``. NEWLINE tokens are produced
    internally by ``next_()`` but are skipped by the iterator protocol.
    """

    # Token types.
    NUMBER = "NUMBER"
    HEXADECIMAL = "HEXADECIMAL"
    OCTAL = "OCTAL"
    NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    # Lexer modes: after the NAME token "include" the lexer switches to
    # MODE_FILENAME_ so that the next token is read as "(file name)".
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        ``filename`` is only used for locations; when it is falsy,
        locations report ``"<features>"`` instead.
        """
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a NEWLINE.

        Raises StopIteration at the end of the text.
        """
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the location (file, line, 1-based column) of ``self.pos_``."""
        column = self.pos_ - self.line_start_ + 1
        return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, including NEWLINE tokens.

        Returns a ``(token_type, token, location)`` tuple.

        Raises:
            StopIteration: when the end of the text has been reached.
            FeatureLibError: on malformed input (unexpected character,
                unterminated string or file name, invalid glyph class name,
                malformed hexadecimal or octal literal).
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None
        # ``None in "..."`` raises TypeError, so the look-ahead tests must
        # check for end of input explicitly. (Using "" as the sentinel would
        # be wrong too: the empty string is "in" every string.)
        next_is_digit = next_char is not None and next_char in Lexer.CHAR_DIGIT_

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break.
            self.pos_ += 2 if next_char == "\n" else 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start : self.pos_], location)

        if self.mode_ == Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            # The token is the text between the parentheses.
            return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1 : self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if len(glyphclass) > 63:
                raise FeatureLibError(
                    "Glyph class names must not be longer than 63 characters", location
                )
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen",
                    location,
                )
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start : self.pos_]
            if token == "include":
                # The next token must be read as "(file name)".
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char is not None and next_char in "xX":
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            if self.pos_ == start + 2:
                # "0x" with no digits would make int() raise a bare ValueError.
                raise FeatureLibError(
                    "Expected hexadecimal digits after %r" % text[start : start + 2],
                    location,
                )
            return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
        if cur_char == "0" and next_is_digit:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            literal = text[start : self.pos_]
            try:
                value = int(literal, 8)
            except ValueError as err:
                # The literal contains the digits 8 or 9.
                raise FeatureLibError(
                    "Invalid octal number: %r" % literal, location
                ) from err
            return (Lexer.OCTAL, value, location)
        if cur_char in Lexer.CHAR_DIGIT_:
            return self.scan_number_(start, location)
        if cur_char == "-" and next_is_digit:
            self.pos_ += 1  # consume the sign; digits follow
            return self.scan_number_(start, location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string", location)
        raise FeatureLibError("Unexpected character: %r" % cur_char, location)

    def scan_number_(self, start, location):
        """Scan the digits of a decimal NUMBER or FLOAT token.

        ``start`` is the index of the token's first character (a digit or a
        leading "-" that the caller has already consumed); ``self.pos_`` must
        point at the first digit still to be scanned.
        """
        text = self.text_
        self.scan_over_(Lexer.CHAR_DIGIT_)
        if self.pos_ >= self.text_length_ or text[self.pos_] != ".":
            return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
        self.scan_over_(".")
        self.scan_over_(Lexer.CHAR_DIGIT_)
        return (Lexer.FLOAT, float(text[start : self.pos_]), location)

    def scan_over_(self, valid):
        """Advance ``self.pos_`` past every consecutive character in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``self.pos_`` to the next character in ``stop_at``.

        Stops at the end of the text if no such character is found.
        """
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Scan the body of an anonymous block that ends with ``} tag;``.

        Skips the rest of the current line, then returns an ANONYMOUS_BLOCK
        token holding all text up to (not including) the closing ``} tag;``.
        The position is left at the closing ``}`` so it is lexed normally.

        Raises:
            FeatureLibError: if no closing ``} tag;`` is found.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # The tag is literal text, not a pattern: escape it so characters
        # such as "." or "+" in a tag are not read as regex operators.
        regexp = r"}\s*" + re.escape(tag) + r"\s*;"
        split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag, location
            )
        # NOTE(review): self.line_ / self.line_start_ are not advanced for the
        # newlines inside the block body, so locations reported after an
        # anonymous block look like they undercount lines - confirm.
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)


class IncludingLexer(object):
    """A Lexer that follows include statements.

    The OpenType feature file specification states that due to
    historical reasons, relative imports should be resolved in this
    order:

    1. If the source font is UFO format, then relative to the UFO's
       font directory
    2. relative to the top-level include file
    3. relative to the parent include file

    We only support 1 (via includeDir) and 2.
    """

    def __init__(self, featurefile, *, includeDir=None):
        """Initializes an IncludingLexer.

        Behavior:
            If includeDir is passed, it will be used to determine the top-level
            include directory to use for all encountered include statements. If it is
            not passed, ``os.path.dirname(featurefile)`` will be considered the
            include directory.
        """

        self.lexers_ = [self.make_lexer_(featurefile)]
        self.featurefilepath = self.lexers_[0].filename_
        self.includeDir = includeDir

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, transparently descending into includes.

        An ``include (file)`` statement is not returned as tokens; instead a
        lexer for the named file is pushed and its tokens are returned until
        it is exhausted.

        Raises:
            StopIteration: when every lexer on the stack is exhausted.
            FeatureLibError: if ``include`` is not followed by a file name,
                or if includes are nested too deeply.
            IncludedFeaNotFound: if the included file does not exist.
        """
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                self.lexers_.pop()
                continue
            if token_type == Lexer.NAME and token == "include":
                try:
                    fname_type, fname_token, fname_location = next(lexer)
                except StopIteration:
                    # "include" was the last token of the file. Without this,
                    # the StopIteration would escape and silently end
                    # iteration instead of reporting the syntax error.
                    raise FeatureLibError("Expected file name", location) from None
                if fname_type != Lexer.FILENAME:
                    raise FeatureLibError("Expected file name", fname_location)
                # The ';' after the file name is not consumed here; it stays
                # in the including lexer and is returned once the included
                # file's tokens are exhausted.
                # semi_type, semi_token, semi_location = lexer.next()
                # if semi_type is not Lexer.SYMBOL or semi_token != ";":
                #    raise FeatureLibError("Expected ';'", semi_location)
                if os.path.isabs(fname_token):
                    path = fname_token
                else:
                    if self.includeDir is not None:
                        curpath = self.includeDir
                    elif self.featurefilepath is not None:
                        curpath = os.path.dirname(self.featurefilepath)
                    else:
                        # if the IncludingLexer was initialized from an in-memory
                        # file-like stream, it doesn't have a 'name' pointing to
                        # its filesystem path, therefore we fall back to using the
                        # current working directory to resolve relative includes
                        curpath = os.getcwd()
                    path = os.path.join(curpath, fname_token)
                if len(self.lexers_) >= 5:
                    raise FeatureLibError("Too many recursive includes", fname_location)
                try:
                    self.lexers_.append(self.make_lexer_(path))
                except FileNotFoundError as err:
                    raise IncludedFeaNotFound(fname_token, fname_location) from err
            else:
                return (token_type, token, location)
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a Lexer from a readable file object or a filesystem path.

        A file object (anything with a ``read`` attribute) is read but not
        closed; its ``name`` attribute, if any, becomes the lexer's file name.
        A path is opened as UTF-8 text and always closed again.
        """
        if hasattr(file_or_path, "read"):
            data = file_or_path.read()
            filename = getattr(file_or_path, "name", None)
        else:
            # ``with`` closes the file even when read() raises (for example
            # UnicodeDecodeError on a file that is not valid UTF-8).
            with open(file_or_path, "r", encoding="utf-8") as fileobj:
                data = fileobj.read()
                filename = getattr(fileobj, "name", None)
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        """Delegate anonymous-block scanning to the innermost active lexer."""
        return self.lexers_[-1].scan_anonymous_block(tag)


class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements, emits them as-is."""

    def __next__(self):  # Python 3
        return next(self.lexers_[0])