1import fontTools.voltLib.ast as ast 2from fontTools.voltLib.lexer import Lexer 3from fontTools.voltLib.error import VoltLibError 4from io import open 5 6PARSE_FUNCS = { 7 "DEF_GLYPH": "parse_def_glyph_", 8 "DEF_GROUP": "parse_def_group_", 9 "DEF_SCRIPT": "parse_def_script_", 10 "DEF_LOOKUP": "parse_def_lookup_", 11 "DEF_ANCHOR": "parse_def_anchor_", 12 "GRID_PPEM": "parse_ppem_", 13 "PRESENTATION_PPEM": "parse_ppem_", 14 "PPOSITIONING_PPEM": "parse_ppem_", 15 "COMPILER_USEEXTENSIONLOOKUPS": "parse_noarg_option_", 16 "COMPILER_USEPAIRPOSFORMAT2": "parse_noarg_option_", 17 "CMAP_FORMAT": "parse_cmap_format", 18 "DO_NOT_TOUCH_CMAP": "parse_noarg_option_", 19} 20 21 22class Parser(object): 23 def __init__(self, path): 24 self.doc_ = ast.VoltFile() 25 self.glyphs_ = OrderedSymbolTable() 26 self.groups_ = SymbolTable() 27 self.anchors_ = {} # dictionary of SymbolTable() keyed by glyph 28 self.scripts_ = SymbolTable() 29 self.langs_ = SymbolTable() 30 self.lookups_ = SymbolTable() 31 self.next_token_type_, self.next_token_ = (None, None) 32 self.next_token_location_ = None 33 self.make_lexer_(path) 34 self.advance_lexer_() 35 36 def make_lexer_(self, file_or_path): 37 if hasattr(file_or_path, "read"): 38 filename = getattr(file_or_path, "name", None) 39 data = file_or_path.read() 40 else: 41 filename = file_or_path 42 with open(file_or_path, "r") as f: 43 data = f.read() 44 self.lexer_ = Lexer(data, filename) 45 46 def parse(self): 47 statements = self.doc_.statements 48 while self.next_token_type_ is not None: 49 self.advance_lexer_() 50 if self.cur_token_ in PARSE_FUNCS.keys(): 51 func = getattr(self, PARSE_FUNCS[self.cur_token_]) 52 statements.append(func()) 53 elif self.is_cur_keyword_("END"): 54 break 55 else: 56 raise VoltLibError( 57 "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())), 58 self.cur_token_location_) 59 return self.doc_ 60 61 def parse_def_glyph_(self): 62 assert self.is_cur_keyword_("DEF_GLYPH") 63 location = self.cur_token_location_ 64 name = self.expect_string_() 65 self.expect_keyword_("ID") 66 gid = self.expect_number_() 67 if gid < 0: 68 raise VoltLibError("Invalid glyph ID", self.cur_token_location_) 69 gunicode = None 70 if self.next_token_ == "UNICODE": 71 self.expect_keyword_("UNICODE") 72 gunicode = [self.expect_number_()] 73 if gunicode[0] < 0: 74 raise VoltLibError("Invalid glyph UNICODE", 75 self.cur_token_location_) 76 elif self.next_token_ == "UNICODEVALUES": 77 self.expect_keyword_("UNICODEVALUES") 78 gunicode = self.parse_unicode_values_() 79 gtype = None 80 if self.next_token_ == "TYPE": 81 self.expect_keyword_("TYPE") 82 gtype = self.expect_name_() 83 assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT") 84 components = None 85 if self.next_token_ == "COMPONENTS": 86 self.expect_keyword_("COMPONENTS") 87 components = self.expect_number_() 88 self.expect_keyword_("END_GLYPH") 89 if self.glyphs_.resolve(name) is not None: 90 raise VoltLibError( 91 'Glyph "%s" (gid %i) already defined' % (name, gid), 92 location 93 ) 94 def_glyph = ast.GlyphDefinition(name, gid, 95 gunicode, gtype, components, 96 location=location) 97 self.glyphs_.define(name, def_glyph) 98 return def_glyph 99 100 def parse_def_group_(self): 101 assert self.is_cur_keyword_("DEF_GROUP") 102 location = self.cur_token_location_ 103 name = self.expect_string_() 104 enum = None 105 if self.next_token_ == "ENUM": 106 enum = self.parse_enum_() 107 self.expect_keyword_("END_GROUP") 108 if self.groups_.resolve(name) is not None: 109 raise VoltLibError( 110 'Glyph group "%s" already defined, ' 111 'group names are case insensitive' % name, 112 location 113 ) 114 def_group = ast.GroupDefinition(name, enum, 115 location=location) 116 self.groups_.define(name, def_group) 117 return def_group 118 119 def parse_def_script_(self): 120 assert self.is_cur_keyword_("DEF_SCRIPT") 121 location = self.cur_token_location_ 122 name = None 123 if self.next_token_ == "NAME": 124 self.expect_keyword_("NAME") 125 name = self.expect_string_() 126 self.expect_keyword_("TAG") 127 tag = self.expect_string_() 128 if self.scripts_.resolve(tag) is not None: 129 raise VoltLibError( 130 'Script "%s" already defined, ' 131 'script tags are case insensitive' % tag, 132 location 133 ) 134 self.langs_.enter_scope() 135 langs = [] 136 while self.next_token_ != "END_SCRIPT": 137 self.advance_lexer_() 138 lang = self.parse_langsys_() 139 self.expect_keyword_("END_LANGSYS") 140 if self.langs_.resolve(lang.tag) is not None: 141 raise VoltLibError( 142 'Language "%s" already defined in script "%s", ' 143 'language tags are case insensitive' % (lang.tag, tag), 144 location 145 ) 146 self.langs_.define(lang.tag, lang) 147 langs.append(lang) 148 self.expect_keyword_("END_SCRIPT") 149 self.langs_.exit_scope() 150 def_script = ast.ScriptDefinition(name, tag, langs, location=location) 151 self.scripts_.define(tag, def_script) 152 return def_script 153 154 def parse_langsys_(self): 155 assert self.is_cur_keyword_("DEF_LANGSYS") 156 location = self.cur_token_location_ 157 name = None 158 if self.next_token_ == "NAME": 159 self.expect_keyword_("NAME") 160 name = self.expect_string_() 161 self.expect_keyword_("TAG") 162 tag = self.expect_string_() 163 features = [] 164 while self.next_token_ != "END_LANGSYS": 165 self.advance_lexer_() 166 feature = self.parse_feature_() 167 self.expect_keyword_("END_FEATURE") 168 features.append(feature) 169 def_langsys = ast.LangSysDefinition(name, tag, features, 170 location=location) 171 return def_langsys 172 173 def parse_feature_(self): 174 assert self.is_cur_keyword_("DEF_FEATURE") 175 location = self.cur_token_location_ 176 self.expect_keyword_("NAME") 177 name = self.expect_string_() 178 self.expect_keyword_("TAG") 179 tag = self.expect_string_() 180 lookups = [] 181 while self.next_token_ != "END_FEATURE": 182 # self.advance_lexer_() 183 self.expect_keyword_("LOOKUP") 184 lookup = self.expect_string_() 185 lookups.append(lookup) 186 feature = ast.FeatureDefinition(name, tag, lookups, 187 location=location) 188 return feature 189 190 def parse_def_lookup_(self): 191 assert self.is_cur_keyword_("DEF_LOOKUP") 192 location = self.cur_token_location_ 193 name = self.expect_string_() 194 if not name[0].isalpha(): 195 raise VoltLibError( 196 'Lookup name "%s" must start with a letter' % name, 197 location 198 ) 199 if self.lookups_.resolve(name) is not None: 200 raise VoltLibError( 201 'Lookup "%s" already defined, ' 202 'lookup names are case insensitive' % name, 203 location 204 ) 205 process_base = True 206 if self.next_token_ == "PROCESS_BASE": 207 self.advance_lexer_() 208 elif self.next_token_ == "SKIP_BASE": 209 self.advance_lexer_() 210 process_base = False 211 process_marks = True 212 mark_glyph_set = None 213 if self.next_token_ == "PROCESS_MARKS": 214 self.advance_lexer_() 215 if self.next_token_ == "MARK_GLYPH_SET": 216 self.advance_lexer_() 217 mark_glyph_set = self.expect_string_() 218 elif self.next_token_ == "ALL": 219 self.advance_lexer_() 220 elif self.next_token_ == "NONE": 221 self.advance_lexer_() 222 process_marks = False 223 elif self.next_token_type_ == Lexer.STRING: 224 process_marks = self.expect_string_() 225 else: 226 raise VoltLibError( 227 "Expected ALL, NONE, MARK_GLYPH_SET or an ID. " 228 "Got %s" % (self.next_token_type_), 229 location) 230 elif self.next_token_ == "SKIP_MARKS": 231 self.advance_lexer_() 232 process_marks = False 233 direction = None 234 if self.next_token_ == "DIRECTION": 235 self.expect_keyword_("DIRECTION") 236 direction = self.expect_name_() 237 assert direction in ("LTR", "RTL") 238 reversal = None 239 if self.next_token_ == "REVERSAL": 240 self.expect_keyword_("REVERSAL") 241 reversal = True 242 comments = None 243 if self.next_token_ == "COMMENTS": 244 self.expect_keyword_("COMMENTS") 245 comments = self.expect_string_().replace(r'\n', '\n') 246 context = [] 247 while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"): 248 context = self.parse_context_() 249 as_pos_or_sub = self.expect_name_() 250 sub = None 251 pos = None 252 if as_pos_or_sub == "AS_SUBSTITUTION": 253 sub = self.parse_substitution_(reversal) 254 elif as_pos_or_sub == "AS_POSITION": 255 pos = self.parse_position_() 256 else: 257 raise VoltLibError( 258 "Expected AS_SUBSTITUTION or AS_POSITION. " 259 "Got %s" % (as_pos_or_sub), 260 location) 261 def_lookup = ast.LookupDefinition( 262 name, process_base, process_marks, mark_glyph_set, direction, 263 reversal, comments, context, sub, pos, location=location) 264 self.lookups_.define(name, def_lookup) 265 return def_lookup 266 267 def parse_context_(self): 268 location = self.cur_token_location_ 269 contexts = [] 270 while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"): 271 side = None 272 coverage = None 273 ex_or_in = self.expect_name_() 274 # side_contexts = [] # XXX 275 if self.next_token_ != "END_CONTEXT": 276 left = [] 277 right = [] 278 while self.next_token_ in ("LEFT", "RIGHT"): 279 side = self.expect_name_() 280 coverage = self.parse_coverage_() 281 if side == "LEFT": 282 left.append(coverage) 283 else: 284 right.append(coverage) 285 self.expect_keyword_("END_CONTEXT") 286 context = ast.ContextDefinition(ex_or_in, left, 287 right, location=location) 288 contexts.append(context) 289 else: 290 self.expect_keyword_("END_CONTEXT") 291 return contexts 292 293 def parse_substitution_(self, reversal): 294 assert self.is_cur_keyword_("AS_SUBSTITUTION") 295 location = self.cur_token_location_ 296 src = [] 297 dest = [] 298 if self.next_token_ != "SUB": 299 raise VoltLibError("Expected SUB", location) 300 while self.next_token_ == "SUB": 301 self.expect_keyword_("SUB") 302 src.append(self.parse_coverage_()) 303 self.expect_keyword_("WITH") 304 dest.append(self.parse_coverage_()) 305 self.expect_keyword_("END_SUB") 306 self.expect_keyword_("END_SUBSTITUTION") 307 max_src = max([len(cov) for cov in src]) 308 max_dest = max([len(cov) for cov in dest]) 309 # many to many or mixed is invalid 310 if ((max_src > 1 and max_dest > 1) or 311 (reversal and (max_src > 1 or max_dest > 1))): 312 raise VoltLibError( 313 "Invalid substitution type", 314 location) 315 mapping = dict(zip(tuple(src), tuple(dest))) 316 if max_src == 1 and max_dest == 1: 317 if reversal: 318 sub = ast.SubstitutionReverseChainingSingleDefinition( 319 mapping, location=location) 320 else: 321 sub = ast.SubstitutionSingleDefinition(mapping, 322 location=location) 323 elif max_src == 1 and max_dest > 1: 324 sub = ast.SubstitutionMultipleDefinition(mapping, 325 location=location) 326 elif max_src > 1 and max_dest == 1: 327 sub = ast.SubstitutionLigatureDefinition(mapping, 328 location=location) 329 return sub 330 331 def parse_position_(self): 332 assert self.is_cur_keyword_("AS_POSITION") 333 location = self.cur_token_location_ 334 pos_type = self.expect_name_() 335 if pos_type not in ( 336 "ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"): 337 raise VoltLibError( 338 "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE", 339 location) 340 if pos_type == "ATTACH": 341 position = self.parse_attach_() 342 elif pos_type == "ATTACH_CURSIVE": 343 position = self.parse_attach_cursive_() 344 elif pos_type == "ADJUST_PAIR": 345 position = self.parse_adjust_pair_() 346 elif pos_type == "ADJUST_SINGLE": 347 position = self.parse_adjust_single_() 348 self.expect_keyword_("END_POSITION") 349 return position 350 351 def parse_attach_(self): 352 assert self.is_cur_keyword_("ATTACH") 353 location = self.cur_token_location_ 354 coverage = self.parse_coverage_() 355 coverage_to = [] 356 self.expect_keyword_("TO") 357 while self.next_token_ != "END_ATTACH": 358 cov = self.parse_coverage_() 359 self.expect_keyword_("AT") 360 self.expect_keyword_("ANCHOR") 361 anchor_name = self.expect_string_() 362 coverage_to.append((cov, anchor_name)) 363 self.expect_keyword_("END_ATTACH") 364 position = ast.PositionAttachDefinition( 365 coverage, coverage_to, location=location) 366 return position 367 368 def parse_attach_cursive_(self): 369 assert self.is_cur_keyword_("ATTACH_CURSIVE") 370 location = self.cur_token_location_ 371 coverages_exit = [] 372 coverages_enter = [] 373 while self.next_token_ != "ENTER": 374 self.expect_keyword_("EXIT") 375 coverages_exit.append(self.parse_coverage_()) 376 while self.next_token_ != "END_ATTACH": 377 self.expect_keyword_("ENTER") 378 coverages_enter.append(self.parse_coverage_()) 379 self.expect_keyword_("END_ATTACH") 380 position = ast.PositionAttachCursiveDefinition( 381 coverages_exit, coverages_enter, location=location) 382 return position 383 384 def parse_adjust_pair_(self): 385 assert self.is_cur_keyword_("ADJUST_PAIR") 386 location = self.cur_token_location_ 387 coverages_1 = [] 388 coverages_2 = [] 389 adjust_pair = {} 390 while self.next_token_ == "FIRST": 391 self.advance_lexer_() 392 coverage_1 = self.parse_coverage_() 393 coverages_1.append(coverage_1) 394 while self.next_token_ == "SECOND": 395 self.advance_lexer_() 396 coverage_2 = self.parse_coverage_() 397 coverages_2.append(coverage_2) 398 while self.next_token_ != "END_ADJUST": 399 id_1 = self.expect_number_() 400 id_2 = self.expect_number_() 401 self.expect_keyword_("BY") 402 pos_1 = self.parse_pos_() 403 pos_2 = self.parse_pos_() 404 adjust_pair[(id_1, id_2)] = (pos_1, pos_2) 405 self.expect_keyword_("END_ADJUST") 406 position = ast.PositionAdjustPairDefinition( 407 coverages_1, coverages_2, adjust_pair, location=location) 408 return position 409 410 def parse_adjust_single_(self): 411 assert self.is_cur_keyword_("ADJUST_SINGLE") 412 location = self.cur_token_location_ 413 adjust_single = [] 414 while self.next_token_ != "END_ADJUST": 415 coverages = self.parse_coverage_() 416 self.expect_keyword_("BY") 417 pos = self.parse_pos_() 418 adjust_single.append((coverages, pos)) 419 self.expect_keyword_("END_ADJUST") 420 position = ast.PositionAdjustSingleDefinition( 421 adjust_single, location=location) 422 return position 423 424 def parse_def_anchor_(self): 425 assert self.is_cur_keyword_("DEF_ANCHOR") 426 location = self.cur_token_location_ 427 name = self.expect_string_() 428 self.expect_keyword_("ON") 429 gid = self.expect_number_() 430 self.expect_keyword_("GLYPH") 431 glyph_name = self.expect_name_() 432 self.expect_keyword_("COMPONENT") 433 component = self.expect_number_() 434 # check for duplicate anchor names on this glyph 435 if glyph_name in self.anchors_: 436 anchor = self.anchors_[glyph_name].resolve(name) 437 if anchor is not None and anchor.component == component: 438 raise VoltLibError( 439 'Anchor "%s" already defined, ' 440 'anchor names are case insensitive' % name, 441 location 442 ) 443 if self.next_token_ == "LOCKED": 444 locked = True 445 self.advance_lexer_() 446 else: 447 locked = False 448 self.expect_keyword_("AT") 449 pos = self.parse_pos_() 450 self.expect_keyword_("END_ANCHOR") 451 anchor = ast.AnchorDefinition(name, gid, glyph_name, 452 component, locked, pos, 453 location=location) 454 if glyph_name not in self.anchors_: 455 self.anchors_[glyph_name] = SymbolTable() 456 self.anchors_[glyph_name].define(name, anchor) 457 return anchor 458 459 def parse_adjust_by_(self): 460 self.advance_lexer_() 461 assert self.is_cur_keyword_("ADJUST_BY") 462 adjustment = self.expect_number_() 463 self.expect_keyword_("AT") 464 size = self.expect_number_() 465 return adjustment, size 466 467 def parse_pos_(self): 468 # VOLT syntax doesn't seem to take device Y advance 469 self.advance_lexer_() 470 location = self.cur_token_location_ 471 assert self.is_cur_keyword_("POS"), location 472 adv = None 473 dx = None 474 dy = None 475 adv_adjust_by = {} 476 dx_adjust_by = {} 477 dy_adjust_by = {} 478 if self.next_token_ == "ADV": 479 self.advance_lexer_() 480 adv = self.expect_number_() 481 while self.next_token_ == "ADJUST_BY": 482 adjustment, size = self.parse_adjust_by_() 483 adv_adjust_by[size] = adjustment 484 if self.next_token_ == "DX": 485 self.advance_lexer_() 486 dx = self.expect_number_() 487 while self.next_token_ == "ADJUST_BY": 488 adjustment, size = self.parse_adjust_by_() 489 dx_adjust_by[size] = adjustment 490 if self.next_token_ == "DY": 491 self.advance_lexer_() 492 dy = self.expect_number_() 493 while self.next_token_ == "ADJUST_BY": 494 adjustment, size = self.parse_adjust_by_() 495 dy_adjust_by[size] = adjustment 496 self.expect_keyword_("END_POS") 497 return ast.Pos(adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by) 498 499 def parse_unicode_values_(self): 500 location = self.cur_token_location_ 501 try: 502 unicode_values = self.expect_string_().split(",") 503 unicode_values = [ 504 int(uni[2:], 16) 505 for uni in unicode_values if uni != ""] 506 except ValueError as err: 507 raise VoltLibError(str(err), location) 508 return unicode_values if unicode_values != [] else None 509 510 def parse_enum_(self): 511 self.expect_keyword_("ENUM") 512 location = self.cur_token_location_ 513 enum = ast.Enum(self.parse_coverage_(), location=location) 514 self.expect_keyword_("END_ENUM") 515 return enum 516 517 def parse_coverage_(self): 518 coverage = [] 519 location = self.cur_token_location_ 520 while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"): 521 if self.next_token_ == "ENUM": 522 enum = self.parse_enum_() 523 coverage.append(enum) 524 elif self.next_token_ == "GLYPH": 525 self.expect_keyword_("GLYPH") 526 name = self.expect_string_() 527 coverage.append(ast.GlyphName(name, location=location)) 528 elif self.next_token_ == "GROUP": 529 self.expect_keyword_("GROUP") 530 name = self.expect_string_() 531 coverage.append(ast.GroupName(name, self, location=location)) 532 elif self.next_token_ == "RANGE": 533 self.expect_keyword_("RANGE") 534 start = self.expect_string_() 535 self.expect_keyword_("TO") 536 end = self.expect_string_() 537 coverage.append(ast.Range(start, end, self, location=location)) 538 return tuple(coverage) 539 540 def resolve_group(self, group_name): 541 return self.groups_.resolve(group_name) 542 543 def glyph_range(self, start, end): 544 return self.glyphs_.range(start, end) 545 546 def parse_ppem_(self): 547 location = self.cur_token_location_ 548 ppem_name = self.cur_token_ 549 value = self.expect_number_() 550 setting = ast.SettingDefinition(ppem_name, value, location=location) 551 return setting 552 553 def parse_noarg_option_(self): 554 location = self.cur_token_location_ 555 name = self.cur_token_ 556 value = True 557 setting = ast.SettingDefinition(name, value, location=location) 558 return setting 559 560 def parse_cmap_format(self): 561 location = self.cur_token_location_ 562 name = self.cur_token_ 563 value = (self.expect_number_(), self.expect_number_(), 564 self.expect_number_()) 565 setting = ast.SettingDefinition(name, value, location=location) 566 return setting 567 568 def is_cur_keyword_(self, k): 569 return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k) 570 571 def expect_string_(self): 572 self.advance_lexer_() 573 if self.cur_token_type_ is not Lexer.STRING: 574 raise VoltLibError("Expected a string", self.cur_token_location_) 575 return self.cur_token_ 576 577 def expect_keyword_(self, keyword): 578 self.advance_lexer_() 579 if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword: 580 return self.cur_token_ 581 raise VoltLibError("Expected \"%s\"" % keyword, 582 self.cur_token_location_) 583 584 def expect_name_(self): 585 self.advance_lexer_() 586 if self.cur_token_type_ is Lexer.NAME: 587 return self.cur_token_ 588 raise VoltLibError("Expected a name", self.cur_token_location_) 589 590 def expect_number_(self): 591 self.advance_lexer_() 592 if self.cur_token_type_ is not Lexer.NUMBER: 593 raise VoltLibError("Expected a number", self.cur_token_location_) 594 return self.cur_token_ 595 596 def advance_lexer_(self): 597 self.cur_token_type_, self.cur_token_, self.cur_token_location_ = ( 598 self.next_token_type_, self.next_token_, self.next_token_location_) 599 try: 600 if self.is_cur_keyword_("END"): 601 raise StopIteration 602 (self.next_token_type_, self.next_token_, 603 self.next_token_location_) = self.lexer_.next() 604 except StopIteration: 605 self.next_token_type_, self.next_token_ = (None, None) 606 607 608class SymbolTable(object): 609 def __init__(self): 610 self.scopes_ = [{}] 611 612 def enter_scope(self): 613 self.scopes_.append({}) 614 615 def exit_scope(self): 616 self.scopes_.pop() 617 618 def define(self, name, item): 619 self.scopes_[-1][name] = item 620 621 def resolve(self, name, case_insensitive=True): 622 for scope in reversed(self.scopes_): 623 item = scope.get(name) 624 if item: 625 return item 626 if case_insensitive: 627 for key in scope: 628 if key.lower() == name.lower(): 629 return scope[key] 630 return None 631 632 633class OrderedSymbolTable(SymbolTable): 634 def __init__(self): 635 self.scopes_ = [{}] 636 637 def enter_scope(self): 638 self.scopes_.append({}) 639 640 def resolve(self, name, case_insensitive=False): 641 SymbolTable.resolve(self, name, case_insensitive=case_insensitive) 642 643 def range(self, start, end): 644 for scope in reversed(self.scopes_): 645 if start in scope and end in scope: 646 start_idx = list(scope.keys()).index(start) 647 end_idx = list(scope.keys()).index(end) 648 return list(scope.keys())[start_idx:end_idx + 1] 649 return None 650