"""Parser for Microsoft VOLT (.vtp) project files.

Builds a ``fontTools.voltLib.ast.VoltFile`` from VOLT source text, using
``fontTools.voltLib.lexer.Lexer`` for tokenization.  Duplicate definitions
(glyphs, groups, scripts, languages, lookups, anchors) are rejected with
``VoltLibError``.
"""
from __future__ import (
    print_function, division, absolute_import, unicode_literals)
from collections import OrderedDict
import fontTools.voltLib.ast as ast
from fontTools.voltLib.lexer import Lexer
from fontTools.voltLib.error import VoltLibError
from io import open

# Maps each top-level VOLT keyword to the name of the Parser method that
# parses the statement it introduces.  Note: "PPOSITIONING_PPEM" (double P)
# is the spelling actually emitted by VOLT, not a typo here.
PARSE_FUNCS = {
    "DEF_GLYPH": "parse_def_glyph_",
    "DEF_GROUP": "parse_def_group_",
    "DEF_SCRIPT": "parse_def_script_",
    "DEF_LOOKUP": "parse_def_lookup_",
    "DEF_ANCHOR": "parse_def_anchor_",
    "GRID_PPEM": "parse_ppem_",
    "PRESENTATION_PPEM": "parse_ppem_",
    "PPOSITIONING_PPEM": "parse_ppem_",
    "COMPILER_USEEXTENSIONLOOKUPS": "parse_compiler_flag_",
    "COMPILER_USEPAIRPOSFORMAT2": "parse_compiler_flag_",
    "CMAP_FORMAT": "parse_cmap_format",
}


class Parser(object):
    """Recursive-descent parser for VOLT project files.

    The parser keeps a one-token lookahead: ``cur_token_`` is the token
    being processed and ``next_token_`` is the lookahead, both advanced
    together by ``advance_lexer_()``.
    """

    def __init__(self, path):
        self.doc_ = ast.VoltFile()
        # Glyphs preserve definition order so GID ranges can be resolved.
        self.glyphs_ = OrderedSymbolTable()
        self.groups_ = SymbolTable()
        self.anchors_ = {}  # dictionary of SymbolTable() keyed by glyph
        self.scripts_ = SymbolTable()
        self.langs_ = SymbolTable()
        self.lookups_ = SymbolTable()
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        self.make_lexer_(path)
        self.advance_lexer_()

    def make_lexer_(self, file_or_path):
        """Create ``self.lexer_`` from a file-like object or a filesystem path."""
        if hasattr(file_or_path, "read"):
            filename = getattr(file_or_path, "name", None)
            data = file_or_path.read()
        else:
            filename = file_or_path
            with open(file_or_path, "r") as f:
                data = f.read()
        self.lexer_ = Lexer(data, filename)

    def parse(self):
        """Parse the whole input and return the ``ast.VoltFile`` document.

        Raises:
            VoltLibError: on any statement that is not a known top-level
                keyword (see ``PARSE_FUNCS``) or ``END``.
        """
        statements = self.doc_.statements
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_ in PARSE_FUNCS.keys():
                func = getattr(self, PARSE_FUNCS[self.cur_token_])
                statements.append(func())
            elif self.is_cur_keyword_("END"):
                break
            else:
                raise VoltLibError(
                    "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
                    self.cur_token_location_)
        return self.doc_

    def parse_def_glyph_(self):
        """Parse a DEF_GLYPH ... END_GLYPH statement.

        Registers the glyph by name in ``self.glyphs_`` and rejects
        duplicate glyph names.
        """
        assert self.is_cur_keyword_("DEF_GLYPH")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ID")
        gid = self.expect_number_()
        if gid < 0:
            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
        gunicode = None
        if self.next_token_ == "UNICODE":
            # Single code point, given as a decimal number.
            self.expect_keyword_("UNICODE")
            gunicode = [self.expect_number_()]
            if gunicode[0] < 0:
                raise VoltLibError("Invalid glyph UNICODE",
                                   self.cur_token_location_)
        elif self.next_token_ == "UNICODEVALUES":
            # Comma-separated list of "U+XXXX" values inside one string.
            self.expect_keyword_("UNICODEVALUES")
            gunicode = self.parse_unicode_values_()
        gtype = None
        if self.next_token_ == "TYPE":
            self.expect_keyword_("TYPE")
            gtype = self.expect_name_()
            assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT")
        components = None
        if self.next_token_ == "COMPONENTS":
            self.expect_keyword_("COMPONENTS")
            components = self.expect_number_()
        self.expect_keyword_("END_GLYPH")
        if self.glyphs_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph "%s" (gid %i) already defined' % (name, gid),
                location
            )
        def_glyph = ast.GlyphDefinition(name, gid,
                                        gunicode, gtype, components,
                                        location=location)
        self.glyphs_.define(name, def_glyph)
        return def_glyph

    def parse_def_group_(self):
        """Parse a DEF_GROUP ... END_GROUP statement.

        Group names are case insensitive; duplicates are rejected.
        """
        assert self.is_cur_keyword_("DEF_GROUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        enum = None
        if self.next_token_ == "ENUM":
            enum = self.parse_enum_()
        self.expect_keyword_("END_GROUP")
        if self.groups_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph group "%s" already defined, '
                'group names are case insensitive' % name,
                location
            )
        def_group = ast.GroupDefinition(name, enum,
                                        location=location)
        self.groups_.define(name, def_group)
        return def_group

    def parse_def_script_(self):
        """Parse a DEF_SCRIPT ... END_SCRIPT statement with nested langsys.

        Language tags are scoped per script via ``self.langs_`` scopes, so
        the same language tag may appear under different scripts.
        """
        assert self.is_cur_keyword_("DEF_SCRIPT")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        if self.scripts_.resolve(tag) is not None:
            raise VoltLibError(
                'Script "%s" already defined, '
                'script tags are case insensitive' % tag,
                location
            )
        self.langs_.enter_scope()
        langs = []
        while self.next_token_ != "END_SCRIPT":
            self.advance_lexer_()
            lang = self.parse_langsys_()
            self.expect_keyword_("END_LANGSYS")
            if self.langs_.resolve(lang.tag) is not None:
                raise VoltLibError(
                    'Language "%s" already defined in script "%s", '
                    'language tags are case insensitive' % (lang.tag, tag),
                    location
                )
            self.langs_.define(lang.tag, lang)
            langs.append(lang)
        self.expect_keyword_("END_SCRIPT")
        self.langs_.exit_scope()
        def_script = ast.ScriptDefinition(name, tag, langs, location=location)
        self.scripts_.define(tag, def_script)
        return def_script

    def parse_langsys_(self):
        """Parse a DEF_LANGSYS body (caller consumes END_LANGSYS)."""
        assert self.is_cur_keyword_("DEF_LANGSYS")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        features = []
        while self.next_token_ != "END_LANGSYS":
            self.advance_lexer_()
            feature = self.parse_feature_()
            self.expect_keyword_("END_FEATURE")
            features.append(feature)
        def_langsys = ast.LangSysDefinition(name, tag, features,
                                            location=location)
        return def_langsys

    def parse_feature_(self):
        """Parse a DEF_FEATURE body (caller consumes END_FEATURE)."""
        assert self.is_cur_keyword_("DEF_FEATURE")
        location = self.cur_token_location_
        self.expect_keyword_("NAME")
        name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        lookups = []
        while self.next_token_ != "END_FEATURE":
            # Each feature entry is a "LOOKUP <name>" pair; the lookup
            # itself is defined separately by DEF_LOOKUP.
            self.expect_keyword_("LOOKUP")
            lookup = self.expect_string_()
            lookups.append(lookup)
        feature = ast.FeatureDefinition(name, tag, lookups,
                                        location=location)
        return feature

    def parse_def_lookup_(self):
        """Parse a DEF_LOOKUP statement including flags, context and body.

        Raises:
            VoltLibError: on invalid lookup names, duplicate lookups, or
                malformed PROCESS_MARKS / body keywords.
        """
        assert self.is_cur_keyword_("DEF_LOOKUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        if not name[0].isalpha():
            raise VoltLibError(
                'Lookup name "%s" must start with a letter' % name,
                location
            )
        if self.lookups_.resolve(name) is not None:
            raise VoltLibError(
                'Lookup "%s" already defined, '
                'lookup names are case insensitive' % name,
                location
            )
        # PROCESS_BASE / SKIP_BASE flag (defaults to processing bases).
        process_base = True
        if self.next_token_ == "PROCESS_BASE":
            self.advance_lexer_()
        elif self.next_token_ == "SKIP_BASE":
            self.advance_lexer_()
            process_base = False
        # PROCESS_MARKS may be ALL, a mark glyph set, or a named group;
        # process_marks holds True, False, or that group-name string.
        process_marks = True
        mark_glyph_set = None
        if self.next_token_ == "PROCESS_MARKS":
            self.advance_lexer_()
            if self.next_token_ == "MARK_GLYPH_SET":
                self.advance_lexer_()
                mark_glyph_set = self.expect_string_()
            elif self.next_token_type_ == Lexer.STRING:
                process_marks = self.expect_string_()
            elif self.next_token_ == "ALL":
                self.advance_lexer_()
            else:
                raise VoltLibError(
                    "Expected ALL, MARK_GLYPH_SET or an ID. "
                    "Got %s" % (self.next_token_type_),
                    location)
        elif self.next_token_ == "SKIP_MARKS":
            self.advance_lexer_()
            process_marks = False
        direction = None
        if self.next_token_ == "DIRECTION":
            self.expect_keyword_("DIRECTION")
            direction = self.expect_name_()
            assert direction in ("LTR", "RTL")
        reversal = None
        if self.next_token_ == "REVERSAL":
            self.expect_keyword_("REVERSAL")
            reversal = True
        comments = None
        if self.next_token_ == "COMMENTS":
            self.expect_keyword_("COMMENTS")
            comments = self.expect_string_()
        context = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            context = self.parse_context_()
        as_pos_or_sub = self.expect_name_()
        sub = None
        pos = None
        if as_pos_or_sub == "AS_SUBSTITUTION":
            sub = self.parse_substitution_(reversal)
        elif as_pos_or_sub == "AS_POSITION":
            pos = self.parse_position_()
        else:
            raise VoltLibError(
                "Expected AS_SUBSTITUTION or AS_POSITION. "
                "Got %s" % (as_pos_or_sub),
                location)
        def_lookup = ast.LookupDefinition(
            name, process_base, process_marks, mark_glyph_set, direction,
            reversal, comments, context, sub, pos, location=location)
        self.lookups_.define(name, def_lookup)
        return def_lookup

    def parse_context_(self):
        """Parse consecutive IN_CONTEXT / EXCEPT_CONTEXT blocks.

        Returns a list of ``ast.ContextDefinition``; empty contexts
        (``IN_CONTEXT END_CONTEXT``) are consumed but produce no entry.
        """
        location = self.cur_token_location_
        contexts = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            side = None
            coverage = None
            ex_or_in = self.expect_name_()
            if self.next_token_ != "END_CONTEXT":
                left = []
                right = []
                while self.next_token_ in ("LEFT", "RIGHT"):
                    side = self.expect_name_()
                    coverage = self.parse_coverage_()
                    if side == "LEFT":
                        left.append(coverage)
                    else:
                        right.append(coverage)
                self.expect_keyword_("END_CONTEXT")
                context = ast.ContextDefinition(ex_or_in, left,
                                                right, location=location)
                contexts.append(context)
            else:
                self.expect_keyword_("END_CONTEXT")
        return contexts

    def parse_substitution_(self, reversal):
        """Parse an AS_SUBSTITUTION body into the appropriate sub AST node.

        The node type is chosen from the maximum source/target coverage
        lengths: single, multiple (1->n), or ligature (n->1).  Many-to-many,
        and any non-1:1 mapping under REVERSAL, is invalid.
        """
        assert self.is_cur_keyword_("AS_SUBSTITUTION")
        location = self.cur_token_location_
        src = []
        dest = []
        if self.next_token_ != "SUB":
            raise VoltLibError("Expected SUB", location)
        while self.next_token_ == "SUB":
            self.expect_keyword_("SUB")
            src.append(self.parse_coverage_())
            self.expect_keyword_("WITH")
            dest.append(self.parse_coverage_())
            self.expect_keyword_("END_SUB")
        self.expect_keyword_("END_SUBSTITUTION")
        max_src = max([len(cov) for cov in src])
        max_dest = max([len(cov) for cov in dest])
        # many to many or mixed is invalid
        if ((max_src > 1 and max_dest > 1) or
                (reversal and (max_src > 1 or max_dest > 1))):
            raise VoltLibError(
                "Invalid substitution type",
                location)
        mapping = OrderedDict(zip(tuple(src), tuple(dest)))
        if max_src == 1 and max_dest == 1:
            if reversal:
                sub = ast.SubstitutionReverseChainingSingleDefinition(
                    mapping, location=location)
            else:
                sub = ast.SubstitutionSingleDefinition(mapping,
                                                       location=location)
        elif max_src == 1 and max_dest > 1:
            sub = ast.SubstitutionMultipleDefinition(mapping,
                                                     location=location)
        elif max_src > 1 and max_dest == 1:
            sub = ast.SubstitutionLigatureDefinition(mapping,
                                                     location=location)
        return sub

    def parse_position_(self):
        """Parse an AS_POSITION body, dispatching on the positioning type."""
        assert self.is_cur_keyword_("AS_POSITION")
        location = self.cur_token_location_
        pos_type = self.expect_name_()
        if pos_type not in (
                "ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"):
            raise VoltLibError(
                "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE",
                location)
        if pos_type == "ATTACH":
            position = self.parse_attach_()
        elif pos_type == "ATTACH_CURSIVE":
            position = self.parse_attach_cursive_()
        elif pos_type == "ADJUST_PAIR":
            position = self.parse_adjust_pair_()
        elif pos_type == "ADJUST_SINGLE":
            position = self.parse_adjust_single_()
        self.expect_keyword_("END_POSITION")
        return position

    def parse_attach_(self):
        """Parse an ATTACH ... END_ATTACH (mark-to-base style) block."""
        assert self.is_cur_keyword_("ATTACH")
        location = self.cur_token_location_
        coverage = self.parse_coverage_()
        coverage_to = []
        self.expect_keyword_("TO")
        while self.next_token_ != "END_ATTACH":
            cov = self.parse_coverage_()
            self.expect_keyword_("AT")
            self.expect_keyword_("ANCHOR")
            anchor_name = self.expect_string_()
            coverage_to.append((cov, anchor_name))
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachDefinition(
            coverage, coverage_to, location=location)
        return position

    def parse_attach_cursive_(self):
        """Parse an ATTACH_CURSIVE block: EXIT coverages then ENTER coverages."""
        assert self.is_cur_keyword_("ATTACH_CURSIVE")
        location = self.cur_token_location_
        coverages_exit = []
        coverages_enter = []
        while self.next_token_ != "ENTER":
            self.expect_keyword_("EXIT")
            coverages_exit.append(self.parse_coverage_())
        while self.next_token_ != "END_ATTACH":
            self.expect_keyword_("ENTER")
            coverages_enter.append(self.parse_coverage_())
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachCursiveDefinition(
            coverages_exit, coverages_enter, location=location)
        return position

    def parse_adjust_pair_(self):
        """Parse an ADJUST_PAIR block (pair positioning).

        Adjustments are keyed by (first_index, second_index) into the
        1-based FIRST/SECOND coverage lists.
        """
        assert self.is_cur_keyword_("ADJUST_PAIR")
        location = self.cur_token_location_
        coverages_1 = []
        coverages_2 = []
        adjust_pair = {}
        while self.next_token_ == "FIRST":
            self.advance_lexer_()
            coverage_1 = self.parse_coverage_()
            coverages_1.append(coverage_1)
        while self.next_token_ == "SECOND":
            self.advance_lexer_()
            coverage_2 = self.parse_coverage_()
            coverages_2.append(coverage_2)
        while self.next_token_ != "END_ADJUST":
            id_1 = self.expect_number_()
            id_2 = self.expect_number_()
            self.expect_keyword_("BY")
            pos_1 = self.parse_pos_()
            pos_2 = self.parse_pos_()
            adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustPairDefinition(
            coverages_1, coverages_2, adjust_pair, location=location)
        return position

    def parse_adjust_single_(self):
        """Parse an ADJUST_SINGLE block (single positioning)."""
        assert self.is_cur_keyword_("ADJUST_SINGLE")
        location = self.cur_token_location_
        adjust_single = []
        while self.next_token_ != "END_ADJUST":
            coverages = self.parse_coverage_()
            self.expect_keyword_("BY")
            pos = self.parse_pos_()
            adjust_single.append((coverages, pos))
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustSingleDefinition(
            adjust_single, location=location)
        return position

    def parse_def_anchor_(self):
        """Parse a DEF_ANCHOR ... END_ANCHOR statement.

        Anchors are stored per glyph; the same anchor name may recur on a
        glyph only with a different ligature component index.
        """
        assert self.is_cur_keyword_("DEF_ANCHOR")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ON")
        gid = self.expect_number_()
        self.expect_keyword_("GLYPH")
        glyph_name = self.expect_name_()
        self.expect_keyword_("COMPONENT")
        component = self.expect_number_()
        # check for duplicate anchor names on this glyph
        if glyph_name in self.anchors_:
            anchor = self.anchors_[glyph_name].resolve(name)
            if anchor is not None and anchor.component == component:
                raise VoltLibError(
                    'Anchor "%s" already defined, '
                    'anchor names are case insensitive' % name,
                    location
                )
        if self.next_token_ == "LOCKED":
            locked = True
            self.advance_lexer_()
        else:
            locked = False
        self.expect_keyword_("AT")
        pos = self.parse_pos_()
        self.expect_keyword_("END_ANCHOR")
        anchor = ast.AnchorDefinition(name, gid, glyph_name,
                                      component, locked, pos,
                                      location=location)
        if glyph_name not in self.anchors_:
            self.anchors_[glyph_name] = SymbolTable()
        self.anchors_[glyph_name].define(name, anchor)
        return anchor

    def parse_adjust_by_(self):
        """Parse one "ADJUST_BY <adjustment> AT <ppem-size>" device entry."""
        self.advance_lexer_()
        assert self.is_cur_keyword_("ADJUST_BY")
        adjustment = self.expect_number_()
        self.expect_keyword_("AT")
        size = self.expect_number_()
        return adjustment, size

    def parse_pos_(self):
        """Parse a POS ... END_POS value record.

        Returns:
            A 6-tuple ``(adv, dx, dy, adv_adjust_by, dx_adjust_by,
            dy_adjust_by)`` where the ``*_adjust_by`` dicts map ppem size
            to adjustment.
        """
        # VOLT syntax doesn't seem to take device Y advance
        self.advance_lexer_()
        location = self.cur_token_location_
        assert self.is_cur_keyword_("POS"), location
        adv = None
        dx = None
        dy = None
        adv_adjust_by = {}
        dx_adjust_by = {}
        dy_adjust_by = {}
        if self.next_token_ == "ADV":
            self.advance_lexer_()
            adv = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                adv_adjust_by[size] = adjustment
        if self.next_token_ == "DX":
            self.advance_lexer_()
            dx = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dx_adjust_by[size] = adjustment
        if self.next_token_ == "DY":
            self.advance_lexer_()
            dy = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dy_adjust_by[size] = adjustment
        self.expect_keyword_("END_POS")
        return (adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)

    def parse_unicode_values_(self):
        """Parse a UNICODEVALUES string like "U+0041,U+0042".

        Returns a list of code points, or None if the string was empty.
        Each entry is assumed to carry a two-character "U+" prefix.
        """
        location = self.cur_token_location_
        try:
            unicode_values = self.expect_string_().split(",")
            unicode_values = [
                int(uni[2:], 16)
                for uni in unicode_values if uni != ""]
        except ValueError as err:
            raise VoltLibError(str(err), location)
        return unicode_values if unicode_values != [] else None

    def parse_enum_(self):
        """Parse an ENUM ... END_ENUM glyph enumeration."""
        self.expect_keyword_("ENUM")
        location = self.cur_token_location_
        enum = ast.Enum(self.parse_coverage_(), location=location)
        self.expect_keyword_("END_ENUM")
        return enum

    def parse_coverage_(self):
        """Parse a sequence of GLYPH / GROUP / RANGE / ENUM coverage items.

        Returns a tuple of AST nodes; group and range nodes are given a
        reference to this parser for late name/GID resolution.
        """
        coverage = []
        location = self.cur_token_location_
        while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
            if self.next_token_ == "ENUM":
                enum = self.parse_enum_()
                coverage.append(enum)
            elif self.next_token_ == "GLYPH":
                self.expect_keyword_("GLYPH")
                name = self.expect_string_()
                coverage.append(ast.GlyphName(name, location=location))
            elif self.next_token_ == "GROUP":
                self.expect_keyword_("GROUP")
                name = self.expect_string_()
                coverage.append(ast.GroupName(name, self, location=location))
            elif self.next_token_ == "RANGE":
                self.expect_keyword_("RANGE")
                start = self.expect_string_()
                self.expect_keyword_("TO")
                end = self.expect_string_()
                coverage.append(ast.Range(start, end, self, location=location))
        return tuple(coverage)

    def resolve_group(self, group_name):
        """Look up a group definition by (case-insensitive) name."""
        return self.groups_.resolve(group_name)

    def glyph_range(self, start, end):
        """Return the glyph names from *start* to *end* in definition order."""
        return self.glyphs_.range(start, end)

    def parse_ppem_(self):
        """Parse a *_PPEM setting (keyword already consumed as cur_token_)."""
        location = self.cur_token_location_
        ppem_name = self.cur_token_
        value = self.expect_number_()
        setting = ast.SettingDefinition(ppem_name, value, location=location)
        return setting

    def parse_compiler_flag_(self):
        """Parse a valueless COMPILER_* flag; its presence means True."""
        location = self.cur_token_location_
        flag_name = self.cur_token_
        value = True
        setting = ast.SettingDefinition(flag_name, value, location=location)
        return setting

    def parse_cmap_format(self):
        """Parse a CMAP_FORMAT setting: three numbers (platform, encoding, format)."""
        location = self.cur_token_location_
        name = self.cur_token_
        value = (self.expect_number_(), self.expect_number_(),
                 self.expect_number_())
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def is_cur_keyword_(self, k):
        """Return True if the current token is the NAME keyword *k*."""
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

    def expect_string_(self):
        """Advance and return the current token, which must be a STRING."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.STRING:
            raise VoltLibError("Expected a string", self.cur_token_location_)
        return self.cur_token_

    def expect_keyword_(self, keyword):
        """Advance and return the current token, which must equal *keyword*."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
            return self.cur_token_
        raise VoltLibError("Expected \"%s\"" % keyword,
                           self.cur_token_location_)

    def expect_name_(self):
        """Advance and return the current token, which must be a NAME."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise VoltLibError("Expected a name", self.cur_token_location_)

    def expect_number_(self):
        """Advance and return the current token, which must be a NUMBER."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NUMBER:
            raise VoltLibError("Expected a number", self.cur_token_location_)
        return self.cur_token_

    def advance_lexer_(self):
        """Shift the lookahead into cur_token_* and pull the next token.

        After the lexer is exhausted (or an explicit END keyword is seen),
        next_token_type_/next_token_ become None, which terminates the
        main parse loop.
        """
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
        try:
            if self.is_cur_keyword_("END"):
                raise StopIteration
            (self.next_token_type_, self.next_token_,
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)


class SymbolTable(object):
    """Scoped name table; lookups fall back to case-insensitive matching."""

    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a fresh innermost scope."""
        self.scopes_.append({})

    def exit_scope(self):
        """Pop the innermost scope."""
        self.scopes_.pop()

    def define(self, name, item):
        """Bind *name* to *item* in the innermost scope."""
        self.scopes_[-1][name] = item

    def resolve(self, name, case_insensitive=True):
        """Return the item bound to *name*, innermost scope first.

        An exact-case match in a scope wins over a case-insensitive one;
        returns None if the name is not found in any scope.
        """
        for scope in reversed(self.scopes_):
            # Compare against None explicitly so a defined-but-falsy item
            # would still be found.
            item = scope.get(name)
            if item is not None:
                return item
            if case_insensitive:
                for key in scope:
                    if key.lower() == name.lower():
                        return scope[key]
        return None


class OrderedSymbolTable(SymbolTable):
    """SymbolTable that preserves definition order, enabling range queries.

    Resolution is case sensitive by default (glyph names, unlike group or
    lookup names, are case sensitive in VOLT).
    """

    def __init__(self):
        self.scopes_ = [OrderedDict()]

    def enter_scope(self):
        """Push a fresh ordered innermost scope."""
        self.scopes_.append(OrderedDict())

    def resolve(self, name, case_insensitive=False):
        """Return the item bound to *name*, or None.

        Bug fix: the result of the base-class lookup was previously
        discarded, so this method always returned None and duplicate-glyph
        detection in parse_def_glyph_ never triggered.
        """
        return SymbolTable.resolve(self, name,
                                   case_insensitive=case_insensitive)

    def range(self, start, end):
        """Return names from *start* to *end* inclusive, in definition order.

        Searches innermost scope first; returns None if no single scope
        contains both endpoints.
        """
        for scope in reversed(self.scopes_):
            if start in scope and end in scope:
                start_idx = list(scope.keys()).index(start)
                end_idx = list(scope.keys()).index(end)
                return list(scope.keys())[start_idx:end_idx + 1]
        return None