1from __future__ import print_function, division, absolute_import 2from __future__ import unicode_literals 3from fontTools.feaLib.error import FeatureLibError 4from fontTools.feaLib.lexer import Lexer, IncludingLexer, NonIncludingLexer 5from fontTools.misc.encodingTools import getEncoding 6from fontTools.misc.py23 import * 7import fontTools.feaLib.ast as ast 8import logging 9import os 10import re 11 12 13log = logging.getLogger(__name__) 14 15 16class Parser(object): 17 extensions = {} 18 ast = ast 19 SS_FEATURE_TAGS = {"ss%02d" % i for i in range(1, 20+1)} 20 CV_FEATURE_TAGS = {"cv%02d" % i for i in range(1, 99+1)} 21 22 def __init__(self, featurefile, glyphNames=(), followIncludes=True, 23 **kwargs): 24 if "glyphMap" in kwargs: 25 from fontTools.misc.loggingTools import deprecateArgument 26 deprecateArgument("glyphMap", "use 'glyphNames' (iterable) instead") 27 if glyphNames: 28 raise TypeError("'glyphNames' and (deprecated) 'glyphMap' are " 29 "mutually exclusive") 30 glyphNames = kwargs.pop("glyphMap") 31 if kwargs: 32 raise TypeError("unsupported keyword argument%s: %s" 33 % ("" if len(kwargs) == 1 else "s", 34 ", ".join(repr(k) for k in kwargs))) 35 36 self.glyphNames_ = set(glyphNames) 37 self.doc_ = self.ast.FeatureFile() 38 self.anchors_ = SymbolTable() 39 self.glyphclasses_ = SymbolTable() 40 self.lookups_ = SymbolTable() 41 self.valuerecords_ = SymbolTable() 42 self.symbol_tables_ = { 43 self.anchors_, self.valuerecords_ 44 } 45 self.next_token_type_, self.next_token_ = (None, None) 46 self.cur_comments_ = [] 47 self.next_token_location_ = None 48 lexerClass = IncludingLexer if followIncludes else NonIncludingLexer 49 self.lexer_ = lexerClass(featurefile) 50 self.advance_lexer_(comments=True) 51 52 def parse(self): 53 statements = self.doc_.statements 54 while self.next_token_type_ is not None or self.cur_comments_: 55 self.advance_lexer_(comments=True) 56 if self.cur_token_type_ is Lexer.COMMENT: 57 statements.append( 58 self.ast.Comment(self.cur_token_, 59 location=self.cur_token_location_)) 60 elif self.is_cur_keyword_("include"): 61 statements.append(self.parse_include_()) 62 elif self.cur_token_type_ is Lexer.GLYPHCLASS: 63 statements.append(self.parse_glyphclass_definition_()) 64 elif self.is_cur_keyword_(("anon", "anonymous")): 65 statements.append(self.parse_anonymous_()) 66 elif self.is_cur_keyword_("anchorDef"): 67 statements.append(self.parse_anchordef_()) 68 elif self.is_cur_keyword_("languagesystem"): 69 statements.append(self.parse_languagesystem_()) 70 elif self.is_cur_keyword_("lookup"): 71 statements.append(self.parse_lookup_(vertical=False)) 72 elif self.is_cur_keyword_("markClass"): 73 statements.append(self.parse_markClass_()) 74 elif self.is_cur_keyword_("feature"): 75 statements.append(self.parse_feature_block_()) 76 elif self.is_cur_keyword_("table"): 77 statements.append(self.parse_table_()) 78 elif self.is_cur_keyword_("valueRecordDef"): 79 statements.append( 80 self.parse_valuerecord_definition_(vertical=False)) 81 elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in self.extensions: 82 statements.append(self.extensions[self.cur_token_](self)) 83 elif self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == ";": 84 continue 85 else: 86 raise FeatureLibError( 87 "Expected feature, languagesystem, lookup, markClass, " 88 "table, or glyph class definition, got {} \"{}\"".format(self.cur_token_type_, self.cur_token_), 89 self.cur_token_location_) 90 return self.doc_ 91 92 def parse_anchor_(self): 93 self.expect_symbol_("<") 94 self.expect_keyword_("anchor") 95 location = self.cur_token_location_ 96 97 if self.next_token_ == "NULL": 98 self.expect_keyword_("NULL") 99 self.expect_symbol_(">") 100 return None 101 102 if self.next_token_type_ == Lexer.NAME: 103 name = self.expect_name_() 104 anchordef = self.anchors_.resolve(name) 105 if anchordef is None: 106 raise FeatureLibError( 107 'Unknown anchor "%s"' % name, 108 self.cur_token_location_) 109 self.expect_symbol_(">") 110 return self.ast.Anchor(anchordef.x, anchordef.y, 111 name=name, 112 contourpoint=anchordef.contourpoint, 113 xDeviceTable=None, yDeviceTable=None, 114 location=location) 115 116 x, y = self.expect_number_(), self.expect_number_() 117 118 contourpoint = None 119 if self.next_token_ == "contourpoint": 120 self.expect_keyword_("contourpoint") 121 contourpoint = self.expect_number_() 122 123 if self.next_token_ == "<": 124 xDeviceTable = self.parse_device_() 125 yDeviceTable = self.parse_device_() 126 else: 127 xDeviceTable, yDeviceTable = None, None 128 129 self.expect_symbol_(">") 130 return self.ast.Anchor(x, y, name=None, 131 contourpoint=contourpoint, 132 xDeviceTable=xDeviceTable, 133 yDeviceTable=yDeviceTable, 134 location=location) 135 136 def parse_anchor_marks_(self): 137 """Parses a sequence of [<anchor> mark @MARKCLASS]*.""" 138 anchorMarks = [] # [(self.ast.Anchor, markClassName)*] 139 while self.next_token_ == "<": 140 anchor = self.parse_anchor_() 141 if anchor is None and self.next_token_ != "mark": 142 continue # <anchor NULL> without mark, eg. in GPOS type 5 143 self.expect_keyword_("mark") 144 markClass = self.expect_markClass_reference_() 145 anchorMarks.append((anchor, markClass)) 146 return anchorMarks 147 148 def parse_anchordef_(self): 149 assert self.is_cur_keyword_("anchorDef") 150 location = self.cur_token_location_ 151 x, y = self.expect_number_(), self.expect_number_() 152 contourpoint = None 153 if self.next_token_ == "contourpoint": 154 self.expect_keyword_("contourpoint") 155 contourpoint = self.expect_number_() 156 name = self.expect_name_() 157 self.expect_symbol_(";") 158 anchordef = self.ast.AnchorDefinition(name, x, y, 159 contourpoint=contourpoint, 160 location=location) 161 self.anchors_.define(name, anchordef) 162 return anchordef 163 164 def parse_anonymous_(self): 165 assert self.is_cur_keyword_(("anon", "anonymous")) 166 tag = self.expect_tag_() 167 _, content, location = self.lexer_.scan_anonymous_block(tag) 168 self.advance_lexer_() 169 self.expect_symbol_('}') 170 end_tag = self.expect_tag_() 171 assert tag == end_tag, "bad splitting in Lexer.scan_anonymous_block()" 172 self.expect_symbol_(';') 173 return self.ast.AnonymousBlock(tag, content, location=location) 174 175 def parse_attach_(self): 176 assert self.is_cur_keyword_("Attach") 177 location = self.cur_token_location_ 178 glyphs = self.parse_glyphclass_(accept_glyphname=True) 179 contourPoints = {self.expect_number_()} 180 while self.next_token_ != ";": 181 contourPoints.add(self.expect_number_()) 182 self.expect_symbol_(";") 183 return self.ast.AttachStatement(glyphs, contourPoints, 184 location=location) 185 186 def parse_enumerate_(self, vertical): 187 assert self.cur_token_ in {"enumerate", "enum"} 188 self.advance_lexer_() 189 return self.parse_position_(enumerated=True, vertical=vertical) 190 191 def parse_GlyphClassDef_(self): 192 """Parses 'GlyphClassDef @BASE, @LIGATURES, @MARKS, @COMPONENTS;'""" 193 assert self.is_cur_keyword_("GlyphClassDef") 194 location = self.cur_token_location_ 195 if self.next_token_ != ",": 196 baseGlyphs = self.parse_glyphclass_(accept_glyphname=False) 197 else: 198 baseGlyphs = None 199 self.expect_symbol_(",") 200 if self.next_token_ != ",": 201 ligatureGlyphs = self.parse_glyphclass_(accept_glyphname=False) 202 else: 203 ligatureGlyphs = None 204 self.expect_symbol_(",") 205 if self.next_token_ != ",": 206 markGlyphs = self.parse_glyphclass_(accept_glyphname=False) 207 else: 208 markGlyphs = None 209 self.expect_symbol_(",") 210 if self.next_token_ != ";": 211 componentGlyphs = self.parse_glyphclass_(accept_glyphname=False) 212 else: 213 componentGlyphs = None 214 self.expect_symbol_(";") 215 return self.ast.GlyphClassDefStatement(baseGlyphs, markGlyphs, 216 ligatureGlyphs, componentGlyphs, 217 location=location) 218 219 def parse_glyphclass_definition_(self): 220 """Parses glyph class definitions such as '@UPPERCASE = [A-Z];'""" 221 location, name = self.cur_token_location_, self.cur_token_ 222 self.expect_symbol_("=") 223 glyphs = self.parse_glyphclass_(accept_glyphname=False) 224 self.expect_symbol_(";") 225 glyphclass = self.ast.GlyphClassDefinition(name, glyphs, 226 location=location) 227 self.glyphclasses_.define(name, glyphclass) 228 return glyphclass 229 230 def split_glyph_range_(self, name, location): 231 # Since v1.20, the OpenType Feature File specification allows 232 # for dashes in glyph names. A sequence like "a-b-c-d" could 233 # therefore mean a single glyph whose name happens to be 234 # "a-b-c-d", or it could mean a range from glyph "a" to glyph 235 # "b-c-d", or a range from glyph "a-b" to glyph "c-d", or a 236 # range from glyph "a-b-c" to glyph "d".Technically, this 237 # example could be resolved because the (pretty complex) 238 # definition of glyph ranges renders most of these splits 239 # invalid. But the specification does not say that a compiler 240 # should try to apply such fancy heuristics. To encourage 241 # unambiguous feature files, we therefore try all possible 242 # splits and reject the feature file if there are multiple 243 # splits possible. It is intentional that we don't just emit a 244 # warning; warnings tend to get ignored. To fix the problem, 245 # font designers can trivially add spaces around the intended 246 # split point, and we emit a compiler error that suggests 247 # how exactly the source should be rewritten to make things 248 # unambiguous. 249 parts = name.split("-") 250 solutions = [] 251 for i in range(len(parts)): 252 start, limit = "-".join(parts[0:i]), "-".join(parts[i:]) 253 if start in self.glyphNames_ and limit in self.glyphNames_: 254 solutions.append((start, limit)) 255 if len(solutions) == 1: 256 start, limit = solutions[0] 257 return start, limit 258 elif len(solutions) == 0: 259 raise FeatureLibError( 260 "\"%s\" is not a glyph in the font, and it can not be split " 261 "into a range of known glyphs" % name, location) 262 else: 263 ranges = " or ".join(["\"%s - %s\"" % (s, l) for s, l in solutions]) 264 raise FeatureLibError( 265 "Ambiguous glyph range \"%s\"; " 266 "please use %s to clarify what you mean" % (name, ranges), 267 location) 268 269 def parse_glyphclass_(self, accept_glyphname): 270 if (accept_glyphname and 271 self.next_token_type_ in (Lexer.NAME, Lexer.CID)): 272 glyph = self.expect_glyph_() 273 return self.ast.GlyphName(glyph, location=self.cur_token_location_) 274 if self.next_token_type_ is Lexer.GLYPHCLASS: 275 self.advance_lexer_() 276 gc = self.glyphclasses_.resolve(self.cur_token_) 277 if gc is None: 278 raise FeatureLibError( 279 "Unknown glyph class @%s" % self.cur_token_, 280 self.cur_token_location_) 281 if isinstance(gc, self.ast.MarkClass): 282 return self.ast.MarkClassName( 283 gc, location=self.cur_token_location_) 284 else: 285 return self.ast.GlyphClassName( 286 gc, location=self.cur_token_location_) 287 288 self.expect_symbol_("[") 289 location = self.cur_token_location_ 290 glyphs = self.ast.GlyphClass(location=location) 291 while self.next_token_ != "]": 292 if self.next_token_type_ is Lexer.NAME: 293 glyph = self.expect_glyph_() 294 location = self.cur_token_location_ 295 if '-' in glyph and glyph not in self.glyphNames_: 296 start, limit = self.split_glyph_range_(glyph, location) 297 glyphs.add_range( 298 start, limit, 299 self.make_glyph_range_(location, start, limit)) 300 elif self.next_token_ == "-": 301 start = glyph 302 self.expect_symbol_("-") 303 limit = self.expect_glyph_() 304 glyphs.add_range( 305 start, limit, 306 self.make_glyph_range_(location, start, limit)) 307 else: 308 glyphs.append(glyph) 309 elif self.next_token_type_ is Lexer.CID: 310 glyph = self.expect_glyph_() 311 if self.next_token_ == "-": 312 range_location = self.cur_token_location_ 313 range_start = self.cur_token_ 314 self.expect_symbol_("-") 315 range_end = self.expect_cid_() 316 glyphs.add_cid_range(range_start, range_end, 317 self.make_cid_range_(range_location, 318 range_start, range_end)) 319 else: 320 glyphs.append("cid%05d" % self.cur_token_) 321 elif self.next_token_type_ is Lexer.GLYPHCLASS: 322 self.advance_lexer_() 323 gc = self.glyphclasses_.resolve(self.cur_token_) 324 if gc is None: 325 raise FeatureLibError( 326 "Unknown glyph class @%s" % self.cur_token_, 327 self.cur_token_location_) 328 if isinstance(gc, self.ast.MarkClass): 329 gc = self.ast.MarkClassName( 330 gc, location=self.cur_token_location_) 331 else: 332 gc = self.ast.GlyphClassName( 333 gc, location=self.cur_token_location_) 334 glyphs.add_class(gc) 335 else: 336 raise FeatureLibError( 337 "Expected glyph name, glyph range, " 338 "or glyph class reference", 339 self.next_token_location_) 340 self.expect_symbol_("]") 341 return glyphs 342 343 def parse_class_name_(self): 344 name = self.expect_class_name_() 345 gc = self.glyphclasses_.resolve(name) 346 if gc is None: 347 raise FeatureLibError( 348 "Unknown glyph class @%s" % name, 349 self.cur_token_location_) 350 if isinstance(gc, self.ast.MarkClass): 351 return self.ast.MarkClassName( 352 gc, location=self.cur_token_location_) 353 else: 354 return self.ast.GlyphClassName( 355 gc, location=self.cur_token_location_) 356 357 def parse_glyph_pattern_(self, vertical): 358 prefix, glyphs, lookups, values, suffix = ([], [], [], [], []) 359 hasMarks = False 360 while self.next_token_ not in {"by", "from", ";", ","}: 361 gc = self.parse_glyphclass_(accept_glyphname=True) 362 marked = False 363 if self.next_token_ == "'": 364 self.expect_symbol_("'") 365 hasMarks = marked = True 366 if marked: 367 if suffix: 368 # makeotf also reports this as an error, while FontForge 369 # silently inserts ' in all the intervening glyphs. 370 # https://github.com/fonttools/fonttools/pull/1096 371 raise FeatureLibError( 372 "Unsupported contextual target sequence: at most " 373 "one run of marked (') glyph/class names allowed", 374 self.cur_token_location_) 375 glyphs.append(gc) 376 elif glyphs: 377 suffix.append(gc) 378 else: 379 prefix.append(gc) 380 381 if self.is_next_value_(): 382 values.append(self.parse_valuerecord_(vertical)) 383 else: 384 values.append(None) 385 386 lookup = None 387 if self.next_token_ == "lookup": 388 self.expect_keyword_("lookup") 389 if not marked: 390 raise FeatureLibError( 391 "Lookups can only follow marked glyphs", 392 self.cur_token_location_) 393 lookup_name = self.expect_name_() 394 lookup = self.lookups_.resolve(lookup_name) 395 if lookup is None: 396 raise FeatureLibError( 397 'Unknown lookup "%s"' % lookup_name, 398 self.cur_token_location_) 399 if marked: 400 lookups.append(lookup) 401 402 if not glyphs and not suffix: # eg., "sub f f i by" 403 assert lookups == [] 404 return ([], prefix, [None] * len(prefix), values, [], hasMarks) 405 else: 406 assert not any(values[:len(prefix)]), values 407 format1 = values[len(prefix):][:len(glyphs)] 408 format2 = values[(len(prefix) + len(glyphs)):][:len(suffix)] 409 values = format2 if format2 and isinstance(format2[0], self.ast.ValueRecord) else format1 410 return (prefix, glyphs, lookups, values, suffix, hasMarks) 411 412 def parse_chain_context_(self): 413 location = self.cur_token_location_ 414 prefix, glyphs, lookups, values, suffix, hasMarks = \ 415 self.parse_glyph_pattern_(vertical=False) 416 chainContext = [(prefix, glyphs, suffix)] 417 hasLookups = any(lookups) 418 while self.next_token_ == ",": 419 self.expect_symbol_(",") 420 prefix, glyphs, lookups, values, suffix, hasMarks = \ 421 self.parse_glyph_pattern_(vertical=False) 422 chainContext.append((prefix, glyphs, suffix)) 423 hasLookups = hasLookups or any(lookups) 424 self.expect_symbol_(";") 425 return chainContext, hasLookups 426 427 def parse_ignore_(self): 428 assert self.is_cur_keyword_("ignore") 429 location = self.cur_token_location_ 430 self.advance_lexer_() 431 if self.cur_token_ in ["substitute", "sub"]: 432 chainContext, hasLookups = self.parse_chain_context_() 433 if hasLookups: 434 raise FeatureLibError( 435 "No lookups can be specified for \"ignore sub\"", 436 location) 437 return self.ast.IgnoreSubstStatement(chainContext, 438 location=location) 439 if self.cur_token_ in ["position", "pos"]: 440 chainContext, hasLookups = self.parse_chain_context_() 441 if hasLookups: 442 raise FeatureLibError( 443 "No lookups can be specified for \"ignore pos\"", 444 location) 445 return self.ast.IgnorePosStatement(chainContext, 446 location=location) 447 raise FeatureLibError( 448 "Expected \"substitute\" or \"position\"", 449 self.cur_token_location_) 450 451 def parse_include_(self): 452 assert self.cur_token_ == "include" 453 location = self.cur_token_location_ 454 filename = self.expect_filename_() 455 # self.expect_symbol_(";") 456 return ast.IncludeStatement(filename, location=location) 457 458 def parse_language_(self): 459 assert self.is_cur_keyword_("language") 460 location = self.cur_token_location_ 461 language = self.expect_language_tag_() 462 include_default, required = (True, False) 463 if self.next_token_ in {"exclude_dflt", "include_dflt"}: 464 include_default = (self.expect_name_() == "include_dflt") 465 if self.next_token_ == "required": 466 self.expect_keyword_("required") 467 required = True 468 self.expect_symbol_(";") 469 return self.ast.LanguageStatement(language, 470 include_default, required, 471 location=location) 472 473 def parse_ligatureCaretByIndex_(self): 474 assert self.is_cur_keyword_("LigatureCaretByIndex") 475 location = self.cur_token_location_ 476 glyphs = self.parse_glyphclass_(accept_glyphname=True) 477 carets = [self.expect_number_()] 478 while self.next_token_ != ";": 479 carets.append(self.expect_number_()) 480 self.expect_symbol_(";") 481 return self.ast.LigatureCaretByIndexStatement(glyphs, carets, 482 location=location) 483 484 def parse_ligatureCaretByPos_(self): 485 assert self.is_cur_keyword_("LigatureCaretByPos") 486 location = self.cur_token_location_ 487 glyphs = self.parse_glyphclass_(accept_glyphname=True) 488 carets = [self.expect_number_()] 489 while self.next_token_ != ";": 490 carets.append(self.expect_number_()) 491 self.expect_symbol_(";") 492 return self.ast.LigatureCaretByPosStatement(glyphs, carets, 493 location=location) 494 495 def parse_lookup_(self, vertical): 496 assert self.is_cur_keyword_("lookup") 497 location, name = self.cur_token_location_, self.expect_name_() 498 499 if self.next_token_ == ";": 500 lookup = self.lookups_.resolve(name) 501 if lookup is None: 502 raise FeatureLibError("Unknown lookup \"%s\"" % name, 503 self.cur_token_location_) 504 self.expect_symbol_(";") 505 return self.ast.LookupReferenceStatement(lookup, 506 location=location) 507 508 use_extension = False 509 if self.next_token_ == "useExtension": 510 self.expect_keyword_("useExtension") 511 use_extension = True 512 513 block = self.ast.LookupBlock(name, use_extension, location=location) 514 self.parse_block_(block, vertical) 515 self.lookups_.define(name, block) 516 return block 517 518 def parse_lookupflag_(self): 519 assert self.is_cur_keyword_("lookupflag") 520 location = self.cur_token_location_ 521 522 # format B: "lookupflag 6;" 523 if self.next_token_type_ == Lexer.NUMBER: 524 value = self.expect_number_() 525 self.expect_symbol_(";") 526 return self.ast.LookupFlagStatement(value, location=location) 527 528 # format A: "lookupflag RightToLeft MarkAttachmentType @M;" 529 value_seen = False 530 value, markAttachment, markFilteringSet = 0, None, None 531 flags = { 532 "RightToLeft": 1, "IgnoreBaseGlyphs": 2, 533 "IgnoreLigatures": 4, "IgnoreMarks": 8 534 } 535 seen = set() 536 while self.next_token_ != ";": 537 if self.next_token_ in seen: 538 raise FeatureLibError( 539 "%s can be specified only once" % self.next_token_, 540 self.next_token_location_) 541 seen.add(self.next_token_) 542 if self.next_token_ == "MarkAttachmentType": 543 self.expect_keyword_("MarkAttachmentType") 544 markAttachment = self.parse_class_name_() 545 elif self.next_token_ == "UseMarkFilteringSet": 546 self.expect_keyword_("UseMarkFilteringSet") 547 markFilteringSet = self.parse_class_name_() 548 elif self.next_token_ in flags: 549 value_seen = True 550 value = value | flags[self.expect_name_()] 551 else: 552 raise FeatureLibError( 553 '"%s" is not a recognized lookupflag' % self.next_token_, 554 self.next_token_location_) 555 self.expect_symbol_(";") 556 557 if not any([value_seen, markAttachment, markFilteringSet]): 558 raise FeatureLibError( 559 'lookupflag must have a value', self.next_token_location_) 560 561 return self.ast.LookupFlagStatement(value, 562 markAttachment=markAttachment, 563 markFilteringSet=markFilteringSet, 564 location=location) 565 566 def parse_markClass_(self): 567 assert self.is_cur_keyword_("markClass") 568 location = self.cur_token_location_ 569 glyphs = self.parse_glyphclass_(accept_glyphname=True) 570 anchor = self.parse_anchor_() 571 name = self.expect_class_name_() 572 self.expect_symbol_(";") 573 markClass = self.doc_.markClasses.get(name) 574 if markClass is None: 575 markClass = self.ast.MarkClass(name) 576 self.doc_.markClasses[name] = markClass 577 self.glyphclasses_.define(name, markClass) 578 mcdef = self.ast.MarkClassDefinition(markClass, anchor, glyphs, 579 location=location) 580 markClass.addDefinition(mcdef) 581 return mcdef 582 583 def parse_position_(self, enumerated, vertical): 584 assert self.cur_token_ in {"position", "pos"} 585 if self.next_token_ == "cursive": # GPOS type 3 586 return self.parse_position_cursive_(enumerated, vertical) 587 elif self.next_token_ == "base": # GPOS type 4 588 return self.parse_position_base_(enumerated, vertical) 589 elif self.next_token_ == "ligature": # GPOS type 5 590 return self.parse_position_ligature_(enumerated, vertical) 591 elif self.next_token_ == "mark": # GPOS type 6 592 return self.parse_position_mark_(enumerated, vertical) 593 594 location = self.cur_token_location_ 595 prefix, glyphs, lookups, values, suffix, hasMarks = \ 596 self.parse_glyph_pattern_(vertical) 597 self.expect_symbol_(";") 598 599 if any(lookups): 600 # GPOS type 8: Chaining contextual positioning; explicit lookups 601 if any(values): 602 raise FeatureLibError( 603 "If \"lookup\" is present, no values must be specified", 604 location) 605 return self.ast.ChainContextPosStatement( 606 prefix, glyphs, suffix, lookups, location=location) 607 608 # Pair positioning, format A: "pos V 10 A -10;" 609 # Pair positioning, format B: "pos V A -20;" 610 if not prefix and not suffix and len(glyphs) == 2 and not hasMarks: 611 if values[0] is None: # Format B: "pos V A -20;" 612 values.reverse() 613 return self.ast.PairPosStatement( 614 glyphs[0], values[0], glyphs[1], values[1], 615 enumerated=enumerated, 616 location=location) 617 618 if enumerated: 619 raise FeatureLibError( 620 '"enumerate" is only allowed with pair positionings', location) 621 return self.ast.SinglePosStatement(list(zip(glyphs, values)), 622 prefix, suffix, forceChain=hasMarks, 623 location=location) 624 625 def parse_position_cursive_(self, enumerated, vertical): 626 location = self.cur_token_location_ 627 self.expect_keyword_("cursive") 628 if enumerated: 629 raise FeatureLibError( 630 '"enumerate" is not allowed with ' 631 'cursive attachment positioning', 632 location) 633 glyphclass = self.parse_glyphclass_(accept_glyphname=True) 634 entryAnchor = self.parse_anchor_() 635 exitAnchor = self.parse_anchor_() 636 self.expect_symbol_(";") 637 return self.ast.CursivePosStatement( 638 glyphclass, entryAnchor, exitAnchor, location=location) 639 640 def parse_position_base_(self, enumerated, vertical): 641 location = self.cur_token_location_ 642 self.expect_keyword_("base") 643 if enumerated: 644 raise FeatureLibError( 645 '"enumerate" is not allowed with ' 646 'mark-to-base attachment positioning', 647 location) 648 base = self.parse_glyphclass_(accept_glyphname=True) 649 marks = self.parse_anchor_marks_() 650 self.expect_symbol_(";") 651 return self.ast.MarkBasePosStatement(base, marks, location=location) 652 653 def parse_position_ligature_(self, enumerated, vertical): 654 location = self.cur_token_location_ 655 self.expect_keyword_("ligature") 656 if enumerated: 657 raise FeatureLibError( 658 '"enumerate" is not allowed with ' 659 'mark-to-ligature attachment positioning', 660 location) 661 ligatures = self.parse_glyphclass_(accept_glyphname=True) 662 marks = [self.parse_anchor_marks_()] 663 while self.next_token_ == "ligComponent": 664 self.expect_keyword_("ligComponent") 665 marks.append(self.parse_anchor_marks_()) 666 self.expect_symbol_(";") 667 return self.ast.MarkLigPosStatement(ligatures, marks, location=location) 668 669 def parse_position_mark_(self, enumerated, vertical): 670 location = self.cur_token_location_ 671 self.expect_keyword_("mark") 672 if enumerated: 673 raise FeatureLibError( 674 '"enumerate" is not allowed with ' 675 'mark-to-mark attachment positioning', 676 location) 677 baseMarks = self.parse_glyphclass_(accept_glyphname=True) 678 marks = self.parse_anchor_marks_() 679 self.expect_symbol_(";") 680 return self.ast.MarkMarkPosStatement(baseMarks, marks, 681 location=location) 682 683 def parse_script_(self): 684 assert self.is_cur_keyword_("script") 685 location, script = self.cur_token_location_, self.expect_script_tag_() 686 self.expect_symbol_(";") 687 return self.ast.ScriptStatement(script, location=location) 688 689 def parse_substitute_(self): 690 assert self.cur_token_ in {"substitute", "sub", "reversesub", "rsub"} 691 location = self.cur_token_location_ 692 reverse = self.cur_token_ in {"reversesub", "rsub"} 693 old_prefix, old, lookups, values, old_suffix, hasMarks = \ 694 self.parse_glyph_pattern_(vertical=False) 695 if any(values): 696 raise FeatureLibError( 697 "Substitution statements cannot contain values", location) 698 new = [] 699 if self.next_token_ == "by": 700 keyword = self.expect_keyword_("by") 701 while self.next_token_ != ";": 702 gc = self.parse_glyphclass_(accept_glyphname=True) 703 new.append(gc) 704 elif self.next_token_ == "from": 705 keyword = self.expect_keyword_("from") 706 new = [self.parse_glyphclass_(accept_glyphname=False)] 707 else: 708 keyword = None 709 self.expect_symbol_(";") 710 if len(new) is 0 and not any(lookups): 711 raise FeatureLibError( 712 'Expected "by", "from" or explicit lookup references', 713 self.cur_token_location_) 714 715 # GSUB lookup type 3: Alternate substitution. 716 # Format: "substitute a from [a.1 a.2 a.3];" 717 if keyword == "from": 718 if reverse: 719 raise FeatureLibError( 720 'Reverse chaining substitutions do not support "from"', 721 location) 722 if len(old) != 1 or len(old[0].glyphSet()) != 1: 723 raise FeatureLibError( 724 'Expected a single glyph before "from"', 725 location) 726 if len(new) != 1: 727 raise FeatureLibError( 728 'Expected a single glyphclass after "from"', 729 location) 730 return self.ast.AlternateSubstStatement( 731 old_prefix, old[0], old_suffix, new[0], location=location) 732 733 num_lookups = len([l for l in lookups if l is not None]) 734 735 # GSUB lookup type 1: Single substitution. 736 # Format A: "substitute a by a.sc;" 737 # Format B: "substitute [one.fitted one.oldstyle] by one;" 738 # Format C: "substitute [a-d] by [A.sc-D.sc];" 739 if (not reverse and len(old) == 1 and len(new) == 1 and 740 num_lookups == 0): 741 glyphs = list(old[0].glyphSet()) 742 replacements = list(new[0].glyphSet()) 743 if len(replacements) == 1: 744 replacements = replacements * len(glyphs) 745 if len(glyphs) != len(replacements): 746 raise FeatureLibError( 747 'Expected a glyph class with %d elements after "by", ' 748 'but found a glyph class with %d elements' % 749 (len(glyphs), len(replacements)), location) 750 return self.ast.SingleSubstStatement( 751 old, new, 752 old_prefix, old_suffix, 753 forceChain=hasMarks, 754 location=location 755 ) 756 757 # GSUB lookup type 2: Multiple substitution. 758 # Format: "substitute f_f_i by f f i;" 759 if (not reverse and 760 len(old) == 1 and len(old[0].glyphSet()) == 1 and 761 len(new) > 1 and max([len(n.glyphSet()) for n in new]) == 1 and 762 num_lookups == 0): 763 return self.ast.MultipleSubstStatement( 764 old_prefix, tuple(old[0].glyphSet())[0], old_suffix, 765 tuple([list(n.glyphSet())[0] for n in new]), 766 forceChain=hasMarks, location=location) 767 768 # GSUB lookup type 4: Ligature substitution. 769 # Format: "substitute f f i by f_f_i;" 770 if (not reverse and 771 len(old) > 1 and len(new) == 1 and 772 len(new[0].glyphSet()) == 1 and 773 num_lookups == 0): 774 return self.ast.LigatureSubstStatement( 775 old_prefix, old, old_suffix, 776 list(new[0].glyphSet())[0], forceChain=hasMarks, 777 location=location) 778 779 # GSUB lookup type 8: Reverse chaining substitution. 780 if reverse: 781 if len(old) != 1: 782 raise FeatureLibError( 783 "In reverse chaining single substitutions, " 784 "only a single glyph or glyph class can be replaced", 785 location) 786 if len(new) != 1: 787 raise FeatureLibError( 788 'In reverse chaining single substitutions, ' 789 'the replacement (after "by") must be a single glyph ' 790 'or glyph class', location) 791 if num_lookups != 0: 792 raise FeatureLibError( 793 "Reverse chaining substitutions cannot call named lookups", 794 location) 795 glyphs = sorted(list(old[0].glyphSet())) 796 replacements = sorted(list(new[0].glyphSet())) 797 if len(replacements) == 1: 798 replacements = replacements * len(glyphs) 799 if len(glyphs) != len(replacements): 800 raise FeatureLibError( 801 'Expected a glyph class with %d elements after "by", ' 802 'but found a glyph class with %d elements' % 803 (len(glyphs), len(replacements)), location) 804 return self.ast.ReverseChainSingleSubstStatement( 805 old_prefix, old_suffix, old, new, location=location) 806 807 if len(old) > 1 and len(new) > 1: 808 raise FeatureLibError( 809 'Direct substitution of multiple glyphs by multiple glyphs ' 810 'is not supported', 811 location) 812 813 # GSUB lookup type 6: Chaining contextual substitution. 814 assert len(new) == 0, new 815 rule = self.ast.ChainContextSubstStatement( 816 old_prefix, old, old_suffix, lookups, location=location) 817 return rule 818 819 def parse_subtable_(self): 820 assert self.is_cur_keyword_("subtable") 821 location = self.cur_token_location_ 822 self.expect_symbol_(";") 823 return self.ast.SubtableStatement(location=location) 824 825 def parse_size_parameters_(self): 826 assert self.is_cur_keyword_("parameters") 827 location = self.cur_token_location_ 828 DesignSize = self.expect_decipoint_() 829 SubfamilyID = self.expect_number_() 830 RangeStart = 0 831 RangeEnd = 0 832 if self.next_token_type_ in (Lexer.NUMBER, Lexer.FLOAT) or \ 833 SubfamilyID != 0: 834 RangeStart = self.expect_decipoint_() 835 RangeEnd = self.expect_decipoint_() 836 837 self.expect_symbol_(";") 838 return self.ast.SizeParameters(DesignSize, SubfamilyID, 839 RangeStart, RangeEnd, 840 location=location) 841 842 def parse_size_menuname_(self): 843 assert self.is_cur_keyword_("sizemenuname") 844 location = self.cur_token_location_ 845 platformID, platEncID, langID, string = self.parse_name_() 846 return self.ast.FeatureNameStatement("size", platformID, 847 platEncID, langID, string, 848 location=location) 849 850 def parse_table_(self): 851 assert self.is_cur_keyword_("table") 852 location, name = self.cur_token_location_, self.expect_tag_() 853 table = self.ast.TableBlock(name, location=location) 854 self.expect_symbol_("{") 855 handler = { 856 "GDEF": self.parse_table_GDEF_, 857 "head": self.parse_table_head_, 858 "hhea": self.parse_table_hhea_, 859 "vhea": self.parse_table_vhea_, 860 "name": self.parse_table_name_, 861 "BASE": self.parse_table_BASE_, 862 "OS/2": self.parse_table_OS_2_, 863 }.get(name) 864 if handler: 865 handler(table) 866 else: 867 raise FeatureLibError('"table %s" is not supported' % name.strip(), 868 location) 869 self.expect_symbol_("}") 870 end_tag = self.expect_tag_() 871 if end_tag != name: 872 raise FeatureLibError('Expected "%s"' % name.strip(), 873 self.cur_token_location_) 874 self.expect_symbol_(";") 875 return table 876 877 def parse_table_GDEF_(self, table): 878 statements = table.statements 879 while self.next_token_ != "}" or self.cur_comments_: 880 self.advance_lexer_(comments=True) 881 if self.cur_token_type_ is Lexer.COMMENT: 882 statements.append(self.ast.Comment( 883 self.cur_token_, location=self.cur_token_location_)) 884 elif self.is_cur_keyword_("Attach"): 885 statements.append(self.parse_attach_()) 886 elif self.is_cur_keyword_("GlyphClassDef"): 887 statements.append(self.parse_GlyphClassDef_()) 888 elif self.is_cur_keyword_("LigatureCaretByIndex"): 889 statements.append(self.parse_ligatureCaretByIndex_()) 890 elif self.is_cur_keyword_("LigatureCaretByPos"): 891 statements.append(self.parse_ligatureCaretByPos_()) 892 elif self.cur_token_ == ";": 893 continue 894 else: 895 raise FeatureLibError( 896 "Expected Attach, LigatureCaretByIndex, " 897 "or LigatureCaretByPos", 898 self.cur_token_location_) 899 900 def parse_table_head_(self, table): 901 statements = table.statements 902 while self.next_token_ != "}" or self.cur_comments_: 903 self.advance_lexer_(comments=True) 904 if self.cur_token_type_ is Lexer.COMMENT: 905 statements.append(self.ast.Comment( 906 self.cur_token_, location=self.cur_token_location_)) 907 elif self.is_cur_keyword_("FontRevision"): 908 statements.append(self.parse_FontRevision_()) 909 elif self.cur_token_ == ";": 910 continue 911 else: 912 raise FeatureLibError("Expected FontRevision", 913 self.cur_token_location_) 914 915 def parse_table_hhea_(self, table): 916 statements = table.statements 917 fields = ("CaretOffset", "Ascender", "Descender", "LineGap") 918 while self.next_token_ != "}" or self.cur_comments_: 919 self.advance_lexer_(comments=True) 920 if self.cur_token_type_ is Lexer.COMMENT: 921 statements.append(self.ast.Comment( 922 self.cur_token_, location=self.cur_token_location_)) 923 elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields: 924 key = self.cur_token_.lower() 925 value = self.expect_number_() 926 statements.append( 927 self.ast.HheaField(key, value, 928 location=self.cur_token_location_)) 929 if self.next_token_ != ";": 930 raise FeatureLibError("Incomplete statement", self.next_token_location_) 931 elif self.cur_token_ == ";": 932 continue 933 else: 934 raise FeatureLibError("Expected CaretOffset, Ascender, " 935 "Descender or LineGap", 936 self.cur_token_location_) 937 938 def parse_table_vhea_(self, table): 939 statements = table.statements 940 fields = ("VertTypoAscender", "VertTypoDescender", "VertTypoLineGap") 941 while self.next_token_ != "}" or self.cur_comments_: 942 self.advance_lexer_(comments=True) 943 if self.cur_token_type_ is Lexer.COMMENT: 944 statements.append(self.ast.Comment( 945 self.cur_token_, location=self.cur_token_location_)) 946 elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields: 947 key = self.cur_token_.lower() 948 value = self.expect_number_() 949 statements.append( 950 self.ast.VheaField(key, value, 951 location=self.cur_token_location_)) 952 if self.next_token_ != ";": 953 raise FeatureLibError("Incomplete statement", self.next_token_location_) 954 elif self.cur_token_ == ";": 955 continue 956 else: 957 raise FeatureLibError("Expected VertTypoAscender, " 958 "VertTypoDescender or VertTypoLineGap", 959 self.cur_token_location_) 960 961 def parse_table_name_(self, table): 962 statements = table.statements 963 while self.next_token_ != "}" or self.cur_comments_: 964 self.advance_lexer_(comments=True) 965 if self.cur_token_type_ is Lexer.COMMENT: 966 statements.append(self.ast.Comment( 967 self.cur_token_, location=self.cur_token_location_)) 968 elif self.is_cur_keyword_("nameid"): 969 statement = self.parse_nameid_() 970 if statement: 971 statements.append(statement) 972 elif self.cur_token_ == ";": 973 continue 974 else: 975 raise FeatureLibError("Expected nameid", 976 self.cur_token_location_) 977 978 def parse_name_(self): 979 platEncID = None 980 langID = None 981 if self.next_token_type_ == Lexer.NUMBER: 982 platformID = self.expect_number_() 983 location = self.cur_token_location_ 984 if platformID not in (1, 3): 985 raise FeatureLibError("Expected platform id 1 or 3", location) 986 if self.next_token_type_ == Lexer.NUMBER: 987 platEncID = self.expect_number_() 988 langID = self.expect_number_() 989 else: 990 platformID = 3 991 location = self.cur_token_location_ 992 993 if platformID == 1: # Macintosh 994 platEncID = platEncID or 0 # Roman 995 langID = langID or 0 # English 996 else: # 3, Windows 997 platEncID = platEncID or 1 # Unicode 998 langID = langID or 0x0409 # English 999 1000 string = self.expect_string_() 1001 self.expect_symbol_(";") 1002 1003 encoding = getEncoding(platformID, platEncID, langID) 1004 if encoding is None: 1005 raise FeatureLibError("Unsupported encoding", location) 1006 unescaped = self.unescape_string_(string, encoding) 1007 return platformID, platEncID, langID, unescaped 1008 1009 def parse_nameid_(self): 1010 assert self.cur_token_ == "nameid", self.cur_token_ 1011 location, nameID = self.cur_token_location_, self.expect_number_() 1012 if nameID > 32767: 1013 raise FeatureLibError("Name id value cannot be greater than 32767", 1014 self.cur_token_location_) 1015 if 1 <= nameID <= 6: 1016 log.warning("Name id %d cannot be set from the feature file. " 1017 "Ignoring record" % nameID) 1018 self.parse_name_() # skip to the next record 1019 return None 1020 1021 platformID, platEncID, langID, string = self.parse_name_() 1022 return self.ast.NameRecord(nameID, platformID, platEncID, 1023 langID, string, location=location) 1024 1025 def unescape_string_(self, string, encoding): 1026 if encoding == "utf_16_be": 1027 s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string) 1028 else: 1029 unescape = lambda m: self.unescape_byte_(m, encoding) 1030 s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string) 1031 # We now have a Unicode string, but it might contain surrogate pairs. 1032 # We convert surrogates to actual Unicode by round-tripping through 1033 # Python's UTF-16 codec in a special mode. 1034 utf16 = tobytes(s, "utf_16_be", "surrogatepass") 1035 return tounicode(utf16, "utf_16_be") 1036 1037 @staticmethod 1038 def unescape_unichr_(match): 1039 n = match.group(0)[1:] 1040 return unichr(int(n, 16)) 1041 1042 @staticmethod 1043 def unescape_byte_(match, encoding): 1044 n = match.group(0)[1:] 1045 return bytechr(int(n, 16)).decode(encoding) 1046 1047 def parse_table_BASE_(self, table): 1048 statements = table.statements 1049 while self.next_token_ != "}" or self.cur_comments_: 1050 self.advance_lexer_(comments=True) 1051 if self.cur_token_type_ is Lexer.COMMENT: 1052 statements.append(self.ast.Comment( 1053 self.cur_token_, location=self.cur_token_location_)) 1054 elif self.is_cur_keyword_("HorizAxis.BaseTagList"): 1055 horiz_bases = self.parse_base_tag_list_() 1056 elif self.is_cur_keyword_("HorizAxis.BaseScriptList"): 1057 horiz_scripts = self.parse_base_script_list_(len(horiz_bases)) 1058 statements.append( 1059 self.ast.BaseAxis(horiz_bases, 1060 horiz_scripts, False, 1061 location=self.cur_token_location_)) 1062 elif self.is_cur_keyword_("VertAxis.BaseTagList"): 1063 vert_bases = self.parse_base_tag_list_() 1064 elif self.is_cur_keyword_("VertAxis.BaseScriptList"): 1065 vert_scripts = self.parse_base_script_list_(len(vert_bases)) 1066 statements.append( 1067 self.ast.BaseAxis(vert_bases, 1068 vert_scripts, True, 1069 location=self.cur_token_location_)) 1070 elif self.cur_token_ == ";": 1071 continue 1072 1073 def parse_table_OS_2_(self, table): 1074 statements = table.statements 1075 numbers = ("FSType", "TypoAscender", "TypoDescender", "TypoLineGap", 1076 "winAscent", "winDescent", "XHeight", "CapHeight", 1077 "WeightClass", "WidthClass", "LowerOpSize", "UpperOpSize") 1078 ranges = ("UnicodeRange", "CodePageRange") 1079 while self.next_token_ != "}" or self.cur_comments_: 1080 self.advance_lexer_(comments=True) 1081 if self.cur_token_type_ is Lexer.COMMENT: 1082 statements.append(self.ast.Comment( 1083 self.cur_token_, location=self.cur_token_location_)) 1084 elif self.cur_token_type_ is Lexer.NAME: 1085 key = self.cur_token_.lower() 1086 value = None 1087 if self.cur_token_ in numbers: 1088 value = self.expect_number_() 1089 elif self.is_cur_keyword_("Panose"): 1090 value = [] 1091 for i in range(10): 1092 value.append(self.expect_number_()) 1093 elif self.cur_token_ in ranges: 1094 value = [] 1095 while self.next_token_ != ";": 1096 value.append(self.expect_number_()) 1097 elif self.is_cur_keyword_("Vendor"): 1098 value = self.expect_string_() 1099 statements.append( 1100 self.ast.OS2Field(key, value, 1101 location=self.cur_token_location_)) 1102 elif self.cur_token_ == ";": 1103 continue 1104 1105 def parse_base_tag_list_(self): 1106 assert self.cur_token_ in ("HorizAxis.BaseTagList", 1107 "VertAxis.BaseTagList"), self.cur_token_ 1108 bases = [] 1109 while self.next_token_ != ";": 1110 bases.append(self.expect_script_tag_()) 1111 self.expect_symbol_(";") 1112 return bases 1113 1114 def parse_base_script_list_(self, count): 1115 assert self.cur_token_ in ("HorizAxis.BaseScriptList", 1116 "VertAxis.BaseScriptList"), self.cur_token_ 1117 scripts = [(self.parse_base_script_record_(count))] 1118 while self.next_token_ == ",": 1119 self.expect_symbol_(",") 1120 scripts.append(self.parse_base_script_record_(count)) 1121 self.expect_symbol_(";") 1122 return scripts 1123 1124 def parse_base_script_record_(self, count): 1125 script_tag = self.expect_script_tag_() 1126 base_tag = self.expect_script_tag_() 1127 coords = [self.expect_number_() for i in range(count)] 1128 return script_tag, base_tag, coords 1129 1130 def parse_device_(self): 1131 result = None 1132 self.expect_symbol_("<") 1133 self.expect_keyword_("device") 1134 if self.next_token_ == "NULL": 1135 self.expect_keyword_("NULL") 1136 else: 1137 result = [(self.expect_number_(), self.expect_number_())] 1138 while self.next_token_ == ",": 1139 self.expect_symbol_(",") 1140 result.append((self.expect_number_(), self.expect_number_())) 1141 result = tuple(result) # make it hashable 1142 self.expect_symbol_(">") 1143 return result 1144 1145 def is_next_value_(self): 1146 return self.next_token_type_ is Lexer.NUMBER or self.next_token_ == "<" 1147 1148 def parse_valuerecord_(self, vertical): 1149 if self.next_token_type_ is Lexer.NUMBER: 1150 number, location = self.expect_number_(), self.cur_token_location_ 1151 if vertical: 1152 val = self.ast.ValueRecord(yAdvance=number, 1153 vertical=vertical, 1154 location=location) 1155 else: 1156 val = self.ast.ValueRecord(xAdvance=number, 1157 vertical=vertical, 1158 location=location) 1159 return val 1160 self.expect_symbol_("<") 1161 location = self.cur_token_location_ 1162 if self.next_token_type_ is Lexer.NAME: 1163 name = self.expect_name_() 1164 if name == "NULL": 1165 self.expect_symbol_(">") 1166 return self.ast.ValueRecord() 1167 vrd = self.valuerecords_.resolve(name) 1168 if vrd is None: 1169 raise FeatureLibError("Unknown valueRecordDef \"%s\"" % name, 1170 self.cur_token_location_) 1171 value = vrd.value 1172 xPlacement, yPlacement = (value.xPlacement, value.yPlacement) 1173 xAdvance, yAdvance = (value.xAdvance, value.yAdvance) 1174 else: 1175 xPlacement, yPlacement, xAdvance, yAdvance = ( 1176 self.expect_number_(), self.expect_number_(), 1177 self.expect_number_(), self.expect_number_()) 1178 1179 if self.next_token_ == "<": 1180 xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = ( 1181 self.parse_device_(), self.parse_device_(), 1182 self.parse_device_(), self.parse_device_()) 1183 allDeltas = sorted([ 1184 delta 1185 for size, delta 1186 in (xPlaDevice if xPlaDevice else ()) + 1187 (yPlaDevice if yPlaDevice else ()) + 1188 (xAdvDevice if xAdvDevice else ()) + 1189 (yAdvDevice if yAdvDevice else ())]) 1190 if allDeltas[0] < -128 or allDeltas[-1] > 127: 1191 raise FeatureLibError( 1192 "Device value out of valid range (-128..127)", 1193 self.cur_token_location_) 1194 else: 1195 xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = ( 1196 None, None, None, None) 1197 1198 self.expect_symbol_(">") 1199 return self.ast.ValueRecord( 1200 xPlacement, yPlacement, xAdvance, yAdvance, 1201 xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice, 1202 vertical=vertical, location=location) 1203 1204 def parse_valuerecord_definition_(self, vertical): 1205 assert self.is_cur_keyword_("valueRecordDef") 1206 location = self.cur_token_location_ 1207 value = self.parse_valuerecord_(vertical) 1208 name = self.expect_name_() 1209 self.expect_symbol_(";") 1210 vrd = self.ast.ValueRecordDefinition(name, value, location=location) 1211 self.valuerecords_.define(name, vrd) 1212 return vrd 1213 1214 def parse_languagesystem_(self): 1215 assert self.cur_token_ == "languagesystem" 1216 location = self.cur_token_location_ 1217 script = self.expect_script_tag_() 1218 language = self.expect_language_tag_() 1219 self.expect_symbol_(";") 1220 return self.ast.LanguageSystemStatement(script, language, 1221 location=location) 1222 1223 def parse_feature_block_(self): 1224 assert self.cur_token_ == "feature" 1225 location = self.cur_token_location_ 1226 tag = self.expect_tag_() 1227 vertical = (tag in {"vkrn", "vpal", "vhal", "valt"}) 1228 1229 stylisticset = None 1230 cv_feature = None 1231 size_feature = False 1232 if tag in self.SS_FEATURE_TAGS: 1233 stylisticset = tag 1234 elif tag in self.CV_FEATURE_TAGS: 1235 cv_feature = tag 1236 elif tag == "size": 1237 size_feature = True 1238 1239 use_extension = False 1240 if self.next_token_ == "useExtension": 1241 self.expect_keyword_("useExtension") 1242 use_extension = True 1243 1244 block = self.ast.FeatureBlock(tag, use_extension=use_extension, 1245 location=location) 1246 self.parse_block_(block, vertical, stylisticset, size_feature, 1247 cv_feature) 1248 return block 1249 1250 def parse_feature_reference_(self): 1251 assert self.cur_token_ == "feature", self.cur_token_ 1252 location = self.cur_token_location_ 1253 featureName = self.expect_tag_() 1254 self.expect_symbol_(";") 1255 return self.ast.FeatureReferenceStatement(featureName, 1256 location=location) 1257 1258 def parse_featureNames_(self, tag): 1259 assert self.cur_token_ == "featureNames", self.cur_token_ 1260 block = self.ast.NestedBlock(tag, self.cur_token_, 1261 location=self.cur_token_location_) 1262 self.expect_symbol_("{") 1263 for symtab in self.symbol_tables_: 1264 symtab.enter_scope() 1265 while self.next_token_ != "}" or self.cur_comments_: 1266 self.advance_lexer_(comments=True) 1267 if self.cur_token_type_ is Lexer.COMMENT: 1268 block.statements.append(self.ast.Comment( 1269 self.cur_token_, location=self.cur_token_location_)) 1270 elif self.is_cur_keyword_("name"): 1271 location = self.cur_token_location_ 1272 platformID, platEncID, langID, string = self.parse_name_() 1273 block.statements.append( 1274 self.ast.FeatureNameStatement(tag, platformID, 1275 platEncID, langID, string, 1276 location=location)) 1277 elif self.cur_token_ == ";": 1278 continue 1279 else: 1280 raise FeatureLibError('Expected "name"', 1281 self.cur_token_location_) 1282 self.expect_symbol_("}") 1283 for symtab in self.symbol_tables_: 1284 symtab.exit_scope() 1285 self.expect_symbol_(";") 1286 return block 1287 1288 def parse_cvParameters_(self, tag): 1289 assert self.cur_token_ == "cvParameters", self.cur_token_ 1290 block = self.ast.NestedBlock(tag, self.cur_token_, 1291 location=self.cur_token_location_) 1292 self.expect_symbol_("{") 1293 for symtab in self.symbol_tables_: 1294 symtab.enter_scope() 1295 1296 statements = block.statements 1297 while self.next_token_ != "}" or self.cur_comments_: 1298 self.advance_lexer_(comments=True) 1299 if self.cur_token_type_ is Lexer.COMMENT: 1300 statements.append(self.ast.Comment( 1301 self.cur_token_, location=self.cur_token_location_)) 1302 elif self.is_cur_keyword_({"FeatUILabelNameID", 1303 "FeatUITooltipTextNameID", 1304 "SampleTextNameID", 1305 "ParamUILabelNameID"}): 1306 statements.append(self.parse_cvNameIDs_(tag, self.cur_token_)) 1307 elif self.is_cur_keyword_("Character"): 1308 statements.append(self.parse_cvCharacter_(tag)) 1309 elif self.cur_token_ == ";": 1310 continue 1311 else: 1312 raise FeatureLibError( 1313 "Expected statement: got {} {}".format( 1314 self.cur_token_type_, self.cur_token_), 1315 self.cur_token_location_) 1316 1317 self.expect_symbol_("}") 1318 for symtab in self.symbol_tables_: 1319 symtab.exit_scope() 1320 self.expect_symbol_(";") 1321 return block 1322 1323 def parse_cvNameIDs_(self, tag, block_name): 1324 assert self.cur_token_ == block_name, self.cur_token_ 1325 block = self.ast.NestedBlock(tag, block_name, 1326 location=self.cur_token_location_) 1327 self.expect_symbol_("{") 1328 for symtab in self.symbol_tables_: 1329 symtab.enter_scope() 1330 while self.next_token_ != "}" or self.cur_comments_: 1331 self.advance_lexer_(comments=True) 1332 if self.cur_token_type_ is Lexer.COMMENT: 1333 block.statements.append(self.ast.Comment( 1334 self.cur_token_, location=self.cur_token_location_)) 1335 elif self.is_cur_keyword_("name"): 1336 location = self.cur_token_location_ 1337 platformID, platEncID, langID, string = self.parse_name_() 1338 block.statements.append( 1339 self.ast.CVParametersNameStatement( 1340 tag, platformID, platEncID, langID, string, 1341 block_name, location=location)) 1342 elif self.cur_token_ == ";": 1343 continue 1344 else: 1345 raise FeatureLibError('Expected "name"', 1346 self.cur_token_location_) 1347 self.expect_symbol_("}") 1348 for symtab in self.symbol_tables_: 1349 symtab.exit_scope() 1350 self.expect_symbol_(";") 1351 return block 1352 1353 def parse_cvCharacter_(self, tag): 1354 assert self.cur_token_ == "Character", self.cur_token_ 1355 location, character = self.cur_token_location_, self.expect_decimal_or_hexadecimal_() 1356 self.expect_symbol_(";") 1357 if not (0xFFFFFF >= character >= 0): 1358 raise FeatureLibError("Character value must be between " 1359 "{:#x} and {:#x}".format(0, 0xFFFFFF), 1360 location) 1361 return self.ast.CharacterStatement(character, tag, location=location) 1362 1363 def parse_FontRevision_(self): 1364 assert self.cur_token_ == "FontRevision", self.cur_token_ 1365 location, version = self.cur_token_location_, self.expect_float_() 1366 self.expect_symbol_(";") 1367 if version <= 0: 1368 raise FeatureLibError("Font revision numbers must be positive", 1369 location) 1370 return self.ast.FontRevisionStatement(version, location=location) 1371 1372 def parse_block_(self, block, vertical, stylisticset=None, 1373 size_feature=False, cv_feature=None): 1374 self.expect_symbol_("{") 1375 for symtab in self.symbol_tables_: 1376 symtab.enter_scope() 1377 1378 statements = block.statements 1379 while self.next_token_ != "}" or self.cur_comments_: 1380 self.advance_lexer_(comments=True) 1381 if self.cur_token_type_ is Lexer.COMMENT: 1382 statements.append(self.ast.Comment( 1383 self.cur_token_, location=self.cur_token_location_)) 1384 elif self.cur_token_type_ is Lexer.GLYPHCLASS: 1385 statements.append(self.parse_glyphclass_definition_()) 1386 elif self.is_cur_keyword_("anchorDef"): 1387 statements.append(self.parse_anchordef_()) 1388 elif self.is_cur_keyword_({"enum", "enumerate"}): 1389 statements.append(self.parse_enumerate_(vertical=vertical)) 1390 elif self.is_cur_keyword_("feature"): 1391 statements.append(self.parse_feature_reference_()) 1392 elif self.is_cur_keyword_("ignore"): 1393 statements.append(self.parse_ignore_()) 1394 elif self.is_cur_keyword_("language"): 1395 statements.append(self.parse_language_()) 1396 elif self.is_cur_keyword_("lookup"): 1397 statements.append(self.parse_lookup_(vertical)) 1398 elif self.is_cur_keyword_("lookupflag"): 1399 statements.append(self.parse_lookupflag_()) 1400 elif self.is_cur_keyword_("markClass"): 1401 statements.append(self.parse_markClass_()) 1402 elif self.is_cur_keyword_({"pos", "position"}): 1403 statements.append( 1404 self.parse_position_(enumerated=False, vertical=vertical)) 1405 elif self.is_cur_keyword_("script"): 1406 statements.append(self.parse_script_()) 1407 elif (self.is_cur_keyword_({"sub", "substitute", 1408 "rsub", "reversesub"})): 1409 statements.append(self.parse_substitute_()) 1410 elif self.is_cur_keyword_("subtable"): 1411 statements.append(self.parse_subtable_()) 1412 elif self.is_cur_keyword_("valueRecordDef"): 1413 statements.append(self.parse_valuerecord_definition_(vertical)) 1414 elif stylisticset and self.is_cur_keyword_("featureNames"): 1415 statements.append(self.parse_featureNames_(stylisticset)) 1416 elif cv_feature and self.is_cur_keyword_("cvParameters"): 1417 statements.append(self.parse_cvParameters_(cv_feature)) 1418 elif size_feature and self.is_cur_keyword_("parameters"): 1419 statements.append(self.parse_size_parameters_()) 1420 elif size_feature and self.is_cur_keyword_("sizemenuname"): 1421 statements.append(self.parse_size_menuname_()) 1422 elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in self.extensions: 1423 statements.append(self.extensions[self.cur_token_](self)) 1424 elif self.cur_token_ == ";": 1425 continue 1426 else: 1427 raise FeatureLibError( 1428 "Expected glyph class definition or statement: got {} {}".format(self.cur_token_type_, self.cur_token_), 1429 self.cur_token_location_) 1430 1431 self.expect_symbol_("}") 1432 for symtab in self.symbol_tables_: 1433 symtab.exit_scope() 1434 1435 name = self.expect_name_() 1436 if name != block.name.strip(): 1437 raise FeatureLibError("Expected \"%s\"" % block.name.strip(), 1438 self.cur_token_location_) 1439 self.expect_symbol_(";") 1440 1441 # A multiple substitution may have a single destination, in which case 1442 # it will look just like a single substitution. So if there are both 1443 # multiple and single substitutions, upgrade all the single ones to 1444 # multiple substitutions. 1445 1446 # Check if we have a mix of non-contextual singles and multiples. 1447 has_single = False 1448 has_multiple = False 1449 for s in statements: 1450 if isinstance(s, self.ast.SingleSubstStatement): 1451 has_single = not any([s.prefix, s.suffix, s.forceChain]) 1452 elif isinstance(s, self.ast.MultipleSubstStatement): 1453 has_multiple = not any([s.prefix, s.suffix, s.forceChain]) 1454 1455 # Upgrade all single substitutions to multiple substitutions. 1456 if has_single and has_multiple: 1457 for i, s in enumerate(statements): 1458 if isinstance(s, self.ast.SingleSubstStatement): 1459 statements[i] = self.ast.MultipleSubstStatement( 1460 s.prefix, s.glyphs[0].glyphSet()[0], s.suffix, 1461 [r.glyphSet()[0] for r in s.replacements], 1462 s.forceChain, location=s.location) 1463 1464 def is_cur_keyword_(self, k): 1465 if self.cur_token_type_ is Lexer.NAME: 1466 if isinstance(k, type("")): # basestring is gone in Python3 1467 return self.cur_token_ == k 1468 else: 1469 return self.cur_token_ in k 1470 return False 1471 1472 def expect_class_name_(self): 1473 self.advance_lexer_() 1474 if self.cur_token_type_ is not Lexer.GLYPHCLASS: 1475 raise FeatureLibError("Expected @NAME", self.cur_token_location_) 1476 return self.cur_token_ 1477 1478 def expect_cid_(self): 1479 self.advance_lexer_() 1480 if self.cur_token_type_ is Lexer.CID: 1481 return self.cur_token_ 1482 raise FeatureLibError("Expected a CID", self.cur_token_location_) 1483 1484 def expect_filename_(self): 1485 self.advance_lexer_() 1486 if self.cur_token_type_ is not Lexer.FILENAME: 1487 raise FeatureLibError("Expected file name", 1488 self.cur_token_location_) 1489 return self.cur_token_ 1490 1491 def expect_glyph_(self): 1492 self.advance_lexer_() 1493 if self.cur_token_type_ is Lexer.NAME: 1494 self.cur_token_ = self.cur_token_.lstrip("\\") 1495 if len(self.cur_token_) > 63: 1496 raise FeatureLibError( 1497 "Glyph names must not be longer than 63 characters", 1498 self.cur_token_location_) 1499 return self.cur_token_ 1500 elif self.cur_token_type_ is Lexer.CID: 1501 return "cid%05d" % self.cur_token_ 1502 raise FeatureLibError("Expected a glyph name or CID", 1503 self.cur_token_location_) 1504 1505 def expect_markClass_reference_(self): 1506 name = self.expect_class_name_() 1507 mc = self.glyphclasses_.resolve(name) 1508 if mc is None: 1509 raise FeatureLibError("Unknown markClass @%s" % name, 1510 self.cur_token_location_) 1511 if not isinstance(mc, self.ast.MarkClass): 1512 raise FeatureLibError("@%s is not a markClass" % name, 1513 self.cur_token_location_) 1514 return mc 1515 1516 def expect_tag_(self): 1517 self.advance_lexer_() 1518 if self.cur_token_type_ is not Lexer.NAME: 1519 raise FeatureLibError("Expected a tag", self.cur_token_location_) 1520 if len(self.cur_token_) > 4: 1521 raise FeatureLibError("Tags can not be longer than 4 characters", 1522 self.cur_token_location_) 1523 return (self.cur_token_ + " ")[:4] 1524 1525 def expect_script_tag_(self): 1526 tag = self.expect_tag_() 1527 if tag == "dflt": 1528 raise FeatureLibError( 1529 '"dflt" is not a valid script tag; use "DFLT" instead', 1530 self.cur_token_location_) 1531 return tag 1532 1533 def expect_language_tag_(self): 1534 tag = self.expect_tag_() 1535 if tag == "DFLT": 1536 raise FeatureLibError( 1537 '"DFLT" is not a valid language tag; use "dflt" instead', 1538 self.cur_token_location_) 1539 return tag 1540 1541 def expect_symbol_(self, symbol): 1542 self.advance_lexer_() 1543 if self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == symbol: 1544 return symbol 1545 raise FeatureLibError("Expected '%s'" % symbol, 1546 self.cur_token_location_) 1547 1548 def expect_keyword_(self, keyword): 1549 self.advance_lexer_() 1550 if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword: 1551 return self.cur_token_ 1552 raise FeatureLibError("Expected \"%s\"" % keyword, 1553 self.cur_token_location_) 1554 1555 def expect_name_(self): 1556 self.advance_lexer_() 1557 if self.cur_token_type_ is Lexer.NAME: 1558 return self.cur_token_ 1559 raise FeatureLibError("Expected a name", self.cur_token_location_) 1560 1561 # TODO: Don't allow this method to accept hexadecimal values 1562 def expect_number_(self): 1563 self.advance_lexer_() 1564 if self.cur_token_type_ is Lexer.NUMBER: 1565 return self.cur_token_ 1566 raise FeatureLibError("Expected a number", self.cur_token_location_) 1567 1568 def expect_float_(self): 1569 self.advance_lexer_() 1570 if self.cur_token_type_ is Lexer.FLOAT: 1571 return self.cur_token_ 1572 raise FeatureLibError("Expected a floating-point number", 1573 self.cur_token_location_) 1574 1575 # TODO: Don't allow this method to accept hexadecimal values 1576 def expect_decipoint_(self): 1577 if self.next_token_type_ == Lexer.FLOAT: 1578 return self.expect_float_() 1579 elif self.next_token_type_ is Lexer.NUMBER: 1580 return self.expect_number_() / 10 1581 else: 1582 raise FeatureLibError("Expected an integer or floating-point number", 1583 self.cur_token_location_) 1584 1585 def expect_decimal_or_hexadecimal_(self): 1586 # the lexer returns the same token type 'NUMBER' for either decimal or 1587 # hexadecimal integers, and casts them both to a `int` type, so it's 1588 # impossible to distinguish the two here. This method is implemented 1589 # the same as `expect_number_`, only it gives a more informative 1590 # error message 1591 self.advance_lexer_() 1592 if self.cur_token_type_ is Lexer.NUMBER: 1593 return self.cur_token_ 1594 raise FeatureLibError("Expected a decimal or hexadecimal number", 1595 self.cur_token_location_) 1596 1597 def expect_string_(self): 1598 self.advance_lexer_() 1599 if self.cur_token_type_ is Lexer.STRING: 1600 return self.cur_token_ 1601 raise FeatureLibError("Expected a string", self.cur_token_location_) 1602 1603 def advance_lexer_(self, comments=False): 1604 if comments and self.cur_comments_: 1605 self.cur_token_type_ = Lexer.COMMENT 1606 self.cur_token_, self.cur_token_location_ = self.cur_comments_.pop(0) 1607 return 1608 else: 1609 self.cur_token_type_, self.cur_token_, self.cur_token_location_ = ( 1610 self.next_token_type_, self.next_token_, self.next_token_location_) 1611 while True: 1612 try: 1613 (self.next_token_type_, self.next_token_, 1614 self.next_token_location_) = next(self.lexer_) 1615 except StopIteration: 1616 self.next_token_type_, self.next_token_ = (None, None) 1617 if self.next_token_type_ != Lexer.COMMENT: 1618 break 1619 self.cur_comments_.append((self.next_token_, self.next_token_location_)) 1620 1621 @staticmethod 1622 def reverse_string_(s): 1623 """'abc' --> 'cba'""" 1624 return ''.join(reversed(list(s))) 1625 1626 def make_cid_range_(self, location, start, limit): 1627 """(location, 999, 1001) --> ["cid00999", "cid01000", "cid01001"]""" 1628 result = list() 1629 if start > limit: 1630 raise FeatureLibError( 1631 "Bad range: start should be less than limit", location) 1632 for cid in range(start, limit + 1): 1633 result.append("cid%05d" % cid) 1634 return result 1635 1636 def make_glyph_range_(self, location, start, limit): 1637 """(location, "a.sc", "d.sc") --> ["a.sc", "b.sc", "c.sc", "d.sc"]""" 1638 result = list() 1639 if len(start) != len(limit): 1640 raise FeatureLibError( 1641 "Bad range: \"%s\" and \"%s\" should have the same length" % 1642 (start, limit), location) 1643 1644 rev = self.reverse_string_ 1645 prefix = os.path.commonprefix([start, limit]) 1646 suffix = rev(os.path.commonprefix([rev(start), rev(limit)])) 1647 if len(suffix) > 0: 1648 start_range = start[len(prefix):-len(suffix)] 1649 limit_range = limit[len(prefix):-len(suffix)] 1650 else: 1651 start_range = start[len(prefix):] 1652 limit_range = limit[len(prefix):] 1653 1654 if start_range >= limit_range: 1655 raise FeatureLibError( 1656 "Start of range must be smaller than its end", 1657 location) 1658 1659 uppercase = re.compile(r'^[A-Z]$') 1660 if uppercase.match(start_range) and uppercase.match(limit_range): 1661 for c in range(ord(start_range), ord(limit_range) + 1): 1662 result.append("%s%c%s" % (prefix, c, suffix)) 1663 return result 1664 1665 lowercase = re.compile(r'^[a-z]$') 1666 if lowercase.match(start_range) and lowercase.match(limit_range): 1667 for c in range(ord(start_range), ord(limit_range) + 1): 1668 result.append("%s%c%s" % (prefix, c, suffix)) 1669 return result 1670 1671 digits = re.compile(r'^[0-9]{1,3}$') 1672 if digits.match(start_range) and digits.match(limit_range): 1673 for i in range(int(start_range, 10), int(limit_range, 10) + 1): 1674 number = ("000" + str(i))[-len(start_range):] 1675 result.append("%s%s%s" % (prefix, number, suffix)) 1676 return result 1677 1678 raise FeatureLibError("Bad range: \"%s-%s\"" % (start, limit), 1679 location) 1680 1681 1682class SymbolTable(object): 1683 def __init__(self): 1684 self.scopes_ = [{}] 1685 1686 def enter_scope(self): 1687 self.scopes_.append({}) 1688 1689 def exit_scope(self): 1690 self.scopes_.pop() 1691 1692 def define(self, name, item): 1693 self.scopes_[-1][name] = item 1694 1695 def resolve(self, name): 1696 for scope in reversed(self.scopes_): 1697 item = scope.get(name) 1698 if item: 1699 return item 1700 return None 1701