# (HTML code-viewer navigation chrome removed; not part of the module source.)
1from __future__ import (
2    print_function, division, absolute_import, unicode_literals)
3from collections import OrderedDict
4import fontTools.voltLib.ast as ast
5from fontTools.voltLib.lexer import Lexer
6from fontTools.voltLib.error import VoltLibError
7from io import open
8
# Dispatch table mapping a top-level VOLT keyword to the name of the
# Parser method that parses the statement it introduces; Parser.parse()
# looks the method up with getattr().  Several keywords share a handler
# (the three *_PPEM settings, the two COMPILER_* flags).
PARSE_FUNCS = {
    "DEF_GLYPH": "parse_def_glyph_",
    "DEF_GROUP": "parse_def_group_",
    "DEF_SCRIPT": "parse_def_script_",
    "DEF_LOOKUP": "parse_def_lookup_",
    "DEF_ANCHOR": "parse_def_anchor_",
    "GRID_PPEM": "parse_ppem_",
    "PRESENTATION_PPEM": "parse_ppem_",
    "PPOSITIONING_PPEM": "parse_ppem_",
    "COMPILER_USEEXTENSIONLOOKUPS": "parse_compiler_flag_",
    "COMPILER_USEPAIRPOSFORMAT2": "parse_compiler_flag_",
    "CMAP_FORMAT": "parse_cmap_format",
}
22
23
class Parser(object):
    """Recursive-descent parser for Microsoft VOLT project files.

    Tokens come from :class:`fontTools.voltLib.lexer.Lexer` with a
    one-token lookahead: ``next_token_type_``/``next_token_`` hold the
    lookahead, ``cur_token_type_``/``cur_token_`` the token currently
    being consumed (see ``advance_lexer_``).  Parsed statements are
    collected into an ``ast.VoltFile``; symbol tables record glyphs,
    groups, scripts, languages, lookups and anchors so that duplicate
    definitions can be reported with a location.
    """

    def __init__(self, path):
        # ``path`` may be a filesystem path or a readable file-like
        # object (both are accepted by make_lexer_).
        self.doc_ = ast.VoltFile()
        # Glyphs keep definition order so glyph ranges can be resolved.
        self.glyphs_ = OrderedSymbolTable()
        self.groups_ = SymbolTable()
        self.anchors_ = {}  # dictionary of SymbolTable() keyed by glyph
        self.scripts_ = SymbolTable()
        self.langs_ = SymbolTable()
        self.lookups_ = SymbolTable()
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        self.make_lexer_(path)
        # Prime the one-token lookahead.
        self.advance_lexer_()

    def make_lexer_(self, file_or_path):
        """Create ``self.lexer_`` from a path or an open file-like object."""
        if hasattr(file_or_path, "read"):
            # File-like object; use its name (if any) for error locations.
            filename = getattr(file_or_path, "name", None)
            data = file_or_path.read()
        else:
            filename = file_or_path
            with open(file_or_path, "r") as f:
                data = f.read()
        self.lexer_ = Lexer(data, filename)

    def parse(self):
        """Parse the whole file and return the ``ast.VoltFile`` document.

        Raises VoltLibError on any token that is not one of the
        PARSE_FUNCS keywords (or END, which terminates parsing).
        """
        statements = self.doc_.statements
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_ in PARSE_FUNCS.keys():
                func = getattr(self, PARSE_FUNCS[self.cur_token_])
                statements.append(func())
            elif self.is_cur_keyword_("END"):
                break
            else:
                raise VoltLibError(
                    "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
                    self.cur_token_location_)
        return self.doc_

    def parse_def_glyph_(self):
        """Parse a DEF_GLYPH ... END_GLYPH statement.

        Returns an ``ast.GlyphDefinition`` and records it in
        ``self.glyphs_``; raises VoltLibError on a negative glyph ID /
        Unicode value or a duplicate glyph name.
        """
        assert self.is_cur_keyword_("DEF_GLYPH")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ID")
        gid = self.expect_number_()
        if gid < 0:
            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
        gunicode = None
        # UNICODE takes a single code point; UNICODEVALUES a comma-
        # separated list of U+XXXX strings.  Both are optional.
        if self.next_token_ == "UNICODE":
            self.expect_keyword_("UNICODE")
            gunicode = [self.expect_number_()]
            if gunicode[0] < 0:
                raise VoltLibError("Invalid glyph UNICODE",
                                   self.cur_token_location_)
        elif self.next_token_ == "UNICODEVALUES":
            self.expect_keyword_("UNICODEVALUES")
            gunicode = self.parse_unicode_values_()
        gtype = None
        if self.next_token_ == "TYPE":
            self.expect_keyword_("TYPE")
            gtype = self.expect_name_()
            assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT")
        components = None
        if self.next_token_ == "COMPONENTS":
            self.expect_keyword_("COMPONENTS")
            components = self.expect_number_()
        self.expect_keyword_("END_GLYPH")
        if self.glyphs_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph "%s" (gid %i) already defined' % (name, gid),
                location
            )
        def_glyph = ast.GlyphDefinition(name, gid,
                                        gunicode, gtype, components,
                                        location=location)
        self.glyphs_.define(name, def_glyph)
        return def_glyph

    def parse_def_group_(self):
        """Parse a DEF_GROUP ... END_GROUP statement.

        Returns an ``ast.GroupDefinition`` and records it in
        ``self.groups_``; raises VoltLibError on a duplicate group name.
        """
        assert self.is_cur_keyword_("DEF_GROUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        enum = None
        if self.next_token_ == "ENUM":
            enum = self.parse_enum_()
        self.expect_keyword_("END_GROUP")
        if self.groups_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph group "%s" already defined, '
                'group names are case insensitive' % name,
                location
            )
        def_group = ast.GroupDefinition(name, enum,
                                        location=location)
        self.groups_.define(name, def_group)
        return def_group

    def parse_def_script_(self):
        """Parse a DEF_SCRIPT ... END_SCRIPT statement.

        Parses the nested DEF_LANGSYS blocks, checking for duplicate
        language tags within a per-script scope of ``self.langs_``.
        Returns an ``ast.ScriptDefinition``.
        """
        assert self.is_cur_keyword_("DEF_SCRIPT")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        if self.scripts_.resolve(tag) is not None:
            raise VoltLibError(
                'Script "%s" already defined, '
                'script tags are case insensitive' % tag,
                location
            )
        # Language tags only need to be unique within this script.
        self.langs_.enter_scope()
        langs = []
        while self.next_token_ != "END_SCRIPT":
            self.advance_lexer_()
            lang = self.parse_langsys_()
            self.expect_keyword_("END_LANGSYS")
            if self.langs_.resolve(lang.tag) is not None:
                raise VoltLibError(
                    'Language "%s" already defined in script "%s", '
                    'language tags are case insensitive' % (lang.tag, tag),
                    location
                )
            self.langs_.define(lang.tag, lang)
            langs.append(lang)
        self.expect_keyword_("END_SCRIPT")
        self.langs_.exit_scope()
        def_script = ast.ScriptDefinition(name, tag, langs, location=location)
        self.scripts_.define(tag, def_script)
        return def_script

    def parse_langsys_(self):
        """Parse a DEF_LANGSYS block (caller consumes END_LANGSYS).

        Returns an ``ast.LangSysDefinition`` containing the parsed
        DEF_FEATURE blocks.
        """
        assert self.is_cur_keyword_("DEF_LANGSYS")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        features = []
        while self.next_token_ != "END_LANGSYS":
            self.advance_lexer_()
            feature = self.parse_feature_()
            self.expect_keyword_("END_FEATURE")
            features.append(feature)
        def_langsys = ast.LangSysDefinition(name, tag, features,
                                            location=location)
        return def_langsys

    def parse_feature_(self):
        """Parse a DEF_FEATURE block (caller consumes END_FEATURE).

        Returns an ``ast.FeatureDefinition`` whose ``lookups`` are the
        referenced lookup names (strings), not resolved definitions.
        """
        assert self.is_cur_keyword_("DEF_FEATURE")
        location = self.cur_token_location_
        self.expect_keyword_("NAME")
        name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        lookups = []
        while self.next_token_ != "END_FEATURE":
            self.expect_keyword_("LOOKUP")
            lookup = self.expect_string_()
            lookups.append(lookup)
        feature = ast.FeatureDefinition(name, tag, lookups,
                                        location=location)
        return feature

    def parse_def_lookup_(self):
        """Parse a DEF_LOOKUP statement.

        Handles the optional PROCESS_BASE/SKIP_BASE, PROCESS_MARKS/
        SKIP_MARKS, DIRECTION, REVERSAL, COMMENTS and context clauses,
        then the AS_SUBSTITUTION or AS_POSITION payload.  Returns an
        ``ast.LookupDefinition`` and records it in ``self.lookups_``.
        """
        assert self.is_cur_keyword_("DEF_LOOKUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        if not name[0].isalpha():
            raise VoltLibError(
                'Lookup name "%s" must start with a letter' % name,
                location
            )
        if self.lookups_.resolve(name) is not None:
            raise VoltLibError(
                'Lookup "%s" already defined, '
                'lookup names are case insensitive' % name,
                location
            )
        process_base = True
        if self.next_token_ == "PROCESS_BASE":
            self.advance_lexer_()
        elif self.next_token_ == "SKIP_BASE":
            self.advance_lexer_()
            process_base = False
        # process_marks is True (all marks), False (skip), or the name of
        # a mark glyph group (a string token).
        process_marks = True
        mark_glyph_set = None
        if self.next_token_ == "PROCESS_MARKS":
            self.advance_lexer_()
            if self.next_token_ == "MARK_GLYPH_SET":
                self.advance_lexer_()
                mark_glyph_set = self.expect_string_()
            elif self.next_token_type_ == Lexer.STRING:
                process_marks = self.expect_string_()
            elif self.next_token_ == "ALL":
                self.advance_lexer_()
            else:
                raise VoltLibError(
                    "Expected ALL, MARK_GLYPH_SET or an ID. "
                    "Got %s" % (self.next_token_type_),
                    location)
        elif self.next_token_ == "SKIP_MARKS":
            self.advance_lexer_()
            process_marks = False
        direction = None
        if self.next_token_ == "DIRECTION":
            self.expect_keyword_("DIRECTION")
            direction = self.expect_name_()
            assert direction in ("LTR", "RTL")
        reversal = None
        if self.next_token_ == "REVERSAL":
            self.expect_keyword_("REVERSAL")
            reversal = True
        comments = None
        if self.next_token_ == "COMMENTS":
            self.expect_keyword_("COMMENTS")
            comments = self.expect_string_()
        context = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            context = self.parse_context_()
        as_pos_or_sub = self.expect_name_()
        sub = None
        pos = None
        if as_pos_or_sub == "AS_SUBSTITUTION":
            sub = self.parse_substitution_(reversal)
        elif as_pos_or_sub == "AS_POSITION":
            pos = self.parse_position_()
        else:
            raise VoltLibError(
                "Expected AS_SUBSTITUTION or AS_POSITION. "
                "Got %s" % (as_pos_or_sub),
                location)
        def_lookup = ast.LookupDefinition(
            name, process_base, process_marks, mark_glyph_set, direction,
            reversal, comments, context, sub, pos, location=location)
        self.lookups_.define(name, def_lookup)
        return def_lookup

    def parse_context_(self):
        """Parse consecutive IN_CONTEXT/EXCEPT_CONTEXT ... END_CONTEXT blocks.

        Returns a list of ``ast.ContextDefinition``; an empty context
        block (no LEFT/RIGHT clauses) contributes nothing to the list.
        """
        location = self.cur_token_location_
        contexts = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            side = None
            coverage = None
            ex_or_in = self.expect_name_()
            if self.next_token_ != "END_CONTEXT":
                left = []
                right = []
                while self.next_token_ in ("LEFT", "RIGHT"):
                    side = self.expect_name_()
                    coverage = self.parse_coverage_()
                    if side == "LEFT":
                        left.append(coverage)
                    else:
                        right.append(coverage)
                self.expect_keyword_("END_CONTEXT")
                context = ast.ContextDefinition(ex_or_in, left,
                                                right, location=location)
                contexts.append(context)
            else:
                self.expect_keyword_("END_CONTEXT")
        return contexts

    def parse_substitution_(self, reversal):
        """Parse AS_SUBSTITUTION ... END_SUBSTITUTION.

        Collects SUB <coverage> WITH <coverage> END_SUB rules and picks
        the AST node from the source/destination arities: 1:1 single (or
        reverse-chaining single when ``reversal``), 1:many multiple,
        many:1 ligature.  Many-to-many is rejected.
        """
        assert self.is_cur_keyword_("AS_SUBSTITUTION")
        location = self.cur_token_location_
        src = []
        dest = []
        if self.next_token_ != "SUB":
            raise VoltLibError("Expected SUB", location)
        while self.next_token_ == "SUB":
            self.expect_keyword_("SUB")
            src.append(self.parse_coverage_())
            self.expect_keyword_("WITH")
            dest.append(self.parse_coverage_())
            self.expect_keyword_("END_SUB")
        self.expect_keyword_("END_SUBSTITUTION")
        max_src = max([len(cov) for cov in src])
        max_dest = max([len(cov) for cov in dest])
        # many to many or mixed is invalid
        if ((max_src > 1 and max_dest > 1) or
                (reversal and (max_src > 1 or max_dest > 1))):
            raise VoltLibError(
                "Invalid substitution type",
                location)
        mapping = OrderedDict(zip(tuple(src), tuple(dest)))
        if max_src == 1 and max_dest == 1:
            if reversal:
                sub = ast.SubstitutionReverseChainingSingleDefinition(
                    mapping, location=location)
            else:
                sub = ast.SubstitutionSingleDefinition(mapping,
                                                       location=location)
        elif max_src == 1 and max_dest > 1:
            sub = ast.SubstitutionMultipleDefinition(mapping,
                                                     location=location)
        elif max_src > 1 and max_dest == 1:
            sub = ast.SubstitutionLigatureDefinition(mapping,
                                                     location=location)
        return sub

    def parse_position_(self):
        """Parse AS_POSITION ... END_POSITION, dispatching on the
        positioning type (ATTACH, ATTACH_CURSIVE, ADJUST_PAIR,
        ADJUST_SINGLE)."""
        assert self.is_cur_keyword_("AS_POSITION")
        location = self.cur_token_location_
        pos_type = self.expect_name_()
        if pos_type not in (
                "ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"):
            raise VoltLibError(
                "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE",
                location)
        if pos_type == "ATTACH":
            position = self.parse_attach_()
        elif pos_type == "ATTACH_CURSIVE":
            position = self.parse_attach_cursive_()
        elif pos_type == "ADJUST_PAIR":
            position = self.parse_adjust_pair_()
        elif pos_type == "ADJUST_SINGLE":
            position = self.parse_adjust_single_()
        self.expect_keyword_("END_POSITION")
        return position

    def parse_attach_(self):
        """Parse ATTACH <coverage> TO (<coverage> AT ANCHOR <name>)* END_ATTACH
        into an ``ast.PositionAttachDefinition``."""
        assert self.is_cur_keyword_("ATTACH")
        location = self.cur_token_location_
        coverage = self.parse_coverage_()
        coverage_to = []
        self.expect_keyword_("TO")
        while self.next_token_ != "END_ATTACH":
            cov = self.parse_coverage_()
            self.expect_keyword_("AT")
            self.expect_keyword_("ANCHOR")
            anchor_name = self.expect_string_()
            coverage_to.append((cov, anchor_name))
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachDefinition(
            coverage, coverage_to, location=location)
        return position

    def parse_attach_cursive_(self):
        """Parse ATTACH_CURSIVE EXIT* ENTER* END_ATTACH into an
        ``ast.PositionAttachCursiveDefinition``."""
        assert self.is_cur_keyword_("ATTACH_CURSIVE")
        location = self.cur_token_location_
        coverages_exit = []
        coverages_enter = []
        # All EXIT coverages come first, then all ENTER coverages.
        while self.next_token_ != "ENTER":
            self.expect_keyword_("EXIT")
            coverages_exit.append(self.parse_coverage_())
        while self.next_token_ != "END_ATTACH":
            self.expect_keyword_("ENTER")
            coverages_enter.append(self.parse_coverage_())
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachCursiveDefinition(
            coverages_exit, coverages_enter, location=location)
        return position

    def parse_adjust_pair_(self):
        """Parse ADJUST_PAIR ... END_ADJUST into an
        ``ast.PositionAdjustPairDefinition``.

        ``adjust_pair`` maps 1-based (first, second) coverage indices to
        a pair of POS records.
        """
        assert self.is_cur_keyword_("ADJUST_PAIR")
        location = self.cur_token_location_
        coverages_1 = []
        coverages_2 = []
        adjust_pair = {}
        while self.next_token_ == "FIRST":
            self.advance_lexer_()
            coverage_1 = self.parse_coverage_()
            coverages_1.append(coverage_1)
        while self.next_token_ == "SECOND":
            self.advance_lexer_()
            coverage_2 = self.parse_coverage_()
            coverages_2.append(coverage_2)
        while self.next_token_ != "END_ADJUST":
            id_1 = self.expect_number_()
            id_2 = self.expect_number_()
            self.expect_keyword_("BY")
            pos_1 = self.parse_pos_()
            pos_2 = self.parse_pos_()
            adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustPairDefinition(
            coverages_1, coverages_2, adjust_pair, location=location)
        return position

    def parse_adjust_single_(self):
        """Parse ADJUST_SINGLE (<coverage> BY <pos>)* END_ADJUST into an
        ``ast.PositionAdjustSingleDefinition``."""
        assert self.is_cur_keyword_("ADJUST_SINGLE")
        location = self.cur_token_location_
        adjust_single = []
        while self.next_token_ != "END_ADJUST":
            coverages = self.parse_coverage_()
            self.expect_keyword_("BY")
            pos = self.parse_pos_()
            adjust_single.append((coverages, pos))
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustSingleDefinition(
            adjust_single, location=location)
        return position

    def parse_def_anchor_(self):
        """Parse a DEF_ANCHOR ... END_ANCHOR statement.

        Returns an ``ast.AnchorDefinition`` and records it in the
        per-glyph SymbolTable in ``self.anchors_``.  A duplicate anchor
        name on the same glyph is only an error for the same component
        (ligatures repeat anchor names per component).
        """
        assert self.is_cur_keyword_("DEF_ANCHOR")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ON")
        gid = self.expect_number_()
        self.expect_keyword_("GLYPH")
        glyph_name = self.expect_name_()
        self.expect_keyword_("COMPONENT")
        component = self.expect_number_()
        # check for duplicate anchor names on this glyph
        if glyph_name in self.anchors_:
            anchor = self.anchors_[glyph_name].resolve(name)
            if anchor is not None and anchor.component == component:
                raise VoltLibError(
                    'Anchor "%s" already defined, '
                    'anchor names are case insensitive' % name,
                    location
                )
        if self.next_token_ == "LOCKED":
            locked = True
            self.advance_lexer_()
        else:
            locked = False
        self.expect_keyword_("AT")
        pos = self.parse_pos_()
        self.expect_keyword_("END_ANCHOR")
        anchor = ast.AnchorDefinition(name, gid, glyph_name,
                                      component, locked, pos,
                                      location=location)
        if glyph_name not in self.anchors_:
            self.anchors_[glyph_name] = SymbolTable()
        self.anchors_[glyph_name].define(name, anchor)
        return anchor

    def parse_adjust_by_(self):
        """Parse one ADJUST_BY <adjustment> AT <size> clause and return
        the (adjustment, size) pair."""
        self.advance_lexer_()
        assert self.is_cur_keyword_("ADJUST_BY")
        adjustment = self.expect_number_()
        self.expect_keyword_("AT")
        size = self.expect_number_()
        return adjustment, size

    def parse_pos_(self):
        """Parse a POS ... END_POS record.

        Returns a 6-tuple (adv, dx, dy, adv_adjust_by, dx_adjust_by,
        dy_adjust_by) where the *_adjust_by dicts map ppem size to
        adjustment value; adv/dx/dy are None when absent.
        """
        # VOLT syntax doesn't seem to take device Y advance
        self.advance_lexer_()
        location = self.cur_token_location_
        assert self.is_cur_keyword_("POS"), location
        adv = None
        dx = None
        dy = None
        adv_adjust_by = {}
        dx_adjust_by = {}
        dy_adjust_by = {}
        if self.next_token_ == "ADV":
            self.advance_lexer_()
            adv = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                adv_adjust_by[size] = adjustment
        if self.next_token_ == "DX":
            self.advance_lexer_()
            dx = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dx_adjust_by[size] = adjustment
        if self.next_token_ == "DY":
            self.advance_lexer_()
            dy = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dy_adjust_by[size] = adjustment
        self.expect_keyword_("END_POS")
        return (adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)

    def parse_unicode_values_(self):
        """Parse a UNICODEVALUES string like "U+0041,U+0042".

        Returns a list of code points (ints), or None when the list is
        empty.  Raises VoltLibError if a value does not parse as hex.
        """
        location = self.cur_token_location_
        try:
            unicode_values = self.expect_string_().split(",")
            unicode_values = [
                int(uni[2:], 16)
                for uni in unicode_values if uni != ""]
        except ValueError as err:
            raise VoltLibError(str(err), location)
        return unicode_values if unicode_values != [] else None

    def parse_enum_(self):
        """Parse ENUM <coverage> END_ENUM into an ``ast.Enum``."""
        self.expect_keyword_("ENUM")
        location = self.cur_token_location_
        enum = ast.Enum(self.parse_coverage_(), location=location)
        self.expect_keyword_("END_ENUM")
        return enum

    def parse_coverage_(self):
        """Parse a sequence of GLYPH/GROUP/RANGE/ENUM coverage items.

        Returns a (possibly empty) tuple of ast coverage nodes; GROUP
        and RANGE nodes keep a back-reference to this parser so they can
        resolve names lazily.
        """
        coverage = []
        location = self.cur_token_location_
        while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
            if self.next_token_ == "ENUM":
                enum = self.parse_enum_()
                coverage.append(enum)
            elif self.next_token_ == "GLYPH":
                self.expect_keyword_("GLYPH")
                name = self.expect_string_()
                coverage.append(ast.GlyphName(name, location=location))
            elif self.next_token_ == "GROUP":
                self.expect_keyword_("GROUP")
                name = self.expect_string_()
                coverage.append(ast.GroupName(name, self, location=location))
            elif self.next_token_ == "RANGE":
                self.expect_keyword_("RANGE")
                start = self.expect_string_()
                self.expect_keyword_("TO")
                end = self.expect_string_()
                coverage.append(ast.Range(start, end, self, location=location))
        return tuple(coverage)

    def resolve_group(self, group_name):
        """Look up a previously defined group by name (or None)."""
        return self.groups_.resolve(group_name)

    def glyph_range(self, start, end):
        """Return the glyph names from ``start`` to ``end`` inclusive,
        in definition order (or None if either endpoint is unknown)."""
        return self.glyphs_.range(start, end)

    def parse_ppem_(self):
        """Parse a *_PPEM setting (keyword already consumed) into an
        ``ast.SettingDefinition``."""
        location = self.cur_token_location_
        ppem_name = self.cur_token_
        value = self.expect_number_()
        setting = ast.SettingDefinition(ppem_name, value, location=location)
        return setting

    def parse_compiler_flag_(self):
        """Parse a boolean COMPILER_* flag (presence means True) into an
        ``ast.SettingDefinition``."""
        location = self.cur_token_location_
        flag_name = self.cur_token_
        value = True
        setting = ast.SettingDefinition(flag_name, value, location=location)
        return setting

    def parse_cmap_format(self):
        """Parse CMAP_FORMAT with its three numeric arguments into an
        ``ast.SettingDefinition``."""
        location = self.cur_token_location_
        name = self.cur_token_
        value = (self.expect_number_(), self.expect_number_(),
                 self.expect_number_())
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def is_cur_keyword_(self, k):
        """Return True if the current token is the NAME token ``k``."""
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

    def expect_string_(self):
        """Consume and return the next token; error if not a STRING."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.STRING:
            raise VoltLibError("Expected a string", self.cur_token_location_)
        return self.cur_token_

    def expect_keyword_(self, keyword):
        """Consume the next token; error unless it is the NAME ``keyword``."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
            return self.cur_token_
        raise VoltLibError("Expected \"%s\"" % keyword,
                           self.cur_token_location_)

    def expect_name_(self):
        """Consume and return the next token; error if not a NAME."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise VoltLibError("Expected a name", self.cur_token_location_)

    def expect_number_(self):
        """Consume and return the next token; error if not a NUMBER."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NUMBER:
            raise VoltLibError("Expected a number", self.cur_token_location_)
        return self.cur_token_

    def advance_lexer_(self):
        """Shift the lookahead into cur_token_* and fetch a new lookahead.

        An END keyword terminates lexing early (VOLT files may carry
        binary data after END); exhaustion sets the lookahead to None,
        which is the loop-termination condition in parse().
        """
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
        try:
            if self.is_cur_keyword_("END"):
                raise StopIteration
            (self.next_token_type_, self.next_token_,
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)
605
606
class SymbolTable(object):
    """A stack of name scopes with optional case-insensitive lookup.

    ``define`` always writes into the innermost scope; ``resolve``
    searches innermost-first.  Exact-case matches take precedence over
    case-insensitive ones.
    """

    def __init__(self):
        # Always at least one (global) scope.
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a new innermost scope."""
        self.scopes_.append({})

    def exit_scope(self):
        """Pop the innermost scope, discarding its definitions."""
        self.scopes_.pop()

    def define(self, name, item):
        """Bind ``name`` to ``item`` in the innermost scope."""
        self.scopes_[-1][name] = item

    def resolve(self, name, case_insensitive=True):
        """Return the item bound to ``name``, or None if unbound.

        Exact matches are tried first, innermost scope outward.  When
        ``case_insensitive`` is true, a second innermost-first pass
        compares names case-insensitively.
        """
        # Exact-case pass, innermost scope first.
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            # `is not None` so that falsy items (0, "", ...) still resolve.
            if item is not None:
                return item
        if case_insensitive:
            # Bug fix: previously only the loop's leftover `scope`
            # variable (the outermost scope) was searched here, so
            # case-insensitive lookups missed names defined in inner
            # scopes.  Search every scope, innermost first.
            folded = name.lower()
            for scope in reversed(self.scopes_):
                for key in scope:
                    if key.lower() == folded:
                        return scope[key]
        return None
630
631
class OrderedSymbolTable(SymbolTable):
    """A SymbolTable whose scopes preserve definition order.

    Used for glyphs, where definition order defines glyph ranges.
    Lookups default to case-sensitive (glyph names are case sensitive,
    unlike group/script/lookup names).
    """

    def __init__(self):
        self.scopes_ = [OrderedDict()]

    def enter_scope(self):
        """Push a new innermost ordered scope."""
        self.scopes_.append(OrderedDict())

    def resolve(self, name, case_insensitive=False):
        """Return the item bound to ``name``, or None if unbound.

        Bug fix: the base-class result was previously discarded (missing
        ``return``), so this method always returned None and duplicate
        glyph definitions were never detected.
        """
        return SymbolTable.resolve(self, name, case_insensitive=case_insensitive)

    def range(self, start, end):
        """Return the names from ``start`` through ``end`` inclusive, in
        definition order, from the innermost scope containing both; None
        if no scope contains both endpoints."""
        for scope in reversed(self.scopes_):
            if start in scope and end in scope:
                keys = list(scope)
                return keys[keys.index(start):keys.index(end) + 1]
        return None
649