# fontTools.voltLib.parser -- parser for Microsoft VOLT project (.vtp) files.
import fontTools.voltLib.ast as ast
from fontTools.voltLib.lexer import Lexer
from fontTools.voltLib.error import VoltLibError
from io import open
5
# Maps a top-level VOLT keyword to the name of the Parser method that
# consumes the statement introduced by that keyword.
PARSE_FUNCS = {
    "DEF_GLYPH": "parse_def_glyph_",
    "DEF_GROUP": "parse_def_group_",
    "DEF_SCRIPT": "parse_def_script_",
    "DEF_LOOKUP": "parse_def_lookup_",
    "DEF_ANCHOR": "parse_def_anchor_",
    "GRID_PPEM": "parse_ppem_",
    "PRESENTATION_PPEM": "parse_ppem_",
    "PPOSITIONING_PPEM": "parse_ppem_",
    "COMPILER_USEEXTENSIONLOOKUPS": "parse_noarg_option_",
    "COMPILER_USEPAIRPOSFORMAT2": "parse_noarg_option_",
    "CMAP_FORMAT": "parse_cmap_format",
    "DO_NOT_TOUCH_CMAP": "parse_noarg_option_",
}
20
21
class Parser(object):
    """Parser for Microsoft VOLT project (.vtp) source files.

    Reads tokens from ``Lexer`` with one token of lookahead and builds
    an ``ast.VoltFile``.  Definitions are registered in symbol tables so
    duplicate names can be reported with their source locations.
    """

    def __init__(self, path):
        self.doc_ = ast.VoltFile()
        # Symbol tables used for duplicate detection and name resolution.
        self.glyphs_ = OrderedSymbolTable()
        self.groups_ = SymbolTable()
        self.anchors_ = {}  # dictionary of SymbolTable() keyed by glyph
        self.scripts_ = SymbolTable()
        self.langs_ = SymbolTable()
        self.lookups_ = SymbolTable()
        # One-token lookahead: cur_* is the token being processed,
        # next_* is the token that follows it.
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        self.make_lexer_(path)
        self.advance_lexer_()

    def make_lexer_(self, file_or_path):
        """Create self.lexer_ from an open file object or a filesystem path."""
        if hasattr(file_or_path, "read"):
            filename = getattr(file_or_path, "name", None)
            data = file_or_path.read()
        else:
            filename = file_or_path
            with open(file_or_path, "r") as f:
                data = f.read()
        self.lexer_ = Lexer(data, filename)

    def parse(self):
        """Parse the whole file and return the ast.VoltFile document."""
        statements = self.doc_.statements
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_ in PARSE_FUNCS:
                func = getattr(self, PARSE_FUNCS[self.cur_token_])
                statements.append(func())
            elif self.is_cur_keyword_("END"):
                break
            else:
                raise VoltLibError(
                    "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
                    self.cur_token_location_)
        return self.doc_

    def parse_def_glyph_(self):
        """Parse a DEF_GLYPH ... END_GLYPH statement."""
        assert self.is_cur_keyword_("DEF_GLYPH")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ID")
        gid = self.expect_number_()
        if gid < 0:
            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
        gunicode = None
        if self.next_token_ == "UNICODE":
            self.expect_keyword_("UNICODE")
            gunicode = [self.expect_number_()]
            if gunicode[0] < 0:
                raise VoltLibError("Invalid glyph UNICODE",
                                   self.cur_token_location_)
        elif self.next_token_ == "UNICODEVALUES":
            self.expect_keyword_("UNICODEVALUES")
            gunicode = self.parse_unicode_values_()
        gtype = None
        if self.next_token_ == "TYPE":
            self.expect_keyword_("TYPE")
            gtype = self.expect_name_()
            assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT")
        components = None
        if self.next_token_ == "COMPONENTS":
            self.expect_keyword_("COMPONENTS")
            components = self.expect_number_()
        self.expect_keyword_("END_GLYPH")
        if self.glyphs_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph "%s" (gid %i) already defined' % (name, gid),
                location
            )
        def_glyph = ast.GlyphDefinition(name, gid,
                                        gunicode, gtype, components,
                                        location=location)
        self.glyphs_.define(name, def_glyph)
        return def_glyph

    def parse_def_group_(self):
        """Parse a DEF_GROUP ... END_GROUP statement."""
        assert self.is_cur_keyword_("DEF_GROUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        enum = None
        if self.next_token_ == "ENUM":
            enum = self.parse_enum_()
        self.expect_keyword_("END_GROUP")
        if self.groups_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph group "%s" already defined, '
                'group names are case insensitive' % name,
                location
            )
        def_group = ast.GroupDefinition(name, enum,
                                        location=location)
        self.groups_.define(name, def_group)
        return def_group

    def parse_def_script_(self):
        """Parse a DEF_SCRIPT ... END_SCRIPT statement with its langsys."""
        assert self.is_cur_keyword_("DEF_SCRIPT")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        if self.scripts_.resolve(tag) is not None:
            raise VoltLibError(
                'Script "%s" already defined, '
                'script tags are case insensitive' % tag,
                location
            )
        # Language tags only need to be unique within their script.
        self.langs_.enter_scope()
        langs = []
        while self.next_token_ != "END_SCRIPT":
            self.advance_lexer_()
            lang = self.parse_langsys_()
            self.expect_keyword_("END_LANGSYS")
            if self.langs_.resolve(lang.tag) is not None:
                raise VoltLibError(
                    'Language "%s" already defined in script "%s", '
                    'language tags are case insensitive' % (lang.tag, tag),
                    location
                )
            self.langs_.define(lang.tag, lang)
            langs.append(lang)
        self.expect_keyword_("END_SCRIPT")
        self.langs_.exit_scope()
        def_script = ast.ScriptDefinition(name, tag, langs, location=location)
        self.scripts_.define(tag, def_script)
        return def_script

    def parse_langsys_(self):
        """Parse a DEF_LANGSYS statement (caller consumes END_LANGSYS)."""
        assert self.is_cur_keyword_("DEF_LANGSYS")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        features = []
        while self.next_token_ != "END_LANGSYS":
            self.advance_lexer_()
            feature = self.parse_feature_()
            self.expect_keyword_("END_FEATURE")
            features.append(feature)
        def_langsys = ast.LangSysDefinition(name, tag, features,
                                            location=location)
        return def_langsys

    def parse_feature_(self):
        """Parse a DEF_FEATURE statement (caller consumes END_FEATURE)."""
        assert self.is_cur_keyword_("DEF_FEATURE")
        location = self.cur_token_location_
        self.expect_keyword_("NAME")
        name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        lookups = []
        while self.next_token_ != "END_FEATURE":
            self.expect_keyword_("LOOKUP")
            lookup = self.expect_string_()
            lookups.append(lookup)
        feature = ast.FeatureDefinition(name, tag, lookups,
                                        location=location)
        return feature

    def parse_def_lookup_(self):
        """Parse a DEF_LOOKUP statement with its flags, context and body."""
        assert self.is_cur_keyword_("DEF_LOOKUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        if not name[0].isalpha():
            raise VoltLibError(
                'Lookup name "%s" must start with a letter' % name,
                location
            )
        if self.lookups_.resolve(name) is not None:
            raise VoltLibError(
                'Lookup "%s" already defined, '
                'lookup names are case insensitive' % name,
                location
            )
        process_base = True
        if self.next_token_ == "PROCESS_BASE":
            self.advance_lexer_()
        elif self.next_token_ == "SKIP_BASE":
            self.advance_lexer_()
            process_base = False
        # process_marks is True, False, or the name of a glyph group
        # whose marks are the only ones processed.
        process_marks = True
        mark_glyph_set = None
        if self.next_token_ == "PROCESS_MARKS":
            self.advance_lexer_()
            if self.next_token_ == "MARK_GLYPH_SET":
                self.advance_lexer_()
                mark_glyph_set = self.expect_string_()
            elif self.next_token_ == "ALL":
                self.advance_lexer_()
            elif self.next_token_ == "NONE":
                self.advance_lexer_()
                process_marks = False
            elif self.next_token_type_ == Lexer.STRING:
                process_marks = self.expect_string_()
            else:
                raise VoltLibError(
                    "Expected ALL, NONE, MARK_GLYPH_SET or an ID. "
                    "Got %s" % (self.next_token_type_),
                    location)
        elif self.next_token_ == "SKIP_MARKS":
            self.advance_lexer_()
            process_marks = False
        direction = None
        if self.next_token_ == "DIRECTION":
            self.expect_keyword_("DIRECTION")
            direction = self.expect_name_()
            assert direction in ("LTR", "RTL")
        reversal = None
        if self.next_token_ == "REVERSAL":
            self.expect_keyword_("REVERSAL")
            reversal = True
        comments = None
        if self.next_token_ == "COMMENTS":
            self.expect_keyword_("COMMENTS")
            # Comments store newlines as the literal two characters '\n'.
            comments = self.expect_string_().replace(r'\n', '\n')
        context = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            context = self.parse_context_()
        as_pos_or_sub = self.expect_name_()
        sub = None
        pos = None
        if as_pos_or_sub == "AS_SUBSTITUTION":
            sub = self.parse_substitution_(reversal)
        elif as_pos_or_sub == "AS_POSITION":
            pos = self.parse_position_()
        else:
            raise VoltLibError(
                "Expected AS_SUBSTITUTION or AS_POSITION. "
                "Got %s" % (as_pos_or_sub),
                location)
        def_lookup = ast.LookupDefinition(
            name, process_base, process_marks, mark_glyph_set, direction,
            reversal, comments, context, sub, pos, location=location)
        self.lookups_.define(name, def_lookup)
        return def_lookup

    def parse_context_(self):
        """Parse consecutive IN_CONTEXT/EXCEPT_CONTEXT ... END_CONTEXT blocks."""
        location = self.cur_token_location_
        contexts = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            side = None
            coverage = None
            ex_or_in = self.expect_name_()
            if self.next_token_ != "END_CONTEXT":
                left = []
                right = []
                while self.next_token_ in ("LEFT", "RIGHT"):
                    side = self.expect_name_()
                    coverage = self.parse_coverage_()
                    if side == "LEFT":
                        left.append(coverage)
                    else:
                        right.append(coverage)
                self.expect_keyword_("END_CONTEXT")
                context = ast.ContextDefinition(ex_or_in, left,
                                                right, location=location)
                contexts.append(context)
            else:
                # Empty context: consume the terminator, record nothing.
                self.expect_keyword_("END_CONTEXT")
        return contexts

    def parse_substitution_(self, reversal):
        """Parse AS_SUBSTITUTION ... END_SUBSTITUTION into the proper
        substitution AST node (single, multiple, ligature, or reverse
        chaining single)."""
        assert self.is_cur_keyword_("AS_SUBSTITUTION")
        location = self.cur_token_location_
        src = []
        dest = []
        if self.next_token_ != "SUB":
            raise VoltLibError("Expected SUB", location)
        while self.next_token_ == "SUB":
            self.expect_keyword_("SUB")
            src.append(self.parse_coverage_())
            self.expect_keyword_("WITH")
            dest.append(self.parse_coverage_())
            self.expect_keyword_("END_SUB")
        self.expect_keyword_("END_SUBSTITUTION")
        max_src = max([len(cov) for cov in src])
        max_dest = max([len(cov) for cov in dest])
        # many to many or mixed is invalid
        if ((max_src > 1 and max_dest > 1) or
                (reversal and (max_src > 1 or max_dest > 1))):
            raise VoltLibError(
                "Invalid substitution type",
                location)
        mapping = dict(zip(tuple(src), tuple(dest)))
        if max_src == 1 and max_dest == 1:
            if reversal:
                sub = ast.SubstitutionReverseChainingSingleDefinition(
                    mapping, location=location)
            else:
                sub = ast.SubstitutionSingleDefinition(mapping,
                                                       location=location)
        elif max_src == 1 and max_dest > 1:
            sub = ast.SubstitutionMultipleDefinition(mapping,
                                                     location=location)
        elif max_src > 1 and max_dest == 1:
            sub = ast.SubstitutionLigatureDefinition(mapping,
                                                     location=location)
        return sub

    def parse_position_(self):
        """Parse AS_POSITION ... END_POSITION, dispatching on the type."""
        assert self.is_cur_keyword_("AS_POSITION")
        location = self.cur_token_location_
        pos_type = self.expect_name_()
        if pos_type not in (
                "ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"):
            raise VoltLibError(
                "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE",
                location)
        if pos_type == "ATTACH":
            position = self.parse_attach_()
        elif pos_type == "ATTACH_CURSIVE":
            position = self.parse_attach_cursive_()
        elif pos_type == "ADJUST_PAIR":
            position = self.parse_adjust_pair_()
        elif pos_type == "ADJUST_SINGLE":
            position = self.parse_adjust_single_()
        self.expect_keyword_("END_POSITION")
        return position

    def parse_attach_(self):
        """Parse ATTACH ... END_ATTACH (mark-to-base style attachment)."""
        assert self.is_cur_keyword_("ATTACH")
        location = self.cur_token_location_
        coverage = self.parse_coverage_()
        coverage_to = []
        self.expect_keyword_("TO")
        while self.next_token_ != "END_ATTACH":
            cov = self.parse_coverage_()
            self.expect_keyword_("AT")
            self.expect_keyword_("ANCHOR")
            anchor_name = self.expect_string_()
            coverage_to.append((cov, anchor_name))
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachDefinition(
            coverage, coverage_to, location=location)
        return position

    def parse_attach_cursive_(self):
        """Parse ATTACH_CURSIVE EXIT... ENTER... END_ATTACH."""
        assert self.is_cur_keyword_("ATTACH_CURSIVE")
        location = self.cur_token_location_
        coverages_exit = []
        coverages_enter = []
        while self.next_token_ != "ENTER":
            self.expect_keyword_("EXIT")
            coverages_exit.append(self.parse_coverage_())
        while self.next_token_ != "END_ATTACH":
            self.expect_keyword_("ENTER")
            coverages_enter.append(self.parse_coverage_())
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachCursiveDefinition(
            coverages_exit, coverages_enter, location=location)
        return position

    def parse_adjust_pair_(self):
        """Parse ADJUST_PAIR ... END_ADJUST (pair positioning)."""
        assert self.is_cur_keyword_("ADJUST_PAIR")
        location = self.cur_token_location_
        coverages_1 = []
        coverages_2 = []
        adjust_pair = {}
        while self.next_token_ == "FIRST":
            self.advance_lexer_()
            coverage_1 = self.parse_coverage_()
            coverages_1.append(coverage_1)
        while self.next_token_ == "SECOND":
            self.advance_lexer_()
            coverage_2 = self.parse_coverage_()
            coverages_2.append(coverage_2)
        # Each entry: 1-based indices into the FIRST/SECOND coverages,
        # mapped to the (pos_1, pos_2) adjustment pair.
        while self.next_token_ != "END_ADJUST":
            id_1 = self.expect_number_()
            id_2 = self.expect_number_()
            self.expect_keyword_("BY")
            pos_1 = self.parse_pos_()
            pos_2 = self.parse_pos_()
            adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustPairDefinition(
            coverages_1, coverages_2, adjust_pair, location=location)
        return position

    def parse_adjust_single_(self):
        """Parse ADJUST_SINGLE ... END_ADJUST (single positioning)."""
        assert self.is_cur_keyword_("ADJUST_SINGLE")
        location = self.cur_token_location_
        adjust_single = []
        while self.next_token_ != "END_ADJUST":
            coverages = self.parse_coverage_()
            self.expect_keyword_("BY")
            pos = self.parse_pos_()
            adjust_single.append((coverages, pos))
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustSingleDefinition(
            adjust_single, location=location)
        return position

    def parse_def_anchor_(self):
        """Parse a DEF_ANCHOR ... END_ANCHOR statement."""
        assert self.is_cur_keyword_("DEF_ANCHOR")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ON")
        gid = self.expect_number_()
        self.expect_keyword_("GLYPH")
        glyph_name = self.expect_name_()
        self.expect_keyword_("COMPONENT")
        component = self.expect_number_()
        # check for duplicate anchor names on this glyph
        if glyph_name in self.anchors_:
            anchor = self.anchors_[glyph_name].resolve(name)
            if anchor is not None and anchor.component == component:
                raise VoltLibError(
                    'Anchor "%s" already defined, '
                    'anchor names are case insensitive' % name,
                    location
                )
        if self.next_token_ == "LOCKED":
            locked = True
            self.advance_lexer_()
        else:
            locked = False
        self.expect_keyword_("AT")
        pos = self.parse_pos_()
        self.expect_keyword_("END_ANCHOR")
        anchor = ast.AnchorDefinition(name, gid, glyph_name,
                                      component, locked, pos,
                                      location=location)
        if glyph_name not in self.anchors_:
            self.anchors_[glyph_name] = SymbolTable()
        self.anchors_[glyph_name].define(name, anchor)
        return anchor

    def parse_adjust_by_(self):
        """Parse one ADJUST_BY <adjustment> AT <size> clause."""
        self.advance_lexer_()
        assert self.is_cur_keyword_("ADJUST_BY")
        adjustment = self.expect_number_()
        self.expect_keyword_("AT")
        size = self.expect_number_()
        return adjustment, size

    def parse_pos_(self):
        """Parse a POS ... END_POS value record into an ast.Pos.

        Each of ADV/DX/DY may carry per-ppem-size ADJUST_BY deltas,
        collected as {size: adjustment} dicts.
        """
        # VOLT syntax doesn't seem to take device Y advance
        self.advance_lexer_()
        location = self.cur_token_location_
        assert self.is_cur_keyword_("POS"), location
        adv = None
        dx = None
        dy = None
        adv_adjust_by = {}
        dx_adjust_by = {}
        dy_adjust_by = {}
        if self.next_token_ == "ADV":
            self.advance_lexer_()
            adv = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                adv_adjust_by[size] = adjustment
        if self.next_token_ == "DX":
            self.advance_lexer_()
            dx = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dx_adjust_by[size] = adjustment
        if self.next_token_ == "DY":
            self.advance_lexer_()
            dy = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dy_adjust_by[size] = adjustment
        self.expect_keyword_("END_POS")
        return ast.Pos(adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)

    def parse_unicode_values_(self):
        """Parse a comma-separated UNICODEVALUES string into a list of ints.

        Each entry is expected in "U+XXXX" form; the first two characters
        are stripped and the rest parsed as hex.  Returns None when the
        list is empty.
        """
        location = self.cur_token_location_
        try:
            unicode_values = self.expect_string_().split(",")
            unicode_values = [
                int(uni[2:], 16)
                for uni in unicode_values if uni != ""]
        except ValueError as err:
            raise VoltLibError(str(err), location)
        return unicode_values if unicode_values != [] else None

    def parse_enum_(self):
        """Parse an ENUM ... END_ENUM glyph enumeration."""
        self.expect_keyword_("ENUM")
        location = self.cur_token_location_
        enum = ast.Enum(self.parse_coverage_(), location=location)
        self.expect_keyword_("END_ENUM")
        return enum

    def parse_coverage_(self):
        """Parse a run of GLYPH/GROUP/RANGE/ENUM items into a tuple."""
        coverage = []
        location = self.cur_token_location_
        while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
            if self.next_token_ == "ENUM":
                enum = self.parse_enum_()
                coverage.append(enum)
            elif self.next_token_ == "GLYPH":
                self.expect_keyword_("GLYPH")
                name = self.expect_string_()
                coverage.append(ast.GlyphName(name, location=location))
            elif self.next_token_ == "GROUP":
                self.expect_keyword_("GROUP")
                name = self.expect_string_()
                coverage.append(ast.GroupName(name, self, location=location))
            elif self.next_token_ == "RANGE":
                self.expect_keyword_("RANGE")
                start = self.expect_string_()
                self.expect_keyword_("TO")
                end = self.expect_string_()
                coverage.append(ast.Range(start, end, self, location=location))
        return tuple(coverage)

    def resolve_group(self, group_name):
        """Resolve a group name against the parsed group definitions."""
        return self.groups_.resolve(group_name)

    def glyph_range(self, start, end):
        """Return the glyph names between start and end, in definition order."""
        return self.glyphs_.range(start, end)

    def parse_ppem_(self):
        """Parse a *_PPEM setting (GRID/PRESENTATION/PPOSITIONING)."""
        location = self.cur_token_location_
        ppem_name = self.cur_token_
        value = self.expect_number_()
        setting = ast.SettingDefinition(ppem_name, value, location=location)
        return setting

    def parse_noarg_option_(self):
        """Parse a flag-style setting that takes no argument (value True)."""
        location = self.cur_token_location_
        name = self.cur_token_
        value = True
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def parse_cmap_format(self):
        """Parse a CMAP_FORMAT setting: three numbers as a tuple."""
        location = self.cur_token_location_
        name = self.cur_token_
        value = (self.expect_number_(), self.expect_number_(),
                 self.expect_number_())
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def is_cur_keyword_(self, k):
        """True if the current token is the NAME token *k*."""
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

    def expect_string_(self):
        """Advance and return the current token; raise unless it is a STRING."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.STRING:
            raise VoltLibError("Expected a string", self.cur_token_location_)
        return self.cur_token_

    def expect_keyword_(self, keyword):
        """Advance and return the current token; raise unless it equals *keyword*."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
            return self.cur_token_
        raise VoltLibError("Expected \"%s\"" % keyword,
                           self.cur_token_location_)

    def expect_name_(self):
        """Advance and return the current token; raise unless it is a NAME."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise VoltLibError("Expected a name", self.cur_token_location_)

    def expect_number_(self):
        """Advance and return the current token; raise unless it is a NUMBER."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NUMBER:
            raise VoltLibError("Expected a number", self.cur_token_location_)
        return self.cur_token_

    def advance_lexer_(self):
        """Shift the lookahead token into cur_* and fetch the next one.

        The END keyword terminates lexing early; after exhaustion the
        next_* token type and value are both None.
        """
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
        try:
            if self.is_cur_keyword_("END"):
                raise StopIteration
            (self.next_token_type_, self.next_token_,
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)
606
607
class SymbolTable(object):
    """A stack of scopes mapping names to definitions.

    Lookup prefers an exact match in the innermost scope, then falls
    back to a case-insensitive search (VOLT names are case insensitive).
    """

    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a new innermost scope."""
        self.scopes_.append({})

    def exit_scope(self):
        """Pop the innermost scope, discarding its definitions."""
        self.scopes_.pop()

    def define(self, name, item):
        """Bind *name* to *item* in the innermost scope."""
        self.scopes_[-1][name] = item

    def resolve(self, name, case_insensitive=True):
        """Return the item bound to *name*, or None if undefined.

        Exact matches are tried in every scope (innermost first) before
        the case-insensitive fallback.
        """
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            if item is not None:
                return item
        if case_insensitive:
            # Bug fix: the fallback previously scanned only the scope left
            # over in the loop variable above (the outermost one), so a
            # case-differing name defined in an inner scope was never found.
            # Search every scope, innermost first.
            lower_name = name.lower()
            for scope in reversed(self.scopes_):
                for key in scope:
                    if key.lower() == lower_name:
                        return scope[key]
        return None
631
632
class OrderedSymbolTable(SymbolTable):
    """SymbolTable that relies on insertion order so that contiguous
    ranges of definitions (glyph ranges) can be extracted."""

    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a new innermost scope."""
        self.scopes_.append({})

    def resolve(self, name, case_insensitive=False):
        # Bug fix: the base-class result was previously dropped (missing
        # `return`), so resolve() always returned None and duplicate-glyph
        # detection never triggered.
        return SymbolTable.resolve(self, name,
                                   case_insensitive=case_insensitive)

    def range(self, start, end):
        """Return the names from *start* through *end* inclusive, in
        definition order, from the innermost scope containing both;
        None if no scope contains both endpoints."""
        for scope in reversed(self.scopes_):
            if start in scope and end in scope:
                start_idx = list(scope.keys()).index(start)
                end_idx = list(scope.keys()).index(end)
                return list(scope.keys())[start_idx:end_idx + 1]
        return None
650