• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
2from fontTools.feaLib.location import FeatureLibLocation
3import re
4import os
5
6
class Lexer(object):
    """Tokenizer that turns feature-file text into ``(type, value, location)`` tuples.

    Iterating over a ``Lexer`` yields every token except ``NEWLINE``;
    ``next_()`` is the raw scanner that also reports newlines.  After a NAME
    token spelled ``include`` the lexer switches to file-name mode, in which
    the next token (other than a newline or comment) must be a parenthesised
    file name.
    """

    # Token types returned as the first element of each token tuple.
    NUMBER = "NUMBER"
    HEXADECIMAL = "HEXADECIMAL"
    OCTAL = "OCTAL"
    NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    # Scanner modes; FILENAME mode is entered right after an ``include`` NAME.
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        Args:
            text: The complete source text to tokenize.
            filename: Name reported in token locations; when falsy,
                ``"<features>"`` is reported instead.
        """
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, silently skipping ``NEWLINE`` tokens."""
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the location of the current scan position (1-based column)."""
        column = self.pos_ - self.line_start_ + 1
        return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, including ``NEWLINE`` tokens.

        Returns:
            A ``(token_type, value, location)`` tuple.  ``value`` is ``None``
            for ``NEWLINE``, an ``int`` for the number types and ``CID``, a
            ``float`` for ``FLOAT`` and a ``str`` otherwise.

        Raises:
            StopIteration: When the end of the text has been reached.
            FeatureLibError: On an unexpected character, a malformed glyph
                class name, an unterminated string, or a malformed file name
                after ``include``.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None
        # ``next_char`` is None on the very last character of the text.  A bare
        # ``None in "..."`` raises TypeError, so every look-ahead test below is
        # guarded against that case.
        next_is_digit = next_char is not None and next_char in Lexer.CHAR_DIGIT_

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break.
            self.pos_ += 2 if next_char == "\n" else 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            # A comment runs up to, but does not include, the line break.
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start : self.pos_], location)

        if self.mode_ == Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            # The token value is the text between the parentheses.
            return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1 : self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if len(glyphclass) > 63:
                raise FeatureLibError(
                    "Glyph class names must not be longer than 63 characters", location
                )
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen",
                    location,
                )
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start : self.pos_]
            if token == "include":
                # The next token must be the parenthesised file name.
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char is not None and next_char in "xX":
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
        if cur_char == "0" and next_is_digit:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.OCTAL, int(text[start : self.pos_], 8), location)
        if cur_char in Lexer.CHAR_DIGIT_ or (cur_char == "-" and next_is_digit):
            # Consume the first digit (or the minus sign), then the remaining
            # integer digits; a following "." turns the token into a FLOAT.
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string", location)
        raise FeatureLibError("Unexpected character: %r" % cur_char, location)

    def scan_over_(self, valid):
        """Advance the position past every consecutive character in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance the position to the next character in ``stop_at`` (or to the end)."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Consume the body of an anonymous block and return it as one token.

        The rest of the current line and the line break(s) after it are
        skipped; everything from there up to, but not including, the
        terminating ``} tag;`` becomes the token value.  The terminator itself
        is left in the stream.

        NOTE: ``line_`` and ``line_start_`` are not advanced over the skipped
        text by this method.

        Args:
            tag: The block's tag; surrounding whitespace is ignored.

        Returns:
            An ``(ANONYMOUS_BLOCK, body_text, location)`` tuple.

        Raises:
            FeatureLibError: If no ``} tag;`` terminator is found.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # The tag is literal text: escape it so characters such as "*", "+",
        # "." or "^" (all legal in NAME tokens) are not read as regex syntax.
        regexp = r"}\s*" + re.escape(tag) + r"\s*;"
        split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag, location
            )
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)
188
189
class IncludingLexer(object):
    """A Lexer that follows include statements.

    The OpenType feature file specification states that due to
    historical reasons, relative imports should be resolved in this
    order:

    1. If the source font is UFO format, then relative to the UFO's
       font directory
    2. relative to the top-level include file
    3. relative to the parent include file

    We only support 1 (via includeDir) and 2.
    """

    def __init__(self, featurefile, *, includeDir=None):
        """Initializes an IncludingLexer.

        Args:
            featurefile: A path, or an object with a ``read()`` method, that
                provides the top-level feature text.
            includeDir: Optional directory against which relative include
                paths are resolved.

        Behavior:
            If includeDir is passed, it will be used to determine the top-level
            include directory to use for all encountered include statements. If it is
            not passed, ``os.path.dirname(featurefile)`` will be considered the
            include directory.
        """

        # Stack of active lexers; the last entry is the file currently read.
        self.lexers_ = [self.make_lexer_(featurefile)]
        self.featurefilepath = self.lexers_[0].filename_
        self.includeDir = includeDir

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, descending into included files as needed.

        An ``include`` NAME token and the FILENAME token after it are not
        returned; instead a lexer for the named file is pushed on the stack
        and its tokens are returned next.  An exhausted lexer is popped and
        scanning resumes in the file that included it.

        Raises:
            StopIteration: When every lexer on the stack is exhausted.
            FeatureLibError: If ``include`` is not followed by a file name,
                or the stack already holds 5 lexers.
            IncludedFeaNotFound: If the included file does not exist.
        """
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                self.lexers_.pop()
                continue
            if token_type != Lexer.NAME or token != "include":
                return (token_type, token, location)

            fname_type, fname_token, fname_location = next(lexer)
            if fname_type != Lexer.FILENAME:
                raise FeatureLibError("Expected file name", fname_location)
            # The ";" after "include(...)" is not consumed here; it stays in
            # this lexer's stream and is returned once the included file has
            # been exhausted.
            if os.path.isabs(fname_token):
                path = fname_token
            else:
                if self.includeDir is not None:
                    curpath = self.includeDir
                elif self.featurefilepath is not None:
                    curpath = os.path.dirname(self.featurefilepath)
                else:
                    # if the IncludingLexer was initialized from an in-memory
                    # file-like stream, it doesn't have a 'name' pointing to
                    # its filesystem path, therefore we fall back to using the
                    # current working directory to resolve relative includes
                    curpath = os.getcwd()
                path = os.path.join(curpath, fname_token)
            if len(self.lexers_) >= 5:
                raise FeatureLibError("Too many recursive includes", fname_location)
            try:
                self.lexers_.append(self.make_lexer_(path))
            except FileNotFoundError as err:
                raise IncludedFeaNotFound(fname_token, fname_location) from err
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a ``Lexer`` from a path or from an already-open file object.

        Args:
            file_or_path: Either an object with a ``read()`` method, which is
                read but left open, or a path that is opened as UTF-8 text.

        Returns:
            A ``Lexer`` over the text, using the file object's ``name``
            attribute (or ``None`` if it has none) as its filename.
        """
        if hasattr(file_or_path, "read"):
            # Caller-owned stream: read it but do not close it.
            data = file_or_path.read()
            filename = getattr(file_or_path, "name", None)
        else:
            # The context manager closes the file even when read() raises
            # (e.g. on a decoding error).
            with open(file_or_path, "r", encoding="utf-8") as fileobj:
                data = fileobj.read()
                filename = getattr(fileobj, "name", None)
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        """Delegate anonymous-block scanning to the innermost active lexer."""
        return self.lexers_[-1].scan_anonymous_block(tag)
279
280
class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements, emits them as-is."""

    def __next__(self):  # Python 3
        """Return the next token straight from the top-level lexer."""
        top_level_lexer = self.lexers_[0]
        return next(top_level_lexer)
286