• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# cython: infer_types=True, language_level=3, py2_import=True
2#
3#   Cython Scanner
4#
5
6import os
7import platform
8
9import cython
10cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
11               print_function=object)
12
13from Cython import Utils
14from Cython.Plex.Scanners import Scanner
15from Cython.Plex.Errors import UnrecognizedInput
16from Errors import error
17from Lexicon import any_string_prefix, make_lexicon, IDENT
18from Future import print_function
19
20from StringEncoding import EncodedString
21
# Module-level debugging switches.
# NOTE(review): in the code visible here only trace_scanner is read
# (PyrexScanner.__init__ copies it to self.trace); the other three are
# presumably consulted from other modules -- confirm before removing them.
debug_scanner = 0
trace_scanner = 0
scanner_debug_flags = 0
scanner_dump_file = None

# Cache for the lexicon; filled in on the first call to get_lexicon().
lexicon = None
28
def get_lexicon():
    """
    Return the module-wide lexicon, building it with make_lexicon() the
    first time it is requested and reusing the cached object afterwards.
    """
    global lexicon
    if lexicon:
        return lexicon
    lexicon = make_lexicon()
    return lexicon
34
35#------------------------------------------------------------------
36
# Words treated as keywords when scanning plain Python source.  Every word
# is listed exactly once; PyrexScanner converts the list into a set, so only
# membership matters.
py_reserved_words = [
    "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
    "continue", "return", "raise", "import", "exec", "try",
    "except", "finally", "while", "if", "elif", "else", "for",
    "in", "assert", "and", "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# The Python keywords plus the additional words reserved in Cython sources.
pyx_reserved_words = py_reserved_words + [
    "include", "ctypedef", "cdef", "cpdef",
    "cimport", "DEF", "IF", "ELIF", "ELSE"
]
49
class Method(object):
    """
    Callable that forwards to the method called `name` on whatever stream
    object it is invoked with.
    """

    def __init__(self, name):
        # __name__ mirrors name so that Plex tracing can display it.
        self.__name__ = self.name = name

    def __call__(self, stream, text):
        # Resolve the method on the stream at call time, then invoke it.
        action = getattr(stream, self.name)
        return action(text)
58
59#------------------------------------------------------------------
60
class CompileTimeScope(object):
    """
    A name -> value mapping with an optional enclosing (outer) scope that
    is consulted when a name is not found locally.
    """

    def __init__(self, outer = None):
        self.outer = outer
        self.entries = {}

    def declare(self, name, value):
        """Bind name to value in this scope."""
        self.entries[name] = value

    def update(self, other):
        """Merge the given mapping into this scope's entries."""
        self.entries.update(other)

    def lookup_here(self, name):
        """Return the value bound in this scope only; KeyError if absent."""
        return self.entries[name]

    def __contains__(self, name):
        # Membership tests look at this scope only, not at outer scopes.
        return name in self.entries

    def lookup(self, name):
        """
        Return the value bound to name here or in the nearest outer scope
        that has it.  The KeyError of the failed local lookup is re-raised
        when there is no outer scope to fall back on.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            enclosing = self.outer
            if not enclosing:
                raise
            return enclosing.lookup(name)
88
def initial_compile_time_env():
    """
    Build the default compile-time environment.

    Returns an empty CompileTimeScope whose outer scope holds the UNAME_*
    values taken from platform.uname() and a fixed selection of builtins.
    """
    builtin_scope = CompileTimeScope()

    # Pair the UNAME_* names with the leading fields of platform.uname().
    uname_keys = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
                  'UNAME_VERSION', 'UNAME_MACHINE')
    for key, field in zip(uname_keys, platform.uname()):
        builtin_scope.declare(key, field)

    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins

    builtin_keys = ('False', 'True',
                    'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
                    'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
                    'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
                    'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
                    'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
                    'sum', 'tuple', 'xrange', 'zip')

    # Names missing from the running interpreter's builtins module (likely
    # because of the Py2/Py3 difference) are silently left out.
    absent = object()
    for key in builtin_keys:
        obj = getattr(builtins, key, absent)
        if obj is not absent:
            builtin_scope.declare(key, obj)

    return CompileTimeScope(builtin_scope)
116
117#------------------------------------------------------------------
118
class SourceDescriptor(object):
    """
    A SourceDescriptor should be considered immutable.

    Base class for descriptions of where source code came from.  The
    methods here rely on get_description(), which this class does not
    define, and on the _cmp_name attribute used for ordering.
    """
    _file_type = 'pyx'

    _escaped_description = None
    _cmp_name = ''

    def __str__(self):
        # To catch all places where a descriptor is used directly as a
        # filename.  Raised explicitly so that the check is still active
        # when assertions are disabled.
        raise AssertionError(
            "a SourceDescriptor must not be used directly as a filename")

    def set_file_type_from_name(self, filename):
        """
        Set the file type from the extension of filename: 'pyx', 'pxd' or
        'py' for the matching extension, 'pyx' for anything else.
        """
        ext = os.path.splitext(filename)[1]
        if ext in ('.pyx', '.pxd', '.py'):
            self._file_type = ext[1:]
        else:
            self._file_type = 'pyx'

    def is_cython_file(self):
        """Return True if the file type is 'pyx' or 'pxd'."""
        return self._file_type in ('pyx', 'pxd')

    def is_python_file(self):
        """Return True if the file type is 'py'."""
        return self._file_type == 'py'

    def get_escaped_description(self):
        """
        Return get_description() with every non-ASCII character replaced,
        computing the result once and caching it on the instance.
        """
        if self._escaped_description is None:
            description = self.get_description()
            self._escaped_description = \
                description.encode('ASCII', 'replace').decode("ASCII")
        return self._escaped_description

    # The comparisons below are only used to provide some sort of order.
    # Comparing with an object that has no _cmp_name yields False.

    def __gt__(self, other):
        try:
            return self._cmp_name > other._cmp_name
        except AttributeError:
            return False

    def __lt__(self, other):
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

    def __le__(self, other):
        try:
            return self._cmp_name <= other._cmp_name
        except AttributeError:
            return False

    def __ge__(self, other):
        # Completes the set of ordering operators so that '>=' behaves
        # like its three siblings instead of depending on reflection.
        try:
            return self._cmp_name >= other._cmp_name
        except AttributeError:
            return False
166
class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """
    def __init__(self, filename, path_description=None):
        """
        filename is passed through Utils.decode_filename();
        path_description defaults to the decoded filename.
        """
        filename = Utils.decode_filename(filename)
        self.path_description = path_description or filename
        self.filename = filename
        self.set_file_type_from_name(filename)
        self._cmp_name = filename
        # (encoding, error_handling) -> list of lines, or None after the
        # first (uncached) read
        self._lines = {}

    def get_lines(self, encoding=None, error_handling=None):
        """
        Return the lines of the file as a list.

        we cache the lines only the second time this is called, in
        order to save memory when they are only used once
        """
        key = (encoding, error_handling)
        lines = self._lines.get(key)
        if lines is not None:
            return lines
        f = Utils.open_source_file(
            self.filename, encoding=encoding,
            error_handling=error_handling,
            # newline normalisation is costly before Py2.6
            require_normalised_newlines=False)
        try:
            lines = list(f)
        finally:
            f.close()
        if key in self._lines:
            # second read with this key: keep the result
            self._lines[key] = lines
        else:
            # do not cache the first access, but remember that we
            # already read it once
            self._lines[key] = None
        return lines

    def get_description(self):
        return self.path_description

    def get_error_description(self):
        """
        Return the filename, with the current working directory prefix
        removed when the file lies below it.
        """
        path = self.filename
        cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
        if path.startswith(cwd):
            return path[len(cwd):]
        return path

    def get_filenametable_entry(self):
        return self.filename

    def __eq__(self, other):
        return isinstance(other, FileSourceDescriptor) and self.filename == other.filename

    def __ne__(self, other):
        # Python 2 does not derive '!=' from __eq__; define it explicitly
        # so that both operators always agree.
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
231
class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filename if the
    code originates from a string object.
    """
    filename = None

    def __init__(self, name, code):
        self.name = name
        #self.set_file_type_from_name(name)
        # Every piece of the split gets a "\n" appended, including the
        # last one.
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name

    def get_lines(self, encoding=None, error_handling=None):
        """
        Return the code lines.  Without an encoding the stored list itself
        is returned; otherwise each line is round-tripped through
        encode()/decode() with that encoding.  error_handling is the
        'errors' argument for encode(); None selects 'strict'.
        """
        if not encoding:
            return self.codelines
        if error_handling is None:
            # encode() rejects None as its errors argument; 'strict' is
            # its documented default.
            error_handling = 'strict'
        return [ line.encode(encoding, error_handling).decode(encoding)
                 for line in self.codelines ]

    def get_description(self):
        return self.name

    get_error_description = get_description

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        # Do not hash on the name, an identical string source should be the
        # same object (name is often defaulted in other places)
        # return hash(self.name)
        return id(self)

    def __eq__(self, other):
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __ne__(self, other):
        # Python 2 does not derive '!=' from __eq__; define it explicitly
        # so that both operators always agree.
        return not self.__eq__(other)

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
271
272#------------------------------------------------------------------
273
class PyrexScanner(Scanner):
    # Plex-based scanner that adds keyword recognition, INDENT/DEDENT
    # tracking, string state handling and error reporting helpers.
    #
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #                              (a CompileTimeScope when created here)
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context
    #
    # Further attributes set in __init__:
    #  parse_comments         boolean  Produce 'commentline' tokens
    #  source_encoding        stored as given
    #  in_python_file         boolean  filename.is_python_file()
    #  keywords               set      Reserved words for this file type
    #  indentation_stack      [int]    Open indentation widths, starts as [0]
    #  indentation_char       first indentation character seen, or None
    #  bracket_nesting_level  int      Number of currently open brackets
    #  sy, systring           current token and its string value

    def __init__(self, file, filename, parent_scanner = None,
                 scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
        # filename must provide is_python_file() (see the SourceDescriptor
        # classes).  With a parent_scanner, the compilation state is shared
        # with / copied from it; otherwise scope and context are both
        # dereferenced below and therefore must not be None.
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if parent_scanner:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        else:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            # Overlay user-supplied compile-time values, if the options
            # object carries any.
            if hasattr(context.options, 'compile_time_env') and \
               context.options.compile_time_env is not None:
                self.compile_time_env.update(context.options.compile_time_env)
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        # Plain Python files only reserve the Python keywords.
        if filename.is_python_file():
            self.in_python_file = True
            self.keywords = set(py_reserved_words)
        else:
            self.in_python_file = False
            self.keywords = set(pyx_reserved_words)
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        # Start in the 'INDENT' state and load the first token into
        # self.sy / self.systring.
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def commentline(self, text):
        # Comment tokens are only produced when parse_comments is set.
        if self.parse_comments:
            self.produce('commentline', text)

    def current_level(self):
        # Innermost indentation width currently open.
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        # Count the bracket and pass its text through.
        self.bracket_nesting_level = self.bracket_nesting_level + 1
        return text

    def close_bracket_action(self, text):
        # Uncount the bracket and pass its text through.
        self.bracket_nesting_level = self.bracket_nesting_level - 1
        return text

    def newline_action(self, text):
        # Newlines inside brackets produce nothing; elsewhere they switch
        # to the 'INDENT' state and produce a NEWLINE token.
        if self.bracket_nesting_level == 0:
            self.begin('INDENT')
            self.produce('NEWLINE', '')

    # Opening quote sequence -> name of the scanner state entered for
    # that kind of string.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        # Drop leading prefix characters (those contained in
        # any_string_prefix) so that only the quote sequence remains,
        # then enter the matching string state.
        while text[:1] in any_string_prefix:
            text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        # Return to the default state and mark the end of the string.
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        # Close the string state first, then report the error.
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        # NOTE(review): text is presumably the leading whitespace of the
        # line as matched by the lexicon -- confirm against Lexicon.
        # Its length is taken as the new indentation level.
        self.begin('')
        # Indentation within brackets should be ignored.
        #if self.bracket_nesting_level > 0:
        #    return
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            #print "Scanner.indentation_action: indent with", repr(c) ###
            if self.indentation_char is None:
                self.indentation_char = c
                #print "Scanner.indentation_action: setting indent_char to", repr(c)
            else:
                if self.indentation_char != c:
                    self.error("Mixed use of tabs and spaces")
            # Anything left after removing c means a second character
            # occurs within this one indentation.
            if text.replace(c, "") != "":
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        #print "Changing indent level from", current_level, "to", new_level ###
        if new_level == current_level:
            return
        elif new_level > current_level:
            #print "...pushing level", new_level ###
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        else:
            # One DEDENT per level that is closed.
            while new_level < self.current_level():
                #print "...popping level", self.indentation_stack[-1] ###
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            #print "...current level now", self.current_level() ###
            # The new width must match a level that is still open.
            if new_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        # Close every indentation level still open, then produce EOF.
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        # Read the next token into self.sy / self.systring.
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            # error() is fatal by default, so it raises here.
            self.error("Unrecognized character")
        if sy == IDENT:
            if systring in self.keywords:
                # 'print' (with the print_function future directive) and
                # 'exec' (with language_level >= 3) stop being keywords:
                # they are removed from the keyword set and handled as
                # ordinary identifiers.
                if systring == u'print' and print_function in self.context.future_directives:
                    self.keywords.discard('print')
                    systring = EncodedString(systring)
                elif systring == u'exec' and self.context.language_level >= 3:
                    self.keywords.discard('exec')
                    systring = EncodedString(systring)
                else:
                    # A keyword is its own token type.
                    sy = systring
            else:
                systring = EncodedString(systring)
        self.sy = sy
        self.systring = systring
        # Token dump for debugging; currently disabled.
        if False: # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        # Return the (sy, systring) pair that follows the current token
        # while leaving the current token in place: read it, push it
        # back, restore the saved current token.
        saved = self.sy, self.systring
        self.next()
        next = self.sy, self.systring
        self.unread(*next)
        self.sy, self.systring = saved
        return next

    def put_back(self, sy, systring):
        # Push the current token back onto the queue and make
        # (sy, systring) the current token.
        self.unread(self.sy, self.systring)
        self.sy = sy
        self.systring = systring

    def unread(self, token, value):
        # This method should be added to Plex
        # Inserts the token at the front of the scanner's queue.
        self.queue.insert(0, (token, value))

    def error(self, message, pos = None, fatal = True):
        # Report message at pos (default: the current position) through
        # Errors.error.  If the current token is INDENT, an additional
        # "Possible inconsistent indentation" error is reported first.
        # NOTE(review): error() presumably returns the exception object
        # that is raised below when fatal is true -- confirm in Errors.
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            err = error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal: raise err

    def expect(self, what, message = None):
        # Skip over the current token if it is `what`, else report it.
        if self.sy == what:
            self.next()
        else:
            self.expected(what, message)

    def expect_keyword(self, what, message = None):
        # Like expect(), but for an identifier token whose string value
        # is `what`.
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message = None):
        # Report `message` if given, otherwise a generic
        # "Expected ..., found ..." error naming the current token.
        if message:
            self.error(message)
        else:
            if self.sy == IDENT:
                found = self.systring
            else:
                found = self.sy
            self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        self.expect('INDENT',
            "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT',
            "Expected a decrease in indentation level")

    def expect_newline(self, message = "Expected a newline"):
        # Expect either a newline or end of file
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)
484