# cython: infer_types=True, language_level=3, py2_import=True
#
#   Cython Scanner
#

import os
import platform

import cython
cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
               print_function=object)

from Cython import Utils
from Cython.Plex.Scanners import Scanner
from Cython.Plex.Errors import UnrecognizedInput
from Errors import error
from Lexicon import any_string_prefix, make_lexicon, IDENT
from Future import print_function

from StringEncoding import EncodedString

debug_scanner = 0
trace_scanner = 0
scanner_debug_flags = 0
scanner_dump_file = None

# Module-wide lexicon, built on first use by get_lexicon().
lexicon = None


def get_lexicon():
    """Return the shared lexicon, creating it with make_lexicon() if needed."""
    global lexicon
    if not lexicon:
        lexicon = make_lexicon()
    return lexicon

#------------------------------------------------------------------

# Identifiers treated as keywords when scanning a plain Python file.
py_reserved_words = [
    "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
    "continue", "return", "raise", "import", "exec", "try",
    "except", "finally", "while", "if", "elif", "else", "for",
    "in", "assert", "and", "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# Keywords for Cython files: the Python ones plus the Cython additions.
pyx_reserved_words = py_reserved_words + [
    "include", "ctypedef", "cdef", "cpdef",
    "cimport", "DEF", "IF", "ELIF", "ELSE"
]


class Method(object):
    """
    Callable that forwards to the method called 'name' on the stream
    object it is invoked with.
    """

    def __init__(self, name):
        self.name = name
        self.__name__ = name  # for Plex tracing

    def __call__(self, stream, text):
        # Resolve the method at call time on the given stream.
        return getattr(stream, self.name)(text)

#------------------------------------------------------------------


class CompileTimeScope(object):
    """
    A name -> value mapping with an optional enclosing (outer) scope
    that is consulted by lookup() when a name is not found locally.
    """

    def __init__(self, outer = None):
        self.entries = {}
        self.outer = outer

    def declare(self, name, value):
        """Bind 'name' to 'value' in this scope."""
        self.entries[name] = value

    def update(self, other):
        """Merge the entries of the mapping 'other' into this scope."""
        self.entries.update(other)

    def lookup_here(self, name):
        """Return the value bound in this scope only; raises KeyError."""
        return self.entries[name]

    def __contains__(self, name):
        # Only checks this scope, not the outer ones.
        return name in self.entries

    def lookup(self, name):
        """
        Return the value bound to 'name' in this scope or, failing that,
        in the chain of outer scopes.  Raises KeyError if it is unknown.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            outer = self.outer
            if outer:
                return outer.lookup(name)
            else:
                raise


def initial_compile_time_env():
    """
    Build the initial compile-time environment.

    Returns an empty CompileTimeScope whose outer scope holds the
    UNAME_* values taken from platform.uname() and those of the listed
    builtins that exist in the running Python.
    """
    benv = CompileTimeScope()
    names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
             'UNAME_VERSION', 'UNAME_MACHINE')
    # zip() stops at the shorter sequence, so extra uname() fields are ignored.
    for name, value in zip(names, platform.uname()):
        benv.declare(name, value)
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins

    names = ('False', 'True',
             'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
             'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
             'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
             'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
             'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
             'sum', 'tuple', 'xrange', 'zip')

    for name in names:
        try:
            benv.declare(name, getattr(builtins, name))
        except AttributeError:
            # ignore, likely Py3
            pass
    denv = CompileTimeScope(benv)
    return denv

#------------------------------------------------------------------


class SourceDescriptor(object):
    """
    A SourceDescriptor should be considered immutable.
    """
    _file_type = 'pyx'

    _escaped_description = None
    _cmp_name = ''

    def __str__(self):
        assert False  # To catch all places where a descriptor is used directly as a filename

    def set_file_type_from_name(self, filename):
        """Set the file type from the extension; unknown ones mean 'pyx'."""
        ext = os.path.splitext(filename)[1]
        if ext in ('.pyx', '.pxd', '.py'):
            self._file_type = ext[1:]
        else:
            self._file_type = 'pyx'

    def is_cython_file(self):
        return self._file_type in ('pyx', 'pxd')

    def is_python_file(self):
        return self._file_type == 'py'

    def get_escaped_description(self):
        """
        Return get_description() with non-ASCII characters replaced.
        The result is computed once and then cached on the instance.
        """
        if self._escaped_description is None:
            self._escaped_description = \
                self.get_description().encode('ASCII', 'replace').decode("ASCII")
        return self._escaped_description

    def __gt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name > other._cmp_name
        except AttributeError:
            return False

    def __lt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

    def __le__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name <= other._cmp_name
        except AttributeError:
            return False


class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """

    def __init__(self, filename, path_description=None):
        filename = Utils.decode_filename(filename)
        self.path_description = path_description or filename
        self.filename = filename
        self.set_file_type_from_name(filename)
        self._cmp_name = filename
        # Maps (encoding, error_handling) to the cached lines, or to None
        # when the file was read once but the lines were not kept.
        self._lines = {}

    def get_lines(self, encoding=None, error_handling=None):
        """Return the lines of the file as a list, read with the given options."""
        # we cache the lines only the second time this is called, in
        # order to save memory when they are only used once
        key = (encoding, error_handling)
        try:
            lines = self._lines[key]
            if lines is not None:
                return lines
        except KeyError:
            pass
        f = Utils.open_source_file(
            self.filename, encoding=encoding,
            error_handling=error_handling,
            # newline normalisation is costly before Py2.6
            require_normalised_newlines=False)
        try:
            lines = list(f)
        finally:
            f.close()
        if key in self._lines:
            self._lines[key] = lines
        else:
            # do not cache the first access, but remember that we
            # already read it once
            self._lines[key] = None
        return lines

    def get_description(self):
        return self.path_description

    def get_error_description(self):
        """Return the filename, made relative to the current directory if inside it."""
        path = self.filename
        cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
        if path.startswith(cwd):
            return path[len(cwd):]
        return path

    def get_filenametable_entry(self):
        return self.filename

    def __eq__(self, other):
        return isinstance(other, FileSourceDescriptor) and self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        # An empty format string would raise TypeError for a string operand.
        return "<FileSourceDescriptor:%s>" % self.filename


class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filenames if the
    code originates from a string object.
    """
    filename = None

    def __init__(self, name, code):
        self.name = name
        #self.set_file_type_from_name(name)
        # Every piece produced by the split gets a trailing newline.
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name

    def get_lines(self, encoding=None, error_handling=None):
        """
        Return the code lines; if an encoding is given, each line is
        round-tripped through it using 'error_handling'.
        """
        if not encoding:
            return self.codelines
        else:
            return [line.encode(encoding, error_handling).decode(encoding)
                    for line in self.codelines]

    def get_description(self):
        return self.name

    get_error_description = get_description

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        return id(self)
        # Do not hash on the name, an identical string source should be the
        # same object (name is often defaulted in other places)
        # return hash(self.name)

    def __eq__(self, other):
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __repr__(self):
        # An empty format string would raise TypeError for a string operand.
        return "<StringSourceDescriptor:%s>" % self.name

#------------------------------------------------------------------


class PyrexScanner(Scanner):
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    def __init__(self, file, filename, parent_scanner = None,
                 scope = None, context = None, source_encoding=None,
                 parse_comments=True, initial_pos=None):
        """
        Set up the scanner and read the first token.

        With a parent_scanner, the context and the compile-time state are
        shared with it; otherwise they are taken from 'context' and 'scope'
        and a fresh compile-time environment is created.  'filename' is a
        source descriptor: its is_python_file() selects the keyword set.
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if parent_scanner:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        else:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            # The option may be missing or None; both mean "nothing to add".
            extra_env = getattr(context.options, 'compile_time_env', None)
            if extra_env is not None:
                self.compile_time_env.update(extra_env)
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        if filename.is_python_file():
            self.in_python_file = True
            self.keywords = set(py_reserved_words)
        else:
            self.in_python_file = False
            self.keywords = set(pyx_reserved_words)
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def commentline(self, text):
        # Comment tokens are only produced when comment parsing is enabled.
        if self.parse_comments:
            self.produce('commentline', text)

    def current_level(self):
        """Return the indentation level on top of the indentation stack."""
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level + 1
        return text

    def close_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level - 1
        return text

    def newline_action(self, text):
        # Newlines inside brackets produce no token and no state change.
        if self.bracket_nesting_level == 0:
            self.begin('INDENT')
            self.produce('NEWLINE', '')

    # Maps an opening quote sequence to the scanner state for that string.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        # Strip leading prefix characters so that only the quotes remain.
        while text[:1] in any_string_prefix:
            text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        """
        Handle the leading whitespace 'text' of a line: check that a single
        indentation character is used consistently, then produce INDENT or
        DEDENT tokens according to the change of the indentation level.
        """
        self.begin('')
        # Indentation within brackets should be ignored.
        #if self.bracket_nesting_level > 0:
        #    return
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char is None:
                self.indentation_char = c
            else:
                if self.indentation_char != c:
                    self.error("Mixed use of tabs and spaces")
            if text.replace(c, "") != "":
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level == current_level:
            return
        elif new_level > current_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        else:
            while new_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            # The new level must match one of the enclosing levels exactly.
            if new_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        # Close every indentation level that is still open, then signal EOF.
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        """
        Read the next token and store it in self.sy / self.systring.

        An identifier found in self.keywords becomes a keyword token (its
        text is used as the symbol), except for 'print' and 'exec' when the
        context makes them ordinary names; these are then removed from the
        keyword set.  Identifier text is wrapped in EncodedString.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
        if sy == IDENT:
            if systring in self.keywords:
                if systring == u'print' and print_function in self.context.future_directives:
                    self.keywords.discard('print')
                    systring = EncodedString(systring)
                elif systring == u'exec' and self.context.language_level >= 3:
                    self.keywords.discard('exec')
                    systring = EncodedString(systring)
                else:
                    sy = systring
            else:
                systring = EncodedString(systring)
        self.sy = sy
        self.systring = systring
        if False:  # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        """Return the (sy, systring) pair of the next token without consuming it."""
        saved = self.sy, self.systring
        self.next()
        upcoming = self.sy, self.systring
        # Push the token back and restore the current one.
        self.unread(*upcoming)
        self.sy, self.systring = saved
        return upcoming

    def put_back(self, sy, systring):
        """Make (sy, systring) the current token and re-queue the previous current one."""
        self.unread(self.sy, self.systring)
        self.sy = sy
        self.systring = systring

    def unread(self, token, value):
        # This method should be added to Plex
        self.queue.insert(0, (token, value))

    def error(self, message, pos = None, fatal = True):
        """
        Pass 'message' to Errors.error() for position 'pos' (the current
        position by default) and raise the object it returns if 'fatal'.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            # Extra hint; its return value is not needed.
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal:
            raise err

    def expect(self, what, message = None):
        """Consume the current token if its symbol is 'what', else report an error."""
        if self.sy == what:
            self.next()
        else:
            self.expected(what, message)

    def expect_keyword(self, what, message = None):
        """Consume the current token if it is the identifier 'what', else report an error."""
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message = None):
        """Report 'message', or a default "Expected ..., found ..." error."""
        if message:
            self.error(message)
        else:
            if self.sy == IDENT:
                found = self.systring
            else:
                found = self.sy
            self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        self.expect('INDENT',
                    "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT',
                    "Expected a decrease in indentation level")

    def expect_newline(self, message = "Expected a newline"):
        # Expect either a newline or end of file
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)