# cython: infer_types=True, language_level=3, py2_import=True
#
#   Cython Scanner
#
#   Tokenizer front-end for Cython/Python sources, built on Plex's Scanner.
#   Besides the scanner itself, this module defines the source descriptor
#   classes used to identify where code came from and the scope object that
#   holds compile-time (DEF/IF) names.
#

import os
import platform

import cython
cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
               print_function=object)

from Cython import Utils
from Cython.Plex.Scanners import Scanner
from Cython.Plex.Errors import UnrecognizedInput
from Errors import error
from Lexicon import any_string_prefix, make_lexicon, IDENT
from Future import print_function

from StringEncoding import EncodedString

debug_scanner = 0
trace_scanner = 0
scanner_debug_flags = 0
scanner_dump_file = None

# Lazily built by get_lexicon() and then shared by every scanner instance.
lexicon = None

def get_lexicon():
    """Return the module-wide lexicon, building it on first use."""
    global lexicon
    if not lexicon:
        lexicon = make_lexicon()
    return lexicon

#------------------------------------------------------------------

# Identifiers that the scanner reports as keyword tokens in plain Python
# files.  Each word is listed exactly once.
py_reserved_words = [
    "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
    "continue", "return", "raise", "import", "exec", "try",
    "except", "finally", "while", "if", "elif", "else", "for",
    "in", "assert", "and", "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# Cython sources reserve everything Python does plus the Cython-only words.
pyx_reserved_words = py_reserved_words + [
    "include", "ctypedef", "cdef", "cpdef",
    "cimport", "DEF", "IF", "ELIF", "ELSE"
]

class Method(object):
    """
    Callable that forwards to the method called `name` on the stream it
    is invoked with, i.e. ``Method('foo')(stream, text)`` calls
    ``stream.foo(text)``.
    """

    def __init__(self, name):
        self.name = name
        self.__name__ = name # for Plex tracing

    def __call__(self, stream, text):
        return getattr(stream, self.name)(text)

#------------------------------------------------------------------

class CompileTimeScope(object):
    """
    A mapping of compile-time names to values with an optional outer
    scope that is consulted when a name is not found locally.
    """

    def __init__(self, outer = None):
        self.entries = {}
        self.outer = outer

    def declare(self, name, value):
        """Bind `name` to `value` in this scope."""
        self.entries[name] = value

    def update(self, other):
        """Merge the mapping `other` into this scope's own entries."""
        self.entries.update(other)

    def lookup_here(self, name):
        """Return the value bound in this scope only; raises KeyError."""
        return self.entries[name]

    def __contains__(self, name):
        # Only this scope's own entries are checked, not the outer scope.
        return name in self.entries

    def lookup(self, name):
        """
        Return the value bound to `name` here or in an enclosing scope.
        Raises KeyError if no scope in the chain defines it.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            outer = self.outer
            if outer:
                return outer.lookup(name)
            else:
                raise

def initial_compile_time_env():
    """
    Build the default compile-time environment.

    The inner scope that is returned is empty; its outer scope holds the
    UNAME_* names taken from platform.uname() and whichever of the listed
    builtins exist in the running interpreter.
    """
    benv = CompileTimeScope()
    names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
             'UNAME_VERSION', 'UNAME_MACHINE')
    for name, value in zip(names, platform.uname()):
        benv.declare(name, value)
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins

    names = ('False', 'True',
             'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
             'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
             'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
             'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
             'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
             'sum', 'tuple', 'xrange', 'zip')

    for name in names:
        try:
            benv.declare(name, getattr(builtins, name))
        except AttributeError:
            # ignore, likely Py3
            pass
    denv = CompileTimeScope(benv)
    return denv

#------------------------------------------------------------------

class SourceDescriptor(object):
    """
    A SourceDescriptor should be considered immutable.
    """
    _file_type = 'pyx'

    _escaped_description = None
    _cmp_name = ''
    def __str__(self):
        assert False # To catch all places where a descriptor is used directly as a filename

    def set_file_type_from_name(self, filename):
        """Derive the file type from the extension, defaulting to 'pyx'."""
        name, ext = os.path.splitext(filename)
        # ext[1:] is never empty when the membership test succeeds, so the
        # and/or chain behaves like a conditional expression here.
        self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'

    def is_cython_file(self):
        return self._file_type in ('pyx', 'pxd')

    def is_python_file(self):
        return self._file_type == 'py'

    def get_escaped_description(self):
        """
        Return the description with non-ASCII characters replaced, computing
        it once and caching it on the instance.
        """
        if self._escaped_description is None:
            self._escaped_description = \
                self.get_description().encode('ASCII', 'replace').decode("ASCII")
        return self._escaped_description

    def __gt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name > other._cmp_name
        except AttributeError:
            return False

    def __lt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

    def __le__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name <= other._cmp_name
        except AttributeError:
            return False

class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """
    def __init__(self, filename, path_description=None):
        filename = Utils.decode_filename(filename)
        self.path_description = path_description or filename
        self.filename = filename
        self.set_file_type_from_name(filename)
        self._cmp_name = filename
        # (encoding, error_handling) -> list of lines, or None as a marker
        # that the file has been read once but not cached yet
        self._lines = {}

    def get_lines(self, encoding=None, error_handling=None):
        """
        Return the lines of the file as a list, read with the given
        encoding and error handling.
        """
        # we cache the lines only the second time this is called, in
        # order to save memory when they are only used once
        key = (encoding, error_handling)
        try:
            lines = self._lines[key]
            if lines is not None:
                return lines
        except KeyError:
            pass
        f = Utils.open_source_file(
            self.filename, encoding=encoding,
            error_handling=error_handling,
            # newline normalisation is costly before Py2.6
            require_normalised_newlines=False)
        try:
            lines = list(f)
        finally:
            f.close()
        if key in self._lines:
            self._lines[key] = lines
        else:
            # do not cache the first access, but remember that we
            # already read it once
            self._lines[key] = None
        return lines

    def get_description(self):
        return self.path_description

    def get_error_description(self):
        """Return the filename, relative to the working directory if inside it."""
        path = self.filename
        cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
        if path.startswith(cwd):
            return path[len(cwd):]
        return path

    def get_filenametable_entry(self):
        return self.filename

    def __eq__(self, other):
        return isinstance(other, FileSourceDescriptor) and self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename

class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filename if the
    code originates from a string object.
    """
    filename = None

    def __init__(self, name, code):
        self.name = name
        #self.set_file_type_from_name(name)
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name

    def get_lines(self, encoding=None, error_handling=None):
        """
        Return the code lines, round-tripped through `encoding` when one
        is given.
        """
        if not encoding:
            return self.codelines
        else:
            return [ line.encode(encoding, error_handling).decode(encoding)
                     for line in self.codelines ]

    def get_description(self):
        return self.name

    get_error_description = get_description

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        return id(self)
        # Do not hash on the name, an identical string source should be the
        # same object (name is often defaulted in other places)
        # return hash(self.name)

    def __eq__(self, other):
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name

#------------------------------------------------------------------

class PyrexScanner(Scanner):
    """
    Scanner for Cython and Python source.  The current token is exposed
    as `sy` (token type) and `systring` (token text).
    """
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    def __init__(self, file, filename, parent_scanner = None,
                 scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
        """
        Set up the scanner and read the first token.

        With a `parent_scanner`, the compilation context, included file
        list and compile-time state are taken from it.  Otherwise they
        come from `context` and `scope`, and a fresh compile-time
        environment is created.  `filename` must be a source descriptor
        (its is_python_file() selects the keyword set).
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if parent_scanner:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        else:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            # user supplied compile-time names override the defaults
            if hasattr(context.options, 'compile_time_env') and \
               context.options.compile_time_env is not None:
                self.compile_time_env.update(context.options.compile_time_env)
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        # Each scanner gets its own keyword set because next() may remove
        # 'print' and 'exec' from it while scanning.
        if filename.is_python_file():
            self.in_python_file = True
            self.keywords = set(py_reserved_words)
        else:
            self.in_python_file = False
            self.keywords = set(pyx_reserved_words)
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def commentline(self, text):
        # Comment tokens are only produced when comment parsing is enabled.
        if self.parse_comments:
            self.produce('commentline', text)

    def current_level(self):
        """Return the indentation level on top of the indentation stack."""
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level + 1
        return text

    def close_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level - 1
        return text

    def newline_action(self, text):
        # A newline inside brackets does not start a new logical line.
        if self.bracket_nesting_level == 0:
            self.begin('INDENT')
            self.produce('NEWLINE', '')

    # Maps the opening quote of a string literal to the scanner state that
    # reads its body.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        # Strip leading prefix characters so that only the quote remains.
        while text[:1] in any_string_prefix:
            text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        """
        Handle the leading whitespace `text` of a line: check that tabs
        and spaces are not mixed, then produce INDENT or DEDENT tokens to
        match the change in indentation level.
        """
        self.begin('')
        # Indentation within brackets should be ignored.
        #if self.bracket_nesting_level > 0:
        #    return
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char is None:
                self.indentation_char = c
            else:
                if self.indentation_char != c:
                    self.error("Mixed use of tabs and spaces")
            if text.replace(c, "") != "":
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level == current_level:
            return
        elif new_level > current_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        else:
            while new_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            # a dedent must land exactly on an enclosing level
            if new_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        # Close every indentation level that is still open, then signal EOF.
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        """
        Read the next token into `self.sy` / `self.systring`.

        An identifier that is a keyword is reported with the keyword
        itself as token type.  'print' (when the print_function future
        directive is active) and 'exec' (at language level 3 or above) are
        dropped from the keyword set on first sight and treated as plain
        identifiers.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            # error() is fatal by default, so this raises
            self.error("Unrecognized character")
        if sy == IDENT:
            if systring in self.keywords:
                if systring == u'print' and print_function in self.context.future_directives:
                    self.keywords.discard('print')
                    systring = EncodedString(systring)
                elif systring == u'exec' and self.context.language_level >= 3:
                    self.keywords.discard('exec')
                    systring = EncodedString(systring)
                else:
                    sy = systring
            else:
                systring = EncodedString(systring)
        self.sy = sy
        self.systring = systring
        if False: # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        """Return the (sy, systring) pair of the next token without consuming it."""
        saved = self.sy, self.systring
        self.next()
        upcoming = self.sy, self.systring
        self.unread(*upcoming)
        self.sy, self.systring = saved
        return upcoming

    def put_back(self, sy, systring):
        """Push the current token back and make (sy, systring) current again."""
        self.unread(self.sy, self.systring)
        self.sy = sy
        self.systring = systring

    def unread(self, token, value):
        # This method should be added to Plex
        self.queue.insert(0, (token, value))

    def error(self, message, pos = None, fatal = True):
        """
        Report `message` at `pos` (default: the current position) and raise
        the resulting error object if `fatal` is true.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            # reported as an extra hint before the actual message
            err = error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal:
            raise err

    def expect(self, what, message = None):
        """Consume the current token if its type is `what`, else report an error."""
        if self.sy == what:
            self.next()
        else:
            self.expected(what, message)

    def expect_keyword(self, what, message = None):
        """Consume the current token if it is the identifier `what`, else report an error."""
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message = None):
        """Report `message`, or a generic "Expected ..., found ..." error."""
        if message:
            self.error(message)
        else:
            if self.sy == IDENT:
                found = self.systring
            else:
                found = self.sy
            self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        self.expect('INDENT',
            "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT',
            "Expected a decrease in indentation level")

    def expect_newline(self, message = "Expected a newline"):
        # Expect either a newline or end of file
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)