# cython: language_level=3, py2_import=True
#
#   Cython Scanner - Lexical Definitions
#

# Characters that may appear as a prefix in front of a string literal.
raw_prefixes = "rR"
bytes_prefixes = "bB"
string_prefixes = "uU" + bytes_prefixes
char_prefixes = "cC"
any_string_prefix = raw_prefixes + string_prefixes + char_prefixes

# Token value attached to the identifier pattern in the lexicon.
IDENT = 'IDENT'


def make_lexicon():
    """Build and return the Plex ``Lexicon`` holding the scanner's token rules.

    The lexicon consists of the default-state rules (identifiers, numeric
    literals, operators, brackets, line terminators, string openers,
    comments and whitespace) followed by five named states: ``INDENT`` and
    one state for each string flavour (``SQ_STRING``, ``DQ_STRING``,
    ``TSQ_STRING``, ``TDQ_STRING``), and finally the end-of-file rule.

    Each rule pairs a pattern with either a token name, ``TEXT``,
    ``IGNORE`` or a ``Method(...)`` object carrying a method name
    (presumably resolved against the scanner -- see ``Scanning.Method``).

    Rule order is kept exactly as listed, since it may decide which rule
    wins when several patterns match the same text.
    """
    from Cython.Plex import (
        Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof,
        TEXT, IGNORE, State, Lexicon)
    # Implicit relative import; enabled by the py2_import directive above.
    from Scanning import Method

    # ---- character classes ------------------------------------------
    ident_char = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
    dec_digit = Any("0123456789")
    bin_digit = Any("01")
    oct_digit = Any("01234567")
    hex_digit = Any("0123456789ABCDEFabcdef")

    # ---- identifiers ------------------------------------------------
    identifier = ident_char + Rep(ident_char | dec_digit)

    # ---- numeric literals -------------------------------------------
    decimal = Rep1(dec_digit)
    dot = Str(".")
    exponent = Any("Ee") + Opt(Any("+-")) + decimal
    decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)

    # Everything that may follow a leading "0" in a based integer.
    hex_tail = Any("Xx") + Rep1(hex_digit)
    oct_tail = Any("Oo") + Rep1(oct_digit)
    bin_tail = Any("Bb") + Rep1(bin_digit)
    intconst = decimal | (Str("0") + (hex_tail | oct_tail | bin_tail))

    # Integer suffix: optional U with up to two L's, in either order.
    opt_unsigned = Opt(Any("Uu"))
    opt_long = Opt(Any("Ll"))
    intsuffix = ((opt_unsigned + opt_long + opt_long) |
                 (opt_long + opt_long + opt_unsigned))
    intliteral = intconst + intsuffix

    fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
    imagconst = (intconst | fltconst) + Any("jJ")

    # ---- string literals --------------------------------------------
    string_prefix = (Any(string_prefixes) + Opt(Any(raw_prefixes)) |
                     Any(raw_prefixes) + Opt(Any(bytes_prefixes)) |
                     Any(char_prefixes))
    opening_quote = Str("'") | Str('"') | Str("'''") | Str('"""')
    beginstring = Opt(string_prefix) + opening_quote

    two_oct = oct_digit + oct_digit
    three_oct = oct_digit + oct_digit + oct_digit
    two_hex = hex_digit + hex_digit
    four_hex = two_hex + two_hex
    # What may follow a backslash; AnyChar is the catch-all alternative.
    escape_body = (two_oct | three_oct |
                   Str('N{') + Rep(AnyBut('}')) + Str('}') |
                   Str('u') + four_hex | Str('x') + two_hex |
                   Str('U') + four_hex + four_hex | AnyChar)
    escapeseq = Str("\\") + escape_body

    # ---- operators and punctuation ----------------------------------
    deco = Str("@")
    bra = Any("([{")
    ket = Any(")]}")
    punct = Any(":,;+-*/|&<>=.%`~^?!")
    diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//",
                    "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=",
                    "<<=", ">>=", "**=", "//=", "->")

    # ---- layout -----------------------------------------------------
    spaces = Rep1(Any(" \t\f"))
    escaped_newline = Str("\\\n")
    lineterm = Eol + Opt(Str("\n"))
    indentation = Bol + Rep(Any(" \t"))
    comment = Str("#") + Rep(AnyBut("\n"))

    # ---- rules of the default state ---------------------------------
    rules = [
        (identifier, IDENT),
        (intliteral, 'INT'),
        (fltconst, 'FLOAT'),
        (imagconst, 'IMAG'),
        (deco, 'DECORATOR'),
        (punct | diphthong, TEXT),

        (bra, Method('open_bracket_action')),
        (ket, Method('close_bracket_action')),
        (lineterm, Method('newline_action')),

        (beginstring, Method('begin_string_action')),

        (comment, IGNORE),
        (spaces, IGNORE),
        (escaped_newline, IGNORE),
    ]

    # Start-of-line handling: comment-only and blank lines are consumed
    # here; any other line reports its leading whitespace.
    rules.append(State('INDENT', [
        (comment + lineterm, Method('commentline')),
        (Opt(spaces) + Opt(comment) + lineterm, IGNORE),
        (indentation, Method('indentation_action')),
        (Eof, Method('eof_action'))
    ]))

    # Single-quoted string: a double quote is plain content, a bare
    # newline means the string was never closed.
    rules.append(State('SQ_STRING', [
        (escapeseq, 'ESCAPE'),
        (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
        (Str('"'), 'CHARS'),
        (Str("\n"), Method('unclosed_string_action')),
        (Str("'"), Method('end_string_action')),
        (Eof, 'EOF')
    ]))

    # Double-quoted string: mirror image of SQ_STRING.
    rules.append(State('DQ_STRING', [
        (escapeseq, 'ESCAPE'),
        (Rep1(AnyBut('"\n\\')), 'CHARS'),
        (Str("'"), 'CHARS'),
        (Str("\n"), Method('unclosed_string_action')),
        (Str('"'), Method('end_string_action')),
        (Eof, 'EOF')
    ]))

    # Triple-single-quoted string: lone quotes of either kind are
    # content and newlines are reported as 'NEWLINE' tokens.
    rules.append(State('TSQ_STRING', [
        (escapeseq, 'ESCAPE'),
        (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
        (Any("'\""), 'CHARS'),
        (Str("\n"), 'NEWLINE'),
        (Str("'''"), Method('end_string_action')),
        (Eof, 'EOF')
    ]))

    # Triple-double-quoted string: same as TSQ_STRING, closed by three
    # double quotes.
    rules.append(State('TDQ_STRING', [
        (escapeseq, 'ESCAPE'),
        (Rep1(AnyBut('"\'\n\\')), 'CHARS'),
        (Any("'\""), 'CHARS'),
        (Str("\n"), 'NEWLINE'),
        (Str('"""'), Method('end_string_action')),
        (Eof, 'EOF')
    ]))

    rules.append((Eof, Method('eof_action')))

    # FIXME: Plex 1.9 needs different args here from Plex 1.1.4
    #debug_flags = scanner_debug_flags,
    #debug_file = scanner_dump_file
    return Lexicon(rules)