"""Provide advanced parsing abilities for ParenMatch and other extensions.

HyperParser uses PyParser.  PyParser mostly gives information on the
proper indentation of code.  HyperParser gives additional information on
the structure of code.
"""

import string
import keyword
from idlelib import PyParse


class HyperParser:
    """Analyze the structure of the statement surrounding a text index.

    Attributes set by __init__ / set_index and used by the other methods:

    rawtext        -- the text handed to the parser, without the ' \\n'
                      suffix that __init__ appends for the parser's benefit.
    stopatindex    -- text-widget index ("<line>.end") matching the end of
                      rawtext; used to convert rawtext offsets into widget
                      indices.
    bracketing     -- result of parser.get_last_stmt_bracketing(); used
                      here as a sequence of (offset-in-rawtext, level)
                      pairs.
    isopener       -- list parallel to bracketing; true where the entry's
                      level is greater than the previous entry's level.
    indexinrawtext -- offset in rawtext of the current index.
    indexbracket   -- position in bracketing of the rightmost entry to
                      which the current index belongs.
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin -- editor window object; this method reads its text,
                   indentwidth, tabwidth, context_use_ps1 and
                   num_context_lines attributes and calls its
                   _build_char_in_string_func method.
        index   -- text-widget index whose surroundings are analyzed.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # "<line>.<column>" -> line number.
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try increasingly large contexts until the parser finds a
            # good place to start parsing, or we reach the first line.
            # Note: bod and stopatindex are only bound inside the loop,
            # so num_context_lines is expected to be non-empty.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end.  We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if it should be.
                parser.set_str(text.get(startatindex, stopatindex) + ' \n')
                bod = parser.find_good_parse_start(
                    editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console", or
            # at the beginning of the text if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it.  We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if it
            # should be.
            parser.set_str(text.get(startatindex, stopatindex) + ' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # Find which pairs of bracketing are openers.  These always
        # correspond to a character of rawtext.  The first entry is
        # never an opener (i > 0 is false for it).
        self.isopener = [i > 0 and self.bracketing[i][1] >
                         self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if the index lies before the start of the
        analyzed text.
        """
        # rawtext ends at stopatindex, so the offset of index is the
        # length of rawtext minus the length of the text after index.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # Find the rightmost bracket to which index belongs.
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry that starts exactly at index and is not an opener
        # also counts as containing the index.
        if (self.indexbracket < len(self.bracketing)-1 and
                self.bracketing[self.indexbracket+1][0] ==
                self.indexinrawtext and
                not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Not in code only when the enclosing opener starts a comment
        # or a string.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest enclosing opener whose character
        # is one of the requested openers.
        before = self.indexbracket
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that level, i.e. the
        # point where the bracket found above is closed.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Convert rawtext offsets to widget indices by counting chars
        # back from stopatindex.
        beforeindex = self.text.index(
            "%s-%dc" % (self.stopatindex,
                        len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
                self.bracketing[after][0] > len(self.rawtext)):
            # The bracket is not closed within the analyzed text.
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                            len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # Ascii chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # Ascii chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # Ascii chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"

    def _eat_identifier(self, str, limit, pos):
        """Return the length of the identifier ending at pos, or 0.

        Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.
        The scan never goes back past limit.  Reserved words (keywords)
        are not identifiers.
        """
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        # Reject candidates that start with a non-identifier-start char
        # (e.g. a digit) or that are keywords.
        if (i < pos and (str[i] not in self._id_first_chars or
                         keyword.iskeyword(str[i:pos]))):
            i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code (see
        is_in_code).
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier or closing bracket is expected next
        # (scanning backwards); False while a dot is expected.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if (pos > brck_limit and
                        rawtext[pos-1] in self._whitespace_chars):
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression ends at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit.  If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue.  postdot_phase is True, so we don't allow
                    # a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]


if __name__ == '__main__':
    import unittest
    unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2)