import itertools

from ShCommands import Command, Pipeline


def tcl_preprocess(data):
    """Apply Tcl's backslash-newline preprocessing to *data*.

    Every occurrence of a backslash immediately followed by a newline,
    together with any spaces or tabs that follow it, is replaced by a single
    space. The input is returned unchanged when it contains no escaped
    newline.
    """
    pieces = []
    n = len(data)
    pos = 0
    while True:
        i = data.find('\\\n', pos)
        if i == -1:
            pieces.append(data[pos:])
            break

        # Replace '\\\n' and subsequent whitespace by a single space.
        pieces.append(data[pos:i])
        pieces.append(' ')
        pos = i + 2
        while pos < n and data[pos] in ' \t':
            pos += 1
    return ''.join(pieces)


class TclLexer:
    """TclLexer - Lex a string into "words", following the Tcl syntax."""

    # Single-character backslash escapes that map to a control character.
    kSimpleEscapes = {
        'a': '\x07',
        'b': '\x08',
        'f': '\x0c',
        'n': '\n',
        'r': '\r',
        't': '\t',
        'v': '\x0b',
    }

    def __init__(self, data):
        self.data = tcl_preprocess(data)
        self.pos = 0
        self.end = len(self.data)

    def at_end(self):
        """Return True once every character has been consumed."""
        return self.pos == self.end

    def eat(self):
        """Consume and return the next character."""
        c = self.data[self.pos]
        self.pos += 1
        return c

    def look(self):
        """Return the next character without consuming it."""
        return self.data[self.pos]

    def maybe_eat(self, c):
        """
        maybe_eat(c) - Consume the character c if it is the next character,
        returning True if a character was consumed. """
        # Guard against running off the end of the input, so that truncated
        # input is reported by the callers as an unterminated word instead
        # of surfacing as an IndexError.
        if not self.at_end() and self.data[self.pos] == c:
            self.pos += 1
            return True
        return False

    def escape(self, c):
        """Return the character denoted by the backslash escape '\\' + c.

        Raises ValueError for the numeric escapes (u, x, o), which are not
        supported. Any other character stands for itself.
        """
        if c in 'uxo':
            raise ValueError('Invalid quoted character %r' % c)
        return self.kSimpleEscapes.get(c, c)

    def lex_braced(self):
        """Lex a brace-quoted word and return its contents.

        The opening brace has already been consumed; lexing continues up to
        the matching closing brace. Nested braces are kept verbatim, and a
        backslash is only special in front of '{' or '}'.

        Raises ValueError if the closing brace is missing.
        """
        parts = []
        while True:
            if self.at_end():
                raise ValueError("Unterminated '{' quoted word")

            c = self.eat()
            if c == '}':
                break
            elif c == '{':
                parts.append('{' + self.lex_braced() + '}')
            elif c == '\\' and not self.at_end() and self.look() in '{}':
                parts.append(self.eat())
            else:
                parts.append(c)

        return ''.join(parts)

    def lex_quoted(self):
        """Lex a double-quoted word and return its contents.

        The opening quote has already been consumed. Backslash escapes are
        translated through escape().

        Raises ValueError if the closing quote or an escaped character is
        missing.
        """
        parts = []
        while True:
            if self.at_end():
                raise ValueError("Unterminated '\"' quoted word")

            c = self.eat()
            if c == '"':
                break
            elif c == '\\':
                if self.at_end():
                    raise ValueError('Missing quoted character')

                parts.append(self.escape(self.eat()))
            else:
                parts.append(c)

        return ''.join(parts)

    def lex_unquoted(self, process_all=False):
        """Lex a bare word.

        Lexing stops at whitespace, ';' or the end of the string; when
        process_all is true it only stops at the end of the string.

        Raises ValueError for a dangling backslash and NotImplementedError
        for command ('[') or variable ('$name', '${') substitution.
        """
        parts = []
        while not self.at_end():
            if not process_all:
                if self.look().isspace() or self.look() == ';':
                    break

            c = self.eat()
            if c == '\\':
                if self.at_end():
                    raise ValueError('Missing quoted character')

                parts.append(self.escape(self.eat()))
            elif c == '[':
                raise NotImplementedError('Command substitution is '
                                          'not supported')
            elif c == '$' and not self.at_end() and (self.look().isalpha() or
                                                     self.look() == '{'):
                raise NotImplementedError('Variable substitution is '
                                          'not supported')
            else:
                parts.append(c)

        return ''.join(parts)

    def lex_one_token(self):
        """Lex a single word, dispatching on its first character."""
        if self.maybe_eat('"'):
            return self.lex_quoted()
        elif self.maybe_eat('{'):
            # Check for argument substitution.
            if not self.maybe_eat('*'):
                return self.lex_braced()

            if not self.maybe_eat('}'):
                return '*' + self.lex_braced()

            # A stand-alone '{*}' is an ordinary word; '{*}' glued to a
            # following word would be argument expansion.
            if self.at_end() or self.look().isspace():
                return '*'

            raise NotImplementedError("Argument substitution is unsupported")
        else:
            return self.lex_unquoted()

    def lex(self):
        """Generate the tokens of the input.

        Words are yielded as strings and command terminators (';' or a
        newline) as the tuple (';',).
        """
        while not self.at_end():
            c = self.look()
            if c in ';\n':
                self.eat()
                yield (';',)
            elif c.isspace():
                # Skip every word separator, not only space and tab:
                # lex_unquoted() stops on any whitespace without consuming
                # it, so leaving e.g. '\r' here would never make progress.
                self.eat()
            else:
                yield self.lex_one_token()


class TclExecCommand:
    """Parse the argument list of a Tcl 'exec' command."""

    kRedirectPrefixes1 = ('<', '>')
    kRedirectPrefixes2 = ('<@', '<<', '2>', '>&', '>>', '>@')
    kRedirectPrefixes3 = ('2>@', '2>>', '>>&', '>&@')
    kRedirectPrefixes4 = ('2>@1',)

    def __init__(self, args):
        self.args = iter(args)

    def lex(self):
        """Consume and return the next argument, or None when exhausted."""
        return next(self.args, None)

    def look(self):
        """Return the next argument without consuming it (None at the end)."""
        tok = self.lex()
        if tok is not None:
            # Push the token back in front of the remaining arguments.
            self.args = itertools.chain([tok], self.args)
        return tok

    def parse_redirect(self, tok, length):
        """Parse a redirection whose operator is the first *length* chars.

        The target is either the rest of *tok* or, when *tok* is only the
        operator, the next argument. Returns (op, arg) where op is
        (operator,) or, for a '2'-prefixed operator, (operator, 2).

        Raises ValueError if the target is missing.
        """
        if len(tok) == length:
            arg = self.lex()
            if arg is None:
                raise ValueError('Missing argument to %r redirection' % tok)
        else:
            tok, arg = tok[:length], tok[length:]

        if tok[0] == '2':
            op = (tok[1:], 2)
        else:
            op = (tok,)
        return (op, arg)

    def parse_pipeline(self):
        """Parse the remaining arguments into a Pipeline of Commands.

        Raises ValueError if there are no arguments left.
        """
        if self.look() is None:
            raise ValueError("Expected at least one argument to exec")

        commands = [Command([], [])]
        while True:
            arg = self.lex()
            if arg is None:
                break
            elif arg == '|':
                commands.append(Command([], []))
            elif arg == '|&':
                # Write this as a redirect of stderr; it must come first
                # because stdout may have already been redirected.
                commands[-1].redirects.insert(0, (('>&', 2), '1'))
                commands.append(Command([], []))
            # Try the longest redirection operators first so that e.g. '2>@1'
            # is not mistaken for '2>' followed by '@1'.
            elif arg[:4] in TclExecCommand.kRedirectPrefixes4:
                commands[-1].redirects.append(self.parse_redirect(arg, 4))
            elif arg[:3] in TclExecCommand.kRedirectPrefixes3:
                commands[-1].redirects.append(self.parse_redirect(arg, 3))
            elif arg[:2] in TclExecCommand.kRedirectPrefixes2:
                commands[-1].redirects.append(self.parse_redirect(arg, 2))
            elif arg[:1] in TclExecCommand.kRedirectPrefixes1:
                commands[-1].redirects.append(self.parse_redirect(arg, 1))
            else:
                commands[-1].args.append(arg)

        return Pipeline(commands, False, pipe_err=True)

    def parse(self):
        """Parse the leading switches and the pipeline.

        Returns (ignoreStderr, keepNewline, pipeline).

        Raises ValueError on an unknown switch or an empty pipeline.
        """
        ignoreStderr = False
        keepNewline = False

        # Parse arguments.
        while True:
            tok = self.look()
            # startswith() also copes with an empty-string word (e.g. '{}').
            if not isinstance(tok, str) or not tok.startswith('-'):
                break

            if tok == '--':
                self.lex()
                break
            elif tok == '-ignorestderr':
                ignoreStderr = True
            elif tok == '-keepnewline':
                keepNewline = True
            else:
                raise ValueError("Invalid exec argument %r" % tok)

            # Consume the switch that was just handled; without this the
            # loop would look at the same token forever.
            self.lex()

        return (ignoreStderr, keepNewline, self.parse_pipeline())

###

import unittest


class TestTclLexer(unittest.TestCase):
    def lex(self, str, *args, **kwargs):
        return list(TclLexer(str, *args, **kwargs).lex())

    def test_preprocess(self):
        self.assertEqual(tcl_preprocess('a b'), 'a b')
        self.assertEqual(tcl_preprocess('a\\\nb c'), 'a b c')
        self.assertEqual(tcl_preprocess('a\\\n b\\\n\tc'), 'a b c')

    def test_unquoted(self):
        self.assertEqual(self.lex('a b c'),
                         ['a', 'b', 'c'])
        self.assertEqual(self.lex(r'a\nb\tc\ '),
                         ['a\nb\tc '])
        self.assertEqual(self.lex(r'a \\\$b c $\\'),
                         ['a', r'\$b', 'c', '$\\'])

    def test_braced(self):
        self.assertEqual(self.lex('a {b c} {}'),
                         ['a', 'b c', ''])
        self.assertEqual(self.lex(r'a {b {c\n}}'),
                         ['a', 'b {c\\n}'])
        self.assertEqual(self.lex(r'a {b\{}'),
                         ['a', 'b{'])
        self.assertEqual(self.lex(r'{*}'), ['*'])
        self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
        self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
        self.assertEqual(self.lex('{a\\\n b}'),
                         ['a b'])

    def test_quoted(self):
        self.assertEqual(self.lex('a "b c"'),
                         ['a', 'b c'])

    def test_terminators(self):
        self.assertEqual(self.lex('a\nb'),
                         ['a', (';',), 'b'])
        self.assertEqual(self.lex('a;b'),
                         ['a', (';',), 'b'])
        self.assertEqual(self.lex('a ; b'),
                         ['a', (';',), 'b'])

    def test_other_whitespace(self):
        self.assertEqual(self.lex('a\rb'),
                         ['a', 'b'])

    def test_unterminated(self):
        self.assertRaises(ValueError, self.lex, '{')
        self.assertRaises(ValueError, self.lex, '{*')
        self.assertRaises(ValueError, self.lex, '{a\\')
        self.assertRaises(ValueError, self.lex, '"a')


class TestTclExecCommand(unittest.TestCase):
    def parse(self, str):
        return TclExecCommand(list(TclLexer(str).lex())).parse()

    def test_basic(self):
        self.assertEqual(self.parse('echo hello'),
                         (False, False,
                          Pipeline([Command(['echo', 'hello'], [])],
                                   False, True)))
        self.assertEqual(self.parse('echo hello | grep hello'),
                         (False, False,
                          Pipeline([Command(['echo', 'hello'], []),
                                    Command(['grep', 'hello'], [])],
                                   False, True)))

    def test_options(self):
        self.assertEqual(self.parse('-ignorestderr -keepnewline -- echo hello'),
                         (True, True,
                          Pipeline([Command(['echo', 'hello'], [])],
                                   False, True)))

    def test_redirect(self):
        self.assertEqual(self.parse('echo hello > a >b >>c 2> d |& e'),
                         (False, False,
                          Pipeline([Command(['echo', 'hello'],
                                            [(('>&', 2), '1'),
                                             (('>',), 'a'),
                                             (('>',), 'b'),
                                             (('>>',), 'c'),
                                             (('>', 2), 'd')]),
                                    Command(['e'], [])],
                                   False, True)))


if __name__ == '__main__':
    unittest.main()