1import io 2import itertools 3import shlex 4import string 5import unittest 6 7 8# The original test data set was from shellwords, by Hartmut Goebel. 9 10data = r"""x|x| 11foo bar|foo|bar| 12 foo bar|foo|bar| 13 foo bar |foo|bar| 14foo bar bla fasel|foo|bar|bla|fasel| 15x y z xxxx|x|y|z|xxxx| 16\x bar|\|x|bar| 17\ x bar|\|x|bar| 18\ bar|\|bar| 19foo \x bar|foo|\|x|bar| 20foo \ x bar|foo|\|x|bar| 21foo \ bar|foo|\|bar| 22foo "bar" bla|foo|"bar"|bla| 23"foo" "bar" "bla"|"foo"|"bar"|"bla"| 24"foo" bar "bla"|"foo"|bar|"bla"| 25"foo" bar bla|"foo"|bar|bla| 26foo 'bar' bla|foo|'bar'|bla| 27'foo' 'bar' 'bla'|'foo'|'bar'|'bla'| 28'foo' bar 'bla'|'foo'|bar|'bla'| 29'foo' bar bla|'foo'|bar|bla| 30blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz| 31blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz| 32""|""| 33''|''| 34foo "" bar|foo|""|bar| 35foo '' bar|foo|''|bar| 36foo "" "" "" bar|foo|""|""|""|bar| 37foo '' '' '' bar|foo|''|''|''|bar| 38\""|\|""| 39"\"|"\"| 40"foo\ bar"|"foo\ bar"| 41"foo\\ bar"|"foo\\ bar"| 42"foo\\ bar\"|"foo\\ bar\"| 43"foo\\" bar\""|"foo\\"|bar|\|""| 44"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"| 45"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"| 46"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"| 47"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"| 48\''|\|''| 49'foo\ bar'|'foo\ bar'| 50'foo\\ bar'|'foo\\ bar'| 51"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'| 52\"foo"|\|"foo"| 53\"foo"\x|\|"foo"|\|x| 54"foo\x"|"foo\x"| 55"foo\ "|"foo\ "| 56foo\ xx|foo|\|xx| 57foo\ x\x|foo|\|x|\|x| 58foo\ x\x\""|foo|\|x|\|x|\|""| 59"foo\ x\x"|"foo\ x\x"| 60"foo\ x\x\\"|"foo\ x\x\\"| 61"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"| 62"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"| 63"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"| 64"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'| 65'foo\ bar'|'foo\ bar'| 66'foo\\ bar'|'foo\\ bar'| 67foo\ bar|foo|\|bar| 68foo#bar\nbaz|foobaz| 69:-) ;-)|:|-|)|;|-|)| 70áéíóú|á|é|í|ó|ú| 71""" 72 73posix_data = r"""x|x| 74foo bar|foo|bar| 75 foo bar|foo|bar| 76 foo bar |foo|bar| 77foo bar bla fasel|foo|bar|bla|fasel| 78x y z xxxx|x|y|z|xxxx| 79\x bar|x|bar| 80\ x bar| x|bar| 81\ bar| bar| 82foo \x bar|foo|x|bar| 83foo \ x bar|foo| x|bar| 84foo \ bar|foo| bar| 85foo "bar" bla|foo|bar|bla| 86"foo" "bar" "bla"|foo|bar|bla| 87"foo" bar "bla"|foo|bar|bla| 88"foo" bar bla|foo|bar|bla| 89foo 'bar' bla|foo|bar|bla| 90'foo' 'bar' 'bla'|foo|bar|bla| 91'foo' bar 'bla'|foo|bar|bla| 92'foo' bar bla|foo|bar|bla| 93blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz| 94blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz| 95""|| 96''|| 97foo "" bar|foo||bar| 98foo '' bar|foo||bar| 99foo "" "" "" bar|foo||||bar| 100foo '' '' '' bar|foo||||bar| 101\"|"| 102"\""|"| 103"foo\ bar"|foo\ bar| 104"foo\\ bar"|foo\ bar| 105"foo\\ bar\""|foo\ bar"| 106"foo\\" bar\"|foo\|bar"| 107"foo\\ bar\" dfadf"|foo\ bar" dfadf| 108"foo\\\ bar\" dfadf"|foo\\ bar" dfadf| 109"foo\\\x bar\" dfadf"|foo\\x bar" dfadf| 110"foo\x bar\" dfadf"|foo\x bar" dfadf| 111\'|'| 112'foo\ bar'|foo\ bar| 113'foo\\ bar'|foo\\ bar| 114"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df| 115\"foo|"foo| 116\"foo\x|"foox| 117"foo\x"|foo\x| 118"foo\ "|foo\ | 119foo\ xx|foo xx| 120foo\ x\x|foo xx| 121foo\ x\x\"|foo xx"| 122"foo\ x\x"|foo\ x\x| 123"foo\ x\x\\"|foo\ x\x\| 124"foo\ x\x\\""foobar"|foo\ x\x\foobar| 125"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar| 126"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar| 127"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't| 128"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\| 129'foo\ bar'|foo\ bar| 130'foo\\ bar'|foo\\ bar| 131foo\ bar|foo bar| 132foo#bar\nbaz|foo|baz| 133:-) ;-)|:-)|;-)| 134áéíóú|áéíóú| 135""" 136 137class ShlexTest(unittest.TestCase): 138 def setUp(self): 139 self.data = [x.split("|")[:-1] 140 for x in data.splitlines()] 141 self.posix_data = [x.split("|")[:-1] 142 for x in posix_data.splitlines()] 143 for item in self.data: 144 item[0] = item[0].replace(r"\n", "\n") 145 for item in self.posix_data: 146 item[0] = item[0].replace(r"\n", "\n") 147 148 def splitTest(self, data, comments): 149 for i in range(len(data)): 150 l = shlex.split(data[i][0], comments=comments) 151 self.assertEqual(l, data[i][1:], 152 "%s: %s != %s" % 153 (data[i][0], l, data[i][1:])) 154 155 def oldSplit(self, s): 156 ret = [] 157 lex = shlex.shlex(io.StringIO(s)) 158 tok = lex.get_token() 159 while tok: 160 ret.append(tok) 161 tok = lex.get_token() 162 return ret 163 164 def testSplitNone(self): 165 with self.assertRaises(ValueError): 166 shlex.split(None) 167 168 def testSplitPosix(self): 169 """Test data splitting with posix parser""" 170 self.splitTest(self.posix_data, comments=True) 171 172 def testCompat(self): 173 """Test compatibility interface""" 174 for i in range(len(self.data)): 175 l = self.oldSplit(self.data[i][0]) 176 self.assertEqual(l, self.data[i][1:], 177 "%s: %s != %s" % 178 (self.data[i][0], l, self.data[i][1:])) 179 180 def testSyntaxSplitAmpersandAndPipe(self): 181 """Test handling of syntax splitting of &, |""" 182 # Could take these forms: &&, &, |&, ;&, ;;& 183 # of course, the same applies to | and || 184 # these should all parse to the same output 185 for delimiter in ('&&', '&', '|&', ';&', ';;&', 186 '||', '|', '&|', ';|', ';;|'): 187 src = ['echo hi %s echo bye' % delimiter, 188 'echo hi%secho bye' % delimiter] 189 ref = ['echo', 'hi', delimiter, 'echo', 'bye'] 190 for ss, ws in itertools.product(src, (False, True)): 191 s = shlex.shlex(ss, punctuation_chars=True) 192 s.whitespace_split = ws 193 result = list(s) 194 self.assertEqual(ref, result, 195 "While splitting '%s' [ws=%s]" % (ss, ws)) 196 197 def testSyntaxSplitSemicolon(self): 198 """Test handling of syntax splitting of ;""" 199 # Could take these forms: ;, ;;, ;&, ;;& 200 # these should all parse to the same output 201 for delimiter in (';', ';;', ';&', ';;&'): 202 src = ['echo hi %s echo bye' % delimiter, 203 'echo hi%s echo bye' % delimiter, 204 'echo hi%secho bye' % delimiter] 205 ref = ['echo', 'hi', delimiter, 'echo', 'bye'] 206 for ss, ws in itertools.product(src, (False, True)): 207 s = shlex.shlex(ss, punctuation_chars=True) 208 s.whitespace_split = ws 209 result = list(s) 210 self.assertEqual(ref, result, 211 "While splitting '%s' [ws=%s]" % (ss, ws)) 212 213 def testSyntaxSplitRedirect(self): 214 """Test handling of syntax splitting of >""" 215 # of course, the same applies to <, | 216 # these should all parse to the same output 217 for delimiter in ('<', '|'): 218 src = ['echo hi %s out' % delimiter, 219 'echo hi%s out' % delimiter, 220 'echo hi%sout' % delimiter] 221 ref = ['echo', 'hi', delimiter, 'out'] 222 for ss, ws in itertools.product(src, (False, True)): 223 s = shlex.shlex(ss, punctuation_chars=True) 224 result = list(s) 225 self.assertEqual(ref, result, 226 "While splitting '%s' [ws=%s]" % (ss, ws)) 227 228 def testSyntaxSplitParen(self): 229 """Test handling of syntax splitting of ()""" 230 # these should all parse to the same output 231 src = ['( echo hi )', 232 '(echo hi)'] 233 ref = ['(', 'echo', 'hi', ')'] 234 for ss, ws in itertools.product(src, (False, True)): 235 s = shlex.shlex(ss, punctuation_chars=True) 236 s.whitespace_split = ws 237 result = list(s) 238 self.assertEqual(ref, result, 239 "While splitting '%s' [ws=%s]" % (ss, ws)) 240 241 def testSyntaxSplitCustom(self): 242 """Test handling of syntax splitting with custom chars""" 243 ss = "~/a&&b-c --color=auto||d *.py?" 244 ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?'] 245 s = shlex.shlex(ss, punctuation_chars="|") 246 result = list(s) 247 self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss) 248 ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?'] 249 s = shlex.shlex(ss, punctuation_chars="|") 250 s.whitespace_split = True 251 result = list(s) 252 self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss) 253 254 def testTokenTypes(self): 255 """Test that tokens are split with types as expected.""" 256 for source, expected in ( 257 ('a && b || c', 258 [('a', 'a'), ('&&', 'c'), ('b', 'a'), 259 ('||', 'c'), ('c', 'a')]), 260 ): 261 s = shlex.shlex(source, punctuation_chars=True) 262 observed = [] 263 while True: 264 t = s.get_token() 265 if t == s.eof: 266 break 267 if t[0] in s.punctuation_chars: 268 tt = 'c' 269 else: 270 tt = 'a' 271 observed.append((t, tt)) 272 self.assertEqual(observed, expected) 273 274 def testPunctuationInWordChars(self): 275 """Test that any punctuation chars are removed from wordchars""" 276 s = shlex.shlex('a_b__c', punctuation_chars='_') 277 self.assertNotIn('_', s.wordchars) 278 self.assertEqual(list(s), ['a', '_', 'b', '__', 'c']) 279 280 def testPunctuationWithWhitespaceSplit(self): 281 """Test that with whitespace_split, behaviour is as expected""" 282 s = shlex.shlex('a && b || c', punctuation_chars='&') 283 # whitespace_split is False, so splitting will be based on 284 # punctuation_chars 285 self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c']) 286 s = shlex.shlex('a && b || c', punctuation_chars='&') 287 s.whitespace_split = True 288 # whitespace_split is True, so splitting will be based on 289 # white space 290 self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c']) 291 292 def testPunctuationWithPosix(self): 293 """Test that punctuation_chars and posix behave correctly together.""" 294 # see Issue #29132 295 s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True) 296 self.assertEqual(list(s), ['f', '>', 'abc']) 297 s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True) 298 self.assertEqual(list(s), ['f', '>', '"abc"']) 299 300 def testEmptyStringHandling(self): 301 """Test that parsing of empty strings is correctly handled.""" 302 # see Issue #21999 303 expected = ['', ')', 'abc'] 304 for punct in (False, True): 305 s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct) 306 slist = list(s) 307 self.assertEqual(slist, expected) 308 expected = ["''", ')', 'abc'] 309 s = shlex.shlex("'')abc", punctuation_chars=True) 310 self.assertEqual(list(s), expected) 311 312 def testUnicodeHandling(self): 313 """Test punctuation_chars and whitespace_split handle unicode.""" 314 ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24" 315 # Should be parsed as one complete token (whitespace_split=True). 316 ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24'] 317 s = shlex.shlex(ss, punctuation_chars=True) 318 s.whitespace_split = True 319 self.assertEqual(list(s), ref) 320 # Without whitespace_split, uses wordchars and splits on all. 321 ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24'] 322 s = shlex.shlex(ss, punctuation_chars=True) 323 self.assertEqual(list(s), ref) 324 325 def testQuote(self): 326 safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./' 327 unicode_sample = '\xe9\xe0\xdf' # e + acute accent, a + grave, sharp s 328 unsafe = '"`$\\!' + unicode_sample 329 330 self.assertEqual(shlex.quote(''), "''") 331 self.assertEqual(shlex.quote(safeunquoted), safeunquoted) 332 self.assertEqual(shlex.quote('test file name'), "'test file name'") 333 for u in unsafe: 334 self.assertEqual(shlex.quote('test%sname' % u), 335 "'test%sname'" % u) 336 for u in unsafe: 337 self.assertEqual(shlex.quote("test%s'name'" % u), 338 "'test%s'\"'\"'name'\"'\"''" % u) 339 340 def testJoin(self): 341 for split_command, command in [ 342 (['a ', 'b'], "'a ' b"), 343 (['a', ' b'], "a ' b'"), 344 (['a', ' ', 'b'], "a ' ' b"), 345 (['"a', 'b"'], '\'"a\' \'b"\''), 346 ]: 347 with self.subTest(command=command): 348 joined = shlex.join(split_command) 349 self.assertEqual(joined, command) 350 351 def testJoinRoundtrip(self): 352 all_data = self.data + self.posix_data 353 for command, *split_command in all_data: 354 with self.subTest(command=command): 355 joined = shlex.join(split_command) 356 resplit = shlex.split(joined) 357 self.assertEqual(split_command, resplit) 358 359 def testPunctuationCharsReadOnly(self): 360 punctuation_chars = "/|$%^" 361 shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars) 362 self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars) 363 with self.assertRaises(AttributeError): 364 shlex_instance.punctuation_chars = False 365 366 367# Allow this test to be used with old shlex.py 368if not getattr(shlex, "split", None): 369 for methname in dir(ShlexTest): 370 if methname.startswith("test") and methname != "testCompat": 371 delattr(ShlexTest, methname) 372 373if __name__ == "__main__": 374 unittest.main() 375