"""Unit tests for the standard-library ``shlex`` module.

Two pipe-delimited tables (``data`` for the compatibility lexer and
``posix_data`` for POSIX mode) drive the table-based tests; the remaining
tests cover ``punctuation_chars``, ``whitespace_split``, ``quote`` and
``join``.
"""
import io
import itertools
import shlex
import string
import sys
import unittest
from unittest import mock


# The original test data set was from shellwords, by Hartmut Goebel.
#
# Row format: ``input|token|token|...|``.  The first field is the string
# handed to the lexer, the remaining fields are the expected tokens.  The
# tables are raw strings, so a literal ``\n`` inside an input is turned into
# a real newline by ShlexTest.setUp().

data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""

posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""


class ShlexTest(unittest.TestCase):
    """Table-driven and targeted tests for ``shlex``."""

    def setUp(self):
        """Parse both tables into ``[input, token, ...]`` rows."""
        # Every row ends with "|", so the last split field is always an
        # empty string and is dropped.
        self.data = [x.split("|")[:-1]
                     for x in data.splitlines()]
        self.posix_data = [x.split("|")[:-1]
                           for x in posix_data.splitlines()]
        # Only the input column carries the literal backslash-n marker.
        for item in itertools.chain(self.data, self.posix_data):
            item[0] = item[0].replace(r"\n", "\n")

    def splitTest(self, data, comments):
        """Check ``shlex.split`` against every ``[input, token, ...]`` row."""
        for source, *expected in data:
            tokens = shlex.split(source, comments=comments)
            self.assertEqual(tokens, expected,
                             "%s: %s != %s" %
                             (source, tokens, expected))

    def oldSplit(self, s):
        """Tokenize *s* with a plain ``shlex.shlex`` fed from a stream.

        Tokens are collected until ``get_token()`` returns a false value.
        """
        ret = []
        lex = shlex.shlex(io.StringIO(s))
        tok = lex.get_token()
        while tok:
            ret.append(tok)
            tok = lex.get_token()
        return ret

    def _checkPunctuationSplit(self, sources, ref):
        """Lex each source with and without ``whitespace_split``.

        Every string in *sources* is tokenized by a lexer created with
        ``punctuation_chars=True``, once per ``whitespace_split`` setting,
        and must produce exactly *ref*.
        """
        for ss, ws in itertools.product(sources, (False, True)):
            s = shlex.shlex(ss, punctuation_chars=True)
            s.whitespace_split = ws
            result = list(s)
            self.assertEqual(ref, result,
                             "While splitting '%s' [ws=%s]" % (ss, ws))

    @mock.patch('sys.stdin', io.StringIO())
    def testSplitNoneDeprecation(self):
        """Test that passing None to split() is deprecated or rejected."""
        # sys.stdin is patched with an empty stream so that interpreters
        # which still fall back to reading stdin cannot block.
        if sys.version_info >= (3, 12):
            # Since 3.12 the deprecated fallback is gone and None is an
            # error.
            with self.assertRaises(ValueError):
                shlex.split(None)
        else:
            with self.assertWarns(DeprecationWarning):
                shlex.split(None)

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for source, *expected in self.data:
            tokens = self.oldSplit(source)
            self.assertEqual(tokens, expected,
                             "%s: %s != %s" %
                             (source, tokens, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._checkPunctuationSplit(src, ref)

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._checkPunctuationSplit(src, ref)

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >"""
        # of course, the same applies to <, |
        # these should all parse to the same output
        for delimiter in ('>', '<', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            # The shared helper applies whitespace_split, so the ws=True
            # case is exercised and not merely named in the message.
            self._checkPunctuationSplit(src, ref)

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        self._checkPunctuationSplit(src, ref)

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        ss = "~/a&&b-c --color=auto||d *.py?"
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        s.whitespace_split = True
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        for source, expected in (
                                ('a && b || c',
                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                                  ('||', 'c'), ('c', 'a')]),
                              ):
            s = shlex.shlex(source, punctuation_chars=True)
            observed = []
            while True:
                t = s.get_token()
                if t == s.eof:
                    break
                # 'c' marks a punctuation token, 'a' anything else.
                if t[0] in s.punctuation_chars:
                    tt = 'c'
                else:
                    tt = 'a'
                observed.append((t, tt))
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
            slist = list(s)
            self.assertEqual(slist, expected)
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testUnicodeHandling(self):
        """Test punctuation_chars and whitespace_split handle unicode."""
        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
        # Should be parsed as one complete token (whitespace_split=True).
        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        s.whitespace_split = True
        self.assertEqual(list(s), ref)
        # Without whitespace_split, uses wordchars and splits on all.
        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        self.assertEqual(list(s), ref)

    def testQuote(self):
        """Test that quote() leaves safe strings alone and quotes the rest."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)

    def testJoin(self):
        """Test that join() quotes exactly the arguments that need it."""
        for split_command, command in [
            (['a ', 'b'], "'a ' b"),
            (['a', ' b'], "a ' b'"),
            (['a', ' ', 'b'], "a ' ' b"),
            (['"a', 'b"'], '\'"a\' \'b"\''),
        ]:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                self.assertEqual(joined, command)

    def testJoinRoundtrip(self):
        """Test that split(join(tokens)) returns the original tokens."""
        all_data = self.data + self.posix_data
        for command, *split_command in all_data:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                resplit = shlex.split(joined)
                self.assertEqual(split_command, resplit)

    def testPunctuationCharsReadOnly(self):
        """Test that punctuation_chars cannot be reassigned after creation."""
        punctuation_chars = "/|$%^"
        shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
        self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
        with self.assertRaises(AttributeError):
            shlex_instance.punctuation_chars = False


# Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None):
    for methname in dir(ShlexTest):
        if methname.startswith("test") and methname != "testCompat":
            delattr(ShlexTest, methname)

if __name__ == "__main__":
    unittest.main()