• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import io
2import itertools
3import shlex
4import string
5import unittest
6
7
8# The original test data set was from shellwords, by Hartmut Goebel.
9
# Expected tokenisation for the default (non-POSIX) lexer.
#
# One record per line, with fields separated by "|":
#     <input string>|<token 1>|<token 2>|...|
# Every record ends with a trailing "|", so ShlexTest.setUp() discards the
# empty field that str.split("|") leaves after it.  A literal backslash-n
# pair in the input field is turned into a real newline by setUp(); the
# string is raw so that backslashes reach the lexer untouched.
# testCompat feeds each input to a plain shlex.shlex() (via oldSplit) and
# compares the tokens it yields with the remaining fields.
data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""
72
# Expected tokenisation for shlex.split(), i.e. the POSIX-mode lexer.
#
# One record per line, with fields separated by "|":
#     <input string>|<token 1>|<token 2>|...|
# Every record ends with a trailing "|", so ShlexTest.setUp() discards the
# empty field that str.split("|") leaves after it; an expected token may
# itself be empty (two adjacent "|").  A literal backslash-n pair in the
# input field is turned into a real newline by setUp(); the string is raw so
# that backslashes reach the lexer untouched.
# testSplitPosix runs shlex.split(input, comments=True) on each record and
# compares the result with the remaining fields.
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
136
137class ShlexTest(unittest.TestCase):
138    def setUp(self):
139        self.data = [x.split("|")[:-1]
140                     for x in data.splitlines()]
141        self.posix_data = [x.split("|")[:-1]
142                           for x in posix_data.splitlines()]
143        for item in self.data:
144            item[0] = item[0].replace(r"\n", "\n")
145        for item in self.posix_data:
146            item[0] = item[0].replace(r"\n", "\n")
147
148    def splitTest(self, data, comments):
149        for i in range(len(data)):
150            l = shlex.split(data[i][0], comments=comments)
151            self.assertEqual(l, data[i][1:],
152                             "%s: %s != %s" %
153                             (data[i][0], l, data[i][1:]))
154
155    def oldSplit(self, s):
156        ret = []
157        lex = shlex.shlex(io.StringIO(s))
158        tok = lex.get_token()
159        while tok:
160            ret.append(tok)
161            tok = lex.get_token()
162        return ret
163
164    def testSplitNone(self):
165        with self.assertRaises(ValueError):
166            shlex.split(None)
167
168    def testSplitPosix(self):
169        """Test data splitting with posix parser"""
170        self.splitTest(self.posix_data, comments=True)
171
172    def testCompat(self):
173        """Test compatibility interface"""
174        for i in range(len(self.data)):
175            l = self.oldSplit(self.data[i][0])
176            self.assertEqual(l, self.data[i][1:],
177                             "%s: %s != %s" %
178                             (self.data[i][0], l, self.data[i][1:]))
179
180    def testSyntaxSplitAmpersandAndPipe(self):
181        """Test handling of syntax splitting of &, |"""
182        # Could take these forms: &&, &, |&, ;&, ;;&
183        # of course, the same applies to | and ||
184        # these should all parse to the same output
185        for delimiter in ('&&', '&', '|&', ';&', ';;&',
186                          '||', '|', '&|', ';|', ';;|'):
187            src = ['echo hi %s echo bye' % delimiter,
188                   'echo hi%secho bye' % delimiter]
189            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
190            for ss, ws in itertools.product(src, (False, True)):
191                s = shlex.shlex(ss, punctuation_chars=True)
192                s.whitespace_split = ws
193                result = list(s)
194                self.assertEqual(ref, result,
195                                 "While splitting '%s' [ws=%s]" % (ss, ws))
196
197    def testSyntaxSplitSemicolon(self):
198        """Test handling of syntax splitting of ;"""
199        # Could take these forms: ;, ;;, ;&, ;;&
200        # these should all parse to the same output
201        for delimiter in (';', ';;', ';&', ';;&'):
202            src = ['echo hi %s echo bye' % delimiter,
203                   'echo hi%s echo bye' % delimiter,
204                   'echo hi%secho bye' % delimiter]
205            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
206            for ss, ws in itertools.product(src, (False, True)):
207                s = shlex.shlex(ss, punctuation_chars=True)
208                s.whitespace_split = ws
209                result = list(s)
210                self.assertEqual(ref, result,
211                                 "While splitting '%s' [ws=%s]" % (ss, ws))
212
213    def testSyntaxSplitRedirect(self):
214        """Test handling of syntax splitting of >"""
215        # of course, the same applies to <, |
216        # these should all parse to the same output
217        for delimiter in ('<', '|'):
218            src = ['echo hi %s out' % delimiter,
219                   'echo hi%s out' % delimiter,
220                   'echo hi%sout' % delimiter]
221            ref = ['echo', 'hi', delimiter, 'out']
222            for ss, ws in itertools.product(src, (False, True)):
223                s = shlex.shlex(ss, punctuation_chars=True)
224                result = list(s)
225                self.assertEqual(ref, result,
226                                 "While splitting '%s' [ws=%s]" % (ss, ws))
227
228    def testSyntaxSplitParen(self):
229        """Test handling of syntax splitting of ()"""
230        # these should all parse to the same output
231        src = ['( echo hi )',
232               '(echo hi)']
233        ref = ['(', 'echo', 'hi', ')']
234        for ss, ws in itertools.product(src, (False, True)):
235            s = shlex.shlex(ss, punctuation_chars=True)
236            s.whitespace_split = ws
237            result = list(s)
238            self.assertEqual(ref, result,
239                             "While splitting '%s' [ws=%s]" % (ss, ws))
240
241    def testSyntaxSplitCustom(self):
242        """Test handling of syntax splitting with custom chars"""
243        ss = "~/a&&b-c --color=auto||d *.py?"
244        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
245        s = shlex.shlex(ss, punctuation_chars="|")
246        result = list(s)
247        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
248        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
249        s = shlex.shlex(ss, punctuation_chars="|")
250        s.whitespace_split = True
251        result = list(s)
252        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)
253
254    def testTokenTypes(self):
255        """Test that tokens are split with types as expected."""
256        for source, expected in (
257                                ('a && b || c',
258                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
259                                  ('||', 'c'), ('c', 'a')]),
260                              ):
261            s = shlex.shlex(source, punctuation_chars=True)
262            observed = []
263            while True:
264                t = s.get_token()
265                if t == s.eof:
266                    break
267                if t[0] in s.punctuation_chars:
268                    tt = 'c'
269                else:
270                    tt = 'a'
271                observed.append((t, tt))
272            self.assertEqual(observed, expected)
273
274    def testPunctuationInWordChars(self):
275        """Test that any punctuation chars are removed from wordchars"""
276        s = shlex.shlex('a_b__c', punctuation_chars='_')
277        self.assertNotIn('_', s.wordchars)
278        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])
279
280    def testPunctuationWithWhitespaceSplit(self):
281        """Test that with whitespace_split, behaviour is as expected"""
282        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
283        # whitespace_split is False, so splitting will be based on
284        # punctuation_chars
285        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
286        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
287        s.whitespace_split = True
288        # whitespace_split is True, so splitting will be based on
289        # white space
290        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])
291
292    def testPunctuationWithPosix(self):
293        """Test that punctuation_chars and posix behave correctly together."""
294        # see Issue #29132
295        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
296        self.assertEqual(list(s), ['f', '>', 'abc'])
297        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
298        self.assertEqual(list(s), ['f', '>', '"abc"'])
299
300    def testEmptyStringHandling(self):
301        """Test that parsing of empty strings is correctly handled."""
302        # see Issue #21999
303        expected = ['', ')', 'abc']
304        for punct in (False, True):
305            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
306            slist = list(s)
307            self.assertEqual(slist, expected)
308        expected = ["''", ')', 'abc']
309        s = shlex.shlex("'')abc", punctuation_chars=True)
310        self.assertEqual(list(s), expected)
311
312    def testUnicodeHandling(self):
313        """Test punctuation_chars and whitespace_split handle unicode."""
314        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
315        # Should be parsed as one complete token (whitespace_split=True).
316        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
317        s = shlex.shlex(ss, punctuation_chars=True)
318        s.whitespace_split = True
319        self.assertEqual(list(s), ref)
320        # Without whitespace_split, uses wordchars and splits on all.
321        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
322        s = shlex.shlex(ss, punctuation_chars=True)
323        self.assertEqual(list(s), ref)
324
325    def testQuote(self):
326        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
327        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
328        unsafe = '"`$\\!' + unicode_sample
329
330        self.assertEqual(shlex.quote(''), "''")
331        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
332        self.assertEqual(shlex.quote('test file name'), "'test file name'")
333        for u in unsafe:
334            self.assertEqual(shlex.quote('test%sname' % u),
335                             "'test%sname'" % u)
336        for u in unsafe:
337            self.assertEqual(shlex.quote("test%s'name'" % u),
338                             "'test%s'\"'\"'name'\"'\"''" % u)
339
340    def testJoin(self):
341        for split_command, command in [
342            (['a ', 'b'], "'a ' b"),
343            (['a', ' b'], "a ' b'"),
344            (['a', ' ', 'b'], "a ' ' b"),
345            (['"a', 'b"'], '\'"a\' \'b"\''),
346        ]:
347            with self.subTest(command=command):
348                joined = shlex.join(split_command)
349                self.assertEqual(joined, command)
350
351    def testJoinRoundtrip(self):
352        all_data = self.data + self.posix_data
353        for command, *split_command in all_data:
354            with self.subTest(command=command):
355                joined = shlex.join(split_command)
356                resplit = shlex.split(joined)
357                self.assertEqual(split_command, resplit)
358
359    def testPunctuationCharsReadOnly(self):
360        punctuation_chars = "/|$%^"
361        shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
362        self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
363        with self.assertRaises(AttributeError):
364            shlex_instance.punctuation_chars = False
365
366
# Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None):
    # No usable shlex.split: strip every test method from ShlexTest except
    # testCompat.
    for methname in dir(ShlexTest):
        if methname == "testCompat" or not methname.startswith("test"):
            continue
        delattr(ShlexTest, methname)

# Run the suite when executed as a script.
if __name__ == "__main__":
    unittest.main()
375