• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from test.support import check_warnings
2import cgi
3import os
4import sys
5import tempfile
6import unittest
7import warnings
8from collections import namedtuple
9from io import StringIO, BytesIO
10from test import support
11
class HackedSysModule:
    """Minimal stand-in for ``sys`` used while testing the cgi module.

    The regression test will have real values in ``sys.argv``, which would
    completely confuse the test of the cgi module, so ``argv`` is empty
    here; ``stdin`` is simply the real ``sys.stdin``.
    """
    stdin = sys.stdin
    argv = []
17
# Replace the cgi module's ``sys`` attribute with the stub, so that code in
# cgi reading ``sys.argv`` sees an empty list.
cgi.sys = HackedSysModule()
19
class ComparableException:
    """Wrapper that lets a caught exception be compared with ``==``.

    The wrapper equals another object when that object is an ``Exception``
    whose class and ``args`` both match the wrapped error.  Attributes not
    found on the wrapper itself are looked up on the wrapped error.
    """

    def __init__(self, err):
        self.err = err

    def __getattr__(self, attr):
        # Only reached when normal lookup fails; defer to the wrapped error.
        return getattr(self.err, attr)

    def __eq__(self, anExc):
        if isinstance(anExc, Exception):
            wrapped = self.err
            same_class = wrapped.__class__ == anExc.__class__
            return same_class and wrapped.args == anExc.args
        # Not an exception: let Python try the reflected comparison.
        return NotImplemented

    def __str__(self):
        return str(self.err)
35
36def do_test(buf, method):
37    env = {}
38    if method == "GET":
39        fp = None
40        env['REQUEST_METHOD'] = 'GET'
41        env['QUERY_STRING'] = buf
42    elif method == "POST":
43        fp = BytesIO(buf.encode('latin-1')) # FieldStorage expects bytes
44        env['REQUEST_METHOD'] = 'POST'
45        env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded'
46        env['CONTENT_LENGTH'] = str(len(buf))
47    else:
48        raise ValueError("unknown method: %s" % method)
49    try:
50        return cgi.parse(fp, env, strict_parsing=1)
51    except Exception as err:
52        return ComparableException(err)
53
# Table of (query string, expected outcome) pairs consumed by test_strict.
# The expected outcome is either the dict of parsed fields or the ValueError
# that strict parsing is expected to produce for that input.
parse_strict_test_cases = [
    ("", ValueError("bad query field: ''")),
    ("&", ValueError("bad query field: ''")),
    ("&&", ValueError("bad query field: ''")),
    (";", ValueError("bad query field: ''")),
    (";&;", ValueError("bad query field: ''")),
    # Should the next few really be valid?
    ("=", {}),
    ("=&=", {}),
    ("=;=", {}),
    # The rest seem to make sense
    ("=a", {'': ['a']}),
    ("&=a", ValueError("bad query field: ''")),
    ("=a&", ValueError("bad query field: ''")),
    ("=&a", ValueError("bad query field: 'a'")),
    ("b=a", {'b': ['a']}),
    ("b+=a", {'b ': ['a']}),
    ("a=b=a", {'a': ['b=a']}),
    ("a=+b=a", {'a': [' b=a']}),
    ("&b=a", ValueError("bad query field: ''")),
    ("b&=a", ValueError("bad query field: 'b'")),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
    ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
     {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
      'cuyer': ['r'],
      'expire': ['964546263'],
      'kid': ['130003.300038'],
      'lobale': ['en-US'],
      'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
      'ss': ['env'],
      'view': ['bustomer'],
      }),

    ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
     {'SUBMIT': ['Browse'],
      '_assigned_to': ['31392'],
      '_category': ['100'],
      '_status': ['1'],
      'group_id': ['5470'],
      'set': ['custom'],
      })
    ]
100
def norm(seq):
    """Return the items of *seq* as a new list ordered by their ``repr``."""
    ordered = list(seq)
    ordered.sort(key=repr)
    return ordered
103
def first_elts(list):
    """Return a new list holding element 0 of every item in *list*."""
    heads = []
    for item in list:
        heads.append(item[0])
    return heads
106
def first_second_elts(list):
    """Pair element 0 of each item with element 0 of that item's element 1."""
    pairs = []
    for item in list:
        head = item[0]
        nested_head = item[1][0]
        pairs.append((head, nested_head))
    return pairs
109
def gen_result(data, environ):
    """Parse *data* with ``cgi.FieldStorage`` and flatten it to a plain dict.

    *data* is encoded as latin-1 and used as the request body; *environ*
    is passed to ``FieldStorage`` unchanged.

    Returns a dict mapping each field name to ``form.getlist(name)`` when
    the form holds a list of items for that name, and to the single item's
    ``value`` otherwise.
    """
    encoding = 'latin-1'
    # A freshly created BytesIO is already positioned at offset 0.
    fake_stdin = BytesIO(data.encode(encoding))
    form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)

    result = {}
    for k, v in dict(form).items():
        # A real conditional expression: the old ``x and a or b`` form would
        # fall through to ``v.value`` whenever ``a`` happened to be falsy.
        result[k] = form.getlist(k) if isinstance(v, list) else v.value

    return result
121
122class CgiTests(unittest.TestCase):
123
124    def test_parse_multipart(self):
125        fp = BytesIO(POSTDATA.encode('latin1'))
126        env = {'boundary': BOUNDARY.encode('latin1'),
127               'CONTENT-LENGTH': '558'}
128        result = cgi.parse_multipart(fp, env)
129        expected = {'submit': [' Add '], 'id': ['1234'],
130                    'file': [b'Testing 123.\n'], 'title': ['']}
131        self.assertEqual(result, expected)
132
133    def test_parse_multipart_invalid_encoding(self):
134        BOUNDARY = "JfISa01"
135        POSTDATA = """--JfISa01
136Content-Disposition: form-data; name="submit-name"
137Content-Length: 3
138
139\u2603
140--JfISa01"""
141        fp = BytesIO(POSTDATA.encode('utf8'))
142        env = {'boundary': BOUNDARY.encode('latin1'),
143               'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))}
144        result = cgi.parse_multipart(fp, env, encoding="ascii",
145                                     errors="surrogateescape")
146        expected = {'submit-name': ["\udce2\udc98\udc83"]}
147        self.assertEqual(result, expected)
148        self.assertEqual("\u2603".encode('utf8'),
149                         result["submit-name"][0].encode('utf8', 'surrogateescape'))
150
151    def test_fieldstorage_properties(self):
152        fs = cgi.FieldStorage()
153        self.assertFalse(fs)
154        self.assertIn("FieldStorage", repr(fs))
155        self.assertEqual(list(fs), list(fs.keys()))
156        fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
157        self.assertTrue(fs)
158
159    def test_fieldstorage_invalid(self):
160        self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
161                                                            environ={"REQUEST_METHOD":"PUT"})
162        self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
163        fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
164        self.assertRaises(TypeError, bool, fs)
165
166    def test_escape(self):
167        # cgi.escape() is deprecated.
168        with warnings.catch_warnings():
169            warnings.filterwarnings('ignore', r'cgi\.escape',
170                                     DeprecationWarning)
171            self.assertEqual("test & string", cgi.escape("test & string"))
172            self.assertEqual("&lt;test string&gt;", cgi.escape("<test string>"))
173            self.assertEqual("&quot;test string&quot;", cgi.escape('"test string"', True))
174
175    def test_strict(self):
176        for orig, expect in parse_strict_test_cases:
177            # Test basic parsing
178            d = do_test(orig, "GET")
179            self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
180            d = do_test(orig, "POST")
181            self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))
182
183            env = {'QUERY_STRING': orig}
184            fs = cgi.FieldStorage(environ=env)
185            if isinstance(expect, dict):
186                # test dict interface
187                self.assertEqual(len(expect), len(fs))
188                self.assertCountEqual(expect.keys(), fs.keys())
189                ##self.assertEqual(norm(expect.values()), norm(fs.values()))
190                ##self.assertEqual(norm(expect.items()), norm(fs.items()))
191                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
192                # test individual fields
193                for key in expect.keys():
194                    expect_val = expect[key]
195                    self.assertIn(key, fs)
196                    if len(expect_val) > 1:
197                        self.assertEqual(fs.getvalue(key), expect_val)
198                    else:
199                        self.assertEqual(fs.getvalue(key), expect_val[0])
200
201    def test_log(self):
202        cgi.log("Testing")
203
204        cgi.logfp = StringIO()
205        cgi.initlog("%s", "Testing initlog 1")
206        cgi.log("%s", "Testing log 2")
207        self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
208        if os.path.exists(os.devnull):
209            cgi.logfp = None
210            cgi.logfile = os.devnull
211            cgi.initlog("%s", "Testing log 3")
212            self.addCleanup(cgi.closelog)
213            cgi.log("Testing log 4")
214
215    def test_fieldstorage_readline(self):
216        # FieldStorage uses readline, which has the capacity to read all
217        # contents of the input file into memory; we use readline's size argument
218        # to prevent that for files that do not contain any newlines in
219        # non-GET/HEAD requests
220        class TestReadlineFile:
221            def __init__(self, file):
222                self.file = file
223                self.numcalls = 0
224
225            def readline(self, size=None):
226                self.numcalls += 1
227                if size:
228                    return self.file.readline(size)
229                else:
230                    return self.file.readline()
231
232            def __getattr__(self, name):
233                file = self.__dict__['file']
234                a = getattr(file, name)
235                if not isinstance(a, int):
236                    setattr(self, name, a)
237                return a
238
239        f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
240        self.addCleanup(f.close)
241        f.write(b'x' * 256 * 1024)
242        f.seek(0)
243        env = {'REQUEST_METHOD':'PUT'}
244        fs = cgi.FieldStorage(fp=f, environ=env)
245        self.addCleanup(fs.file.close)
246        # if we're not chunking properly, readline is only called twice
247        # (by read_binary); if we are chunking properly, it will be called 5 times
248        # as long as the chunksize is 1 << 16.
249        self.assertGreater(f.numcalls, 2)
250        f.close()
251
252    def test_fieldstorage_multipart(self):
253        #Test basic FieldStorage multipart parsing
254        env = {
255            'REQUEST_METHOD': 'POST',
256            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
257            'CONTENT_LENGTH': '558'}
258        fp = BytesIO(POSTDATA.encode('latin-1'))
259        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
260        self.assertEqual(len(fs.list), 4)
261        expect = [{'name':'id', 'filename':None, 'value':'1234'},
262                  {'name':'title', 'filename':None, 'value':''},
263                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
264                  {'name':'submit', 'filename':None, 'value':' Add '}]
265        for x in range(len(fs.list)):
266            for k, exp in expect[x].items():
267                got = getattr(fs.list[x], k)
268                self.assertEqual(got, exp)
269
270    def test_fieldstorage_multipart_leading_whitespace(self):
271        env = {
272            'REQUEST_METHOD': 'POST',
273            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
274            'CONTENT_LENGTH': '560'}
275        # Add some leading whitespace to our post data that will cause the
276        # first line to not be the innerboundary.
277        fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
278        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
279        self.assertEqual(len(fs.list), 4)
280        expect = [{'name':'id', 'filename':None, 'value':'1234'},
281                  {'name':'title', 'filename':None, 'value':''},
282                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
283                  {'name':'submit', 'filename':None, 'value':' Add '}]
284        for x in range(len(fs.list)):
285            for k, exp in expect[x].items():
286                got = getattr(fs.list[x], k)
287                self.assertEqual(got, exp)
288
289    def test_fieldstorage_multipart_non_ascii(self):
290        #Test basic FieldStorage multipart parsing
291        env = {'REQUEST_METHOD':'POST',
292            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
293            'CONTENT_LENGTH':'558'}
294        for encoding in ['iso-8859-1','utf-8']:
295            fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
296            fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
297            self.assertEqual(len(fs.list), 1)
298            expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
299            for x in range(len(fs.list)):
300                for k, exp in expect[x].items():
301                    got = getattr(fs.list[x], k)
302                    self.assertEqual(got, exp)
303
304    def test_fieldstorage_multipart_maxline(self):
305        # Issue #18167
306        maxline = 1 << 16
307        self.maxDiff = None
308        def check(content):
309            data = """---123
310Content-Disposition: form-data; name="upload"; filename="fake.txt"
311Content-Type: text/plain
312
313%s
314---123--
315""".replace('\n', '\r\n') % content
316            environ = {
317                'CONTENT_LENGTH':   str(len(data)),
318                'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
319                'REQUEST_METHOD':   'POST',
320            }
321            self.assertEqual(gen_result(data, environ),
322                             {'upload': content.encode('latin1')})
323        check('x' * (maxline - 1))
324        check('x' * (maxline - 1) + '\r')
325        check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))
326
327    def test_fieldstorage_multipart_w3c(self):
328        # Test basic FieldStorage multipart parsing (W3C sample)
329        env = {
330            'REQUEST_METHOD': 'POST',
331            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
332            'CONTENT_LENGTH': str(len(POSTDATA_W3))}
333        fp = BytesIO(POSTDATA_W3.encode('latin-1'))
334        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
335        self.assertEqual(len(fs.list), 2)
336        self.assertEqual(fs.list[0].name, 'submit-name')
337        self.assertEqual(fs.list[0].value, 'Larry')
338        self.assertEqual(fs.list[1].name, 'files')
339        files = fs.list[1].value
340        self.assertEqual(len(files), 2)
341        expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
342                  {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
343        for x in range(len(files)):
344            for k, exp in expect[x].items():
345                got = getattr(files[x], k)
346                self.assertEqual(got, exp)
347
348    def test_fieldstorage_part_content_length(self):
349        BOUNDARY = "JfISa01"
350        POSTDATA = """--JfISa01
351Content-Disposition: form-data; name="submit-name"
352Content-Length: 5
353
354Larry
355--JfISa01"""
356        env = {
357            'REQUEST_METHOD': 'POST',
358            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
359            'CONTENT_LENGTH': str(len(POSTDATA))}
360        fp = BytesIO(POSTDATA.encode('latin-1'))
361        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
362        self.assertEqual(len(fs.list), 1)
363        self.assertEqual(fs.list[0].name, 'submit-name')
364        self.assertEqual(fs.list[0].value, 'Larry')
365
366    def test_fieldstorage_as_context_manager(self):
367        fp = BytesIO(b'x' * 10)
368        env = {'REQUEST_METHOD': 'PUT'}
369        with cgi.FieldStorage(fp=fp, environ=env) as fs:
370            content = fs.file.read()
371            self.assertFalse(fs.file.closed)
372        self.assertTrue(fs.file.closed)
373        self.assertEqual(content, 'x' * 10)
374        with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'):
375            fs.file.read()
376
    # Expected flattened result shared by the testQSAnd* tests below; key2 is
    # a list because it occurs twice in those requests.
    _qs_result = {
        'key1': 'value1',
        'key2': ['value2x', 'value2y'],
        'key3': 'value3',
        'key4': 'value4'
    }
383    def testQSAndUrlEncode(self):
384        data = "key2=value2x&key3=value3&key4=value4"
385        environ = {
386            'CONTENT_LENGTH':   str(len(data)),
387            'CONTENT_TYPE':     'application/x-www-form-urlencoded',
388            'QUERY_STRING':     'key1=value1&key2=value2y',
389            'REQUEST_METHOD':   'POST',
390        }
391        v = gen_result(data, environ)
392        self.assertEqual(self._qs_result, v)
393
394    def test_max_num_fields(self):
395        # For application/x-www-form-urlencoded
396        data = '&'.join(['a=a']*11)
397        environ = {
398            'CONTENT_LENGTH': str(len(data)),
399            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
400            'REQUEST_METHOD': 'POST',
401        }
402
403        with self.assertRaises(ValueError):
404            cgi.FieldStorage(
405                fp=BytesIO(data.encode()),
406                environ=environ,
407                max_num_fields=10,
408            )
409
410        # For multipart/form-data
411        data = """---123
412Content-Disposition: form-data; name="a"
413
4143
415---123
416Content-Type: application/x-www-form-urlencoded
417
418a=4
419---123
420Content-Type: application/x-www-form-urlencoded
421
422a=5
423---123--
424"""
425        environ = {
426            'CONTENT_LENGTH':   str(len(data)),
427            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
428            'QUERY_STRING':     'a=1&a=2',
429            'REQUEST_METHOD':   'POST',
430        }
431
432        # 2 GET entities
433        # 1 top level POST entities
434        # 1 entity within the second POST entity
435        # 1 entity within the third POST entity
436        with self.assertRaises(ValueError):
437            cgi.FieldStorage(
438                fp=BytesIO(data.encode()),
439                environ=environ,
440                max_num_fields=4,
441            )
442        cgi.FieldStorage(
443            fp=BytesIO(data.encode()),
444            environ=environ,
445            max_num_fields=5,
446        )
447
448    def testQSAndFormData(self):
449        data = """---123
450Content-Disposition: form-data; name="key2"
451
452value2y
453---123
454Content-Disposition: form-data; name="key3"
455
456value3
457---123
458Content-Disposition: form-data; name="key4"
459
460value4
461---123--
462"""
463        environ = {
464            'CONTENT_LENGTH':   str(len(data)),
465            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
466            'QUERY_STRING':     'key1=value1&key2=value2x',
467            'REQUEST_METHOD':   'POST',
468        }
469        v = gen_result(data, environ)
470        self.assertEqual(self._qs_result, v)
471
472    def testQSAndFormDataFile(self):
473        data = """---123
474Content-Disposition: form-data; name="key2"
475
476value2y
477---123
478Content-Disposition: form-data; name="key3"
479
480value3
481---123
482Content-Disposition: form-data; name="key4"
483
484value4
485---123
486Content-Disposition: form-data; name="upload"; filename="fake.txt"
487Content-Type: text/plain
488
489this is the content of the fake file
490
491---123--
492"""
493        environ = {
494            'CONTENT_LENGTH':   str(len(data)),
495            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
496            'QUERY_STRING':     'key1=value1&key2=value2x',
497            'REQUEST_METHOD':   'POST',
498        }
499        result = self._qs_result.copy()
500        result.update({
501            'upload': b'this is the content of the fake file\n'
502        })
503        v = gen_result(data, environ)
504        self.assertEqual(result, v)
505
506    def test_deprecated_parse_qs(self):
507        # this func is moved to urllib.parse, this is just a sanity check
508        with check_warnings(('cgi.parse_qs is deprecated, use urllib.parse.'
509                             'parse_qs instead', DeprecationWarning)):
510            self.assertEqual({'a': ['A1'], 'B': ['B3'], 'b': ['B2']},
511                             cgi.parse_qs('a=A1&b=B2&B=B3'))
512
513    def test_deprecated_parse_qsl(self):
514        # this func is moved to urllib.parse, this is just a sanity check
515        with check_warnings(('cgi.parse_qsl is deprecated, use urllib.parse.'
516                             'parse_qsl instead', DeprecationWarning)):
517            self.assertEqual([('a', 'A1'), ('b', 'B2'), ('B', 'B3')],
518                             cgi.parse_qsl('a=A1&b=B2&B=B3'))
519
520    def test_parse_header(self):
521        self.assertEqual(
522            cgi.parse_header("text/plain"),
523            ("text/plain", {}))
524        self.assertEqual(
525            cgi.parse_header("text/vnd.just.made.this.up ; "),
526            ("text/vnd.just.made.this.up", {}))
527        self.assertEqual(
528            cgi.parse_header("text/plain;charset=us-ascii"),
529            ("text/plain", {"charset": "us-ascii"}))
530        self.assertEqual(
531            cgi.parse_header('text/plain ; charset="us-ascii"'),
532            ("text/plain", {"charset": "us-ascii"}))
533        self.assertEqual(
534            cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
535            ("text/plain", {"charset": "us-ascii", "another": "opt"}))
536        self.assertEqual(
537            cgi.parse_header('attachment; filename="silly.txt"'),
538            ("attachment", {"filename": "silly.txt"}))
539        self.assertEqual(
540            cgi.parse_header('attachment; filename="strange;name"'),
541            ("attachment", {"filename": "strange;name"}))
542        self.assertEqual(
543            cgi.parse_header('attachment; filename="strange;name";size=123;'),
544            ("attachment", {"filename": "strange;name", "size": "123"}))
545        self.assertEqual(
546            cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
547            ("form-data", {"name": "files", "filename": 'fo"o;bar'}))
548
549    def test_all(self):
550        blacklist = {"logfile", "logfp", "initlog", "dolog", "nolog",
551                     "closelog", "log", "maxlen", "valid_boundary"}
552        support.check__all__(self, cgi, blacklist=blacklist)
553
554
# Boundary string and matching multipart/form-data body used by the multipart
# tests.  The body holds four parts named id, title, file and submit; the
# file part carries a filename and a text/plain content type.
BOUNDARY = "---------------------------721837373350705526688164684"

POSTDATA = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

1234
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="title"


-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="file"; filename="test.txt"
Content-Type: text/plain

Testing 123.

-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="submit"

 Add\x20
-----------------------------721837373350705526688164684--
"""
577
# Single-part body, delimited by BOUNDARY, whose "id" value consists of
# non-ASCII characters; the non-ASCII multipart test encodes it per encoding.
POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

\xe7\xf1\x80
-----------------------------721837373350705526688164684
"""
584
# Sample taken from
# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
# The outer body (boundary AaB03x) holds a plain "submit-name" field and a
# "files" field that is itself multipart/mixed (boundary BbC04y) with two
# file parts.
BOUNDARY_W3 = "AaB03x"
POSTDATA_W3 = """--AaB03x
Content-Disposition: form-data; name="submit-name"

Larry
--AaB03x
Content-Disposition: form-data; name="files"
Content-Type: multipart/mixed; boundary=BbC04y

--BbC04y
Content-Disposition: file; filename="file1.txt"
Content-Type: text/plain

... contents of file1.txt ...
--BbC04y
Content-Disposition: file; filename="file2.gif"
Content-Type: image/gif
Content-Transfer-Encoding: binary

...contents of file2.gif...
--BbC04y--
--AaB03x--
"""
609
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
612