# Regression tests for the (deprecated) ``cgi`` module: query-string and
# multipart parsing, FieldStorage behaviour, logging helpers and the
# deprecated escape/parse_qs/parse_qsl shims.

from test.support import check_warnings
import cgi
import os
import sys
import tempfile
import unittest
import warnings
from collections import namedtuple
from io import StringIO, BytesIO
from test import support

class HackedSysModule:
    # The regression test will have real values in sys.argv, which
    # will completely confuse the test of the cgi module
    argv = []
    stdin = sys.stdin

cgi.sys = HackedSysModule()

class ComparableException:
    """Wrap an exception so it compares equal to one of same class and args."""

    def __init__(self, err):
        self.err = err

    def __str__(self):
        return str(self.err)

    def __eq__(self, anExc):
        if not isinstance(anExc, Exception):
            return NotImplemented
        return (self.err.__class__ == anExc.__class__ and
                self.err.args == anExc.args)

    def __getattr__(self, attr):
        # Delegate everything else to the wrapped exception.
        return getattr(self.err, attr)

def do_test(buf, method):
    """Run cgi.parse() in strict mode on *buf* submitted via *method*.

    *method* is "GET" (buf becomes QUERY_STRING) or "POST" (buf becomes a
    urlencoded request body); anything else raises ValueError.  Returns the
    parsed dict, or a ComparableException wrapping whatever cgi.parse raised.
    """
    env = {}
    if method == "GET":
        fp = None
        env['REQUEST_METHOD'] = 'GET'
        env['QUERY_STRING'] = buf
    elif method == "POST":
        fp = BytesIO(buf.encode('latin-1')) # FieldStorage expects bytes
        env['REQUEST_METHOD'] = 'POST'
        env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded'
        env['CONTENT_LENGTH'] = str(len(buf))
    else:
        raise ValueError("unknown method: %s" % method)
    try:
        return cgi.parse(fp, env, strict_parsing=1)
    except Exception as err:
        return ComparableException(err)

# Pairs of (query string, expected result); the expected result is either the
# parsed dict or the exception that strict parsing is expected to raise.
parse_strict_test_cases = [
    ("", ValueError("bad query field: ''")),
    ("&", ValueError("bad query field: ''")),
    ("&&", ValueError("bad query field: ''")),
    (";", ValueError("bad query field: ''")),
    (";&;", ValueError("bad query field: ''")),
    # Should the next few really be valid?
    ("=", {}),
    ("=&=", {}),
    ("=;=", {}),
    # The rest seem to make sense
    ("=a", {'': ['a']}),
    ("&=a", ValueError("bad query field: ''")),
    ("=a&", ValueError("bad query field: ''")),
    ("=&a", ValueError("bad query field: 'a'")),
    ("b=a", {'b': ['a']}),
    ("b+=a", {'b ': ['a']}),
    ("a=b=a", {'a': ['b=a']}),
    ("a=+b=a", {'a': [' b=a']}),
    ("&b=a", ValueError("bad query field: ''")),
    ("b&=a", ValueError("bad query field: 'b'")),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
    ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
     {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
      'cuyer': ['r'],
      'expire': ['964546263'],
      'kid': ['130003.300038'],
      'lobale': ['en-US'],
      'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
      'ss': ['env'],
      'view': ['bustomer'],
      }),

    ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
     {'SUBMIT': ['Browse'],
      '_assigned_to': ['31392'],
      '_category': ['100'],
      '_status': ['1'],
      'group_id': ['5470'],
      'set': ['custom'],
      })
    ]

def norm(seq):
    """Return *seq* sorted by the repr() of its items."""
    return sorted(seq, key=repr)

def first_elts(list):
    """Return the first element of every item in *list*."""
    return [p[0] for p in list]

def first_second_elts(list):
    """Return (p[0], p[1][0]) for every item p in *list*."""
    return [(p[0], p[1][0]) for p in list]

def gen_result(data, environ):
    """Parse *data* as a latin-1 request body and flatten the FieldStorage.

    Returns a dict mapping each field name to form.getlist(name) when the
    field occurred more than once (FieldStorage yields a list for it), or to
    the single field's .value otherwise.
    """
    encoding = 'latin-1'
    fake_stdin = BytesIO(data.encode(encoding))
    fake_stdin.seek(0)
    form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)

    result = {}
    for k, v in dict(form).items():
        # A conditional expression, unlike "cond and a or b", cannot fall
        # through to v.value when the list branch happens to be falsy.
        result[k] = form.getlist(k) if isinstance(v, list) else v.value

    return result

class CgiTests(unittest.TestCase):

    def test_parse_multipart(self):
        fp = BytesIO(POSTDATA.encode('latin1'))
        env = {'boundary': BOUNDARY.encode('latin1'),
               'CONTENT-LENGTH': '558'}
        result = cgi.parse_multipart(fp, env)
        expected = {'submit': [' Add '], 'id': ['1234'],
                    'file': [b'Testing 123.\n'], 'title': ['']}
        self.assertEqual(result, expected)

    def test_parse_multipart_invalid_encoding(self):
        BOUNDARY = "JfISa01"
        POSTDATA = """--JfISa01
Content-Disposition: form-data; name="submit-name"
Content-Length: 3

\u2603
--JfISa01"""
        fp = BytesIO(POSTDATA.encode('utf8'))
        env = {'boundary': BOUNDARY.encode('latin1'),
               'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))}
        result = cgi.parse_multipart(fp, env, encoding="ascii",
                                     errors="surrogateescape")
        expected = {'submit-name': ["\udce2\udc98\udc83"]}
        self.assertEqual(result, expected)
        self.assertEqual("\u2603".encode('utf8'),
            result["submit-name"][0].encode('utf8', 'surrogateescape'))

    def test_fieldstorage_properties(self):
        fs = cgi.FieldStorage()
        self.assertFalse(fs)
        self.assertIn("FieldStorage", repr(fs))
        self.assertEqual(list(fs), list(fs.keys()))
        fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
        self.assertTrue(fs)

    def test_fieldstorage_invalid(self):
        self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
                                                            environ={"REQUEST_METHOD":"PUT"})
        self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
        fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
        self.assertRaises(TypeError, bool, fs)

    def test_escape(self):
        # cgi.escape() is deprecated.
        # The expected values must be the literal HTML entities: '&', '<' and
        # '>' are always escaped, '"' only when the quote argument is true.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'cgi\.escape',
                                    DeprecationWarning)
            self.assertEqual("test &amp; string", cgi.escape("test & string"))
            self.assertEqual("&lt;test string&gt;", cgi.escape("<test string>"))
            self.assertEqual("&quot;test string&quot;", cgi.escape('"test string"', True))

    def test_strict(self):
        for orig, expect in parse_strict_test_cases:
            # Test basic parsing
            d = do_test(orig, "GET")
            self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
            d = do_test(orig, "POST")
            self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))

            env = {'QUERY_STRING': orig}
            fs = cgi.FieldStorage(environ=env)
            if isinstance(expect, dict):
                # test dict interface
                self.assertEqual(len(expect), len(fs))
                self.assertCountEqual(expect.keys(), fs.keys())
                ##self.assertEqual(norm(expect.values()), norm(fs.values()))
                ##self.assertEqual(norm(expect.items()), norm(fs.items()))
                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
                # test individual fields
                for key, expect_val in expect.items():
                    self.assertIn(key, fs)
                    if len(expect_val) > 1:
                        self.assertEqual(fs.getvalue(key), expect_val)
                    else:
                        self.assertEqual(fs.getvalue(key), expect_val[0])

    def test_log(self):
        cgi.log("Testing")

        cgi.logfp = StringIO()
        cgi.initlog("%s", "Testing initlog 1")
        cgi.log("%s", "Testing log 2")
        self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
        if os.path.exists(os.devnull):
            cgi.logfp = None
            cgi.logfile = os.devnull
            cgi.initlog("%s", "Testing log 3")
            self.addCleanup(cgi.closelog)
            cgi.log("Testing log 4")

    def test_fieldstorage_readline(self):
        # FieldStorage uses readline, which has the capacity to read all
        # contents of the input file into memory; we use readline's size argument
        # to prevent that for files that do not contain any newlines in
        # non-GET/HEAD requests
        class TestReadlineFile:
            # File wrapper that counts how many times readline() is called.
            def __init__(self, file):
                self.file = file
                self.numcalls = 0

            def readline(self, size=None):
                self.numcalls += 1
                if size:
                    return self.file.readline(size)
                else:
                    return self.file.readline()

            def __getattr__(self, name):
                file = self.__dict__['file']
                a = getattr(file, name)
                # Cache non-int attributes on the wrapper so later lookups
                # bypass __getattr__.
                if not isinstance(a, int):
                    setattr(self, name, a)
                return a

        f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
        self.addCleanup(f.close)
        f.write(b'x' * 256 * 1024)
        f.seek(0)
        env = {'REQUEST_METHOD':'PUT'}
        fs = cgi.FieldStorage(fp=f, environ=env)
        self.addCleanup(fs.file.close)
        # if we're not chunking properly, readline is only called twice
        # (by read_binary); if we are chunking properly, it will be called 5 times
        # as long as the chunksize is 1 << 16.
        self.assertGreater(f.numcalls, 2)
        f.close()

    def test_fieldstorage_multipart(self):
        #Test basic FieldStorage multipart parsing
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': '558'}
        fp = BytesIO(POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 4)
        expect = [{'name':'id', 'filename':None, 'value':'1234'},
                  {'name':'title', 'filename':None, 'value':''},
                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
                  {'name':'submit', 'filename':None, 'value':' Add '}]
        # Lengths were asserted equal above, so zip() visits every part.
        for part, attrs in zip(fs.list, expect):
            for k, exp in attrs.items():
                got = getattr(part, k)
                self.assertEqual(got, exp)

    def test_fieldstorage_multipart_leading_whitespace(self):
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': '560'}
        # Add some leading whitespace to our post data that will cause the
        # first line to not be the innerboundary.
        fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 4)
        expect = [{'name':'id', 'filename':None, 'value':'1234'},
                  {'name':'title', 'filename':None, 'value':''},
                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
                  {'name':'submit', 'filename':None, 'value':' Add '}]
        # Lengths were asserted equal above, so zip() visits every part.
        for part, attrs in zip(fs.list, expect):
            for k, exp in attrs.items():
                got = getattr(part, k)
                self.assertEqual(got, exp)

    def test_fieldstorage_multipart_non_ascii(self):
        #Test basic FieldStorage multipart parsing
        env = {'REQUEST_METHOD':'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH':'558'}
        for encoding in ['iso-8859-1','utf-8']:
            fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
            fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
            self.assertEqual(len(fs.list), 1)
            expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
            for part, attrs in zip(fs.list, expect):
                for k, exp in attrs.items():
                    got = getattr(part, k)
                    self.assertEqual(got, exp)

    def test_fieldstorage_multipart_maxline(self):
        # Issue #18167
        maxline = 1 << 16
        self.maxDiff = None
        def check(content):
            data = """---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain

%s
---123--
""".replace('\n', '\r\n') % content
            environ = {
                'CONTENT_LENGTH':   str(len(data)),
                'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
                'REQUEST_METHOD':   'POST',
            }
            self.assertEqual(gen_result(data, environ),
                             {'upload': content.encode('latin1')})
        check('x' * (maxline - 1))
        check('x' * (maxline - 1) + '\r')
        check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))

    def test_fieldstorage_multipart_w3c(self):
        # Test basic FieldStorage multipart parsing (W3C sample)
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
            'CONTENT_LENGTH': str(len(POSTDATA_W3))}
        fp = BytesIO(POSTDATA_W3.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 2)
        self.assertEqual(fs.list[0].name, 'submit-name')
        self.assertEqual(fs.list[0].value, 'Larry')
        self.assertEqual(fs.list[1].name, 'files')
        files = fs.list[1].value
        self.assertEqual(len(files), 2)
        expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
                  {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
        # Lengths were asserted equal above, so zip() visits every file.
        for part, attrs in zip(files, expect):
            for k, exp in attrs.items():
                got = getattr(part, k)
                self.assertEqual(got, exp)

    def test_fieldstorage_part_content_length(self):
        BOUNDARY = "JfISa01"
        POSTDATA = """--JfISa01
Content-Disposition: form-data; name="submit-name"
Content-Length: 5

Larry
--JfISa01"""
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': str(len(POSTDATA))}
        fp = BytesIO(POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 1)
        self.assertEqual(fs.list[0].name, 'submit-name')
        self.assertEqual(fs.list[0].value, 'Larry')

    def test_fieldstorage_as_context_manager(self):
        fp = BytesIO(b'x' * 10)
        env = {'REQUEST_METHOD': 'PUT'}
        with cgi.FieldStorage(fp=fp, environ=env) as fs:
            content = fs.file.read()
            self.assertFalse(fs.file.closed)
        self.assertTrue(fs.file.closed)
        self.assertEqual(content, 'x' * 10)
        with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'):
            fs.file.read()

    # Expected flattened result shared by the testQSAnd* tests below.
    _qs_result = {
        'key1': 'value1',
        'key2': ['value2x', 'value2y'],
        'key3': 'value3',
        'key4': 'value4'
    }
    def testQSAndUrlEncode(self):
        data = "key2=value2x&key3=value3&key4=value4"
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'application/x-www-form-urlencoded',
            'QUERY_STRING':     'key1=value1&key2=value2y',
            'REQUEST_METHOD':   'POST',
        }
        v = gen_result(data, environ)
        self.assertEqual(self._qs_result, v)

    def test_max_num_fields(self):
        # For application/x-www-form-urlencoded
        data = '&'.join(['a=a']*11)
        environ = {
            'CONTENT_LENGTH': str(len(data)),
            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
            'REQUEST_METHOD': 'POST',
        }

        with self.assertRaises(ValueError):
            cgi.FieldStorage(
                fp=BytesIO(data.encode()),
                environ=environ,
                max_num_fields=10,
            )

        # For multipart/form-data
        data = """---123
Content-Disposition: form-data; name="a"

3
---123
Content-Type: application/x-www-form-urlencoded

a=4
---123
Content-Type: application/x-www-form-urlencoded

a=5
---123--
"""
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
            'QUERY_STRING':     'a=1&a=2',
            'REQUEST_METHOD':   'POST',
        }

        # 2 GET entities
        # 1 top level POST entities
        # 1 entity within the second POST entity
        # 1 entity within the third POST entity
        with self.assertRaises(ValueError):
            cgi.FieldStorage(
                fp=BytesIO(data.encode()),
                environ=environ,
                max_num_fields=4,
            )
        cgi.FieldStorage(
            fp=BytesIO(data.encode()),
            environ=environ,
            max_num_fields=5,
        )

    def testQSAndFormData(self):
        data = """---123
Content-Disposition: form-data; name="key2"

value2y
---123
Content-Disposition: form-data; name="key3"

value3
---123
Content-Disposition: form-data; name="key4"

value4
---123--
"""
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
            'QUERY_STRING':     'key1=value1&key2=value2x',
            'REQUEST_METHOD':   'POST',
        }
        v = gen_result(data, environ)
        self.assertEqual(self._qs_result, v)

    def testQSAndFormDataFile(self):
        data = """---123
Content-Disposition: form-data; name="key2"

value2y
---123
Content-Disposition: form-data; name="key3"

value3
---123
Content-Disposition: form-data; name="key4"

value4
---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain

this is the content of the fake file

---123--
"""
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
            'QUERY_STRING':     'key1=value1&key2=value2x',
            'REQUEST_METHOD':   'POST',
        }
        result = self._qs_result.copy()
        result.update({
            'upload': b'this is the content of the fake file\n'
        })
        v = gen_result(data, environ)
        self.assertEqual(result, v)

    def test_deprecated_parse_qs(self):
        # this func is moved to urllib.parse, this is just a sanity check
        with check_warnings(('cgi.parse_qs is deprecated, use urllib.parse.'
                             'parse_qs instead', DeprecationWarning)):
            self.assertEqual({'a': ['A1'], 'B': ['B3'], 'b': ['B2']},
                             cgi.parse_qs('a=A1&b=B2&B=B3'))

    def test_deprecated_parse_qsl(self):
        # this func is moved to urllib.parse, this is just a sanity check
        with check_warnings(('cgi.parse_qsl is deprecated, use urllib.parse.'
                             'parse_qsl instead', DeprecationWarning)):
            self.assertEqual([('a', 'A1'), ('b', 'B2'), ('B', 'B3')],
                             cgi.parse_qsl('a=A1&b=B2&B=B3'))

    def test_parse_header(self):
        self.assertEqual(
            cgi.parse_header("text/plain"),
            ("text/plain", {}))
        self.assertEqual(
            cgi.parse_header("text/vnd.just.made.this.up ; "),
            ("text/vnd.just.made.this.up", {}))
        self.assertEqual(
            cgi.parse_header("text/plain;charset=us-ascii"),
            ("text/plain", {"charset": "us-ascii"}))
        self.assertEqual(
            cgi.parse_header('text/plain ; charset="us-ascii"'),
            ("text/plain", {"charset": "us-ascii"}))
        self.assertEqual(
            cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
            ("text/plain", {"charset": "us-ascii", "another": "opt"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="silly.txt"'),
            ("attachment", {"filename": "silly.txt"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="strange;name"'),
            ("attachment", {"filename": "strange;name"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="strange;name";size=123;'),
            ("attachment", {"filename": "strange;name", "size": "123"}))
        self.assertEqual(
            cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
            ("form-data", {"name": "files", "filename": 'fo"o;bar'}))

    def test_all(self):
        blacklist = {"logfile", "logfp", "initlog", "dolog", "nolog",
                     "closelog", "log", "maxlen", "valid_boundary"}
        support.check__all__(self, cgi, blacklist=blacklist)


# Multipart fixtures.  Several tests hard-code CONTENT_LENGTH for POSTDATA,
# so the exact characters of these literals (including blank lines) matter.
BOUNDARY = "---------------------------721837373350705526688164684"

POSTDATA = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

1234
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="title"


-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="file"; filename="test.txt"
Content-Type: text/plain

Testing 123.

-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="submit"

 Add\x20
-----------------------------721837373350705526688164684--
"""

POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

\xe7\xf1\x80
-----------------------------721837373350705526688164684
"""

# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
BOUNDARY_W3 = "AaB03x"
POSTDATA_W3 = """--AaB03x
Content-Disposition: form-data; name="submit-name"

Larry
--AaB03x
Content-Disposition: form-data; name="files"
Content-Type: multipart/mixed; boundary=BbC04y

--BbC04y
Content-Disposition: file; filename="file1.txt"
Content-Type: text/plain

... contents of file1.txt ...
--BbC04y
Content-Disposition: file; filename="file2.gif"
Content-Type: image/gif
Content-Transfer-Encoding: binary

...contents of file2.gif...
--BbC04y--
--AaB03x--
"""

if __name__ == '__main__':
    unittest.main()