1import cgi 2import os 3import sys 4import tempfile 5import unittest 6from collections import namedtuple 7from io import StringIO, BytesIO 8from test import support 9 10class HackedSysModule: 11 # The regression test will have real values in sys.argv, which 12 # will completely confuse the test of the cgi module 13 argv = [] 14 stdin = sys.stdin 15 16cgi.sys = HackedSysModule() 17 18class ComparableException: 19 def __init__(self, err): 20 self.err = err 21 22 def __str__(self): 23 return str(self.err) 24 25 def __eq__(self, anExc): 26 if not isinstance(anExc, Exception): 27 return NotImplemented 28 return (self.err.__class__ == anExc.__class__ and 29 self.err.args == anExc.args) 30 31 def __getattr__(self, attr): 32 return getattr(self.err, attr) 33 34def do_test(buf, method): 35 env = {} 36 if method == "GET": 37 fp = None 38 env['REQUEST_METHOD'] = 'GET' 39 env['QUERY_STRING'] = buf 40 elif method == "POST": 41 fp = BytesIO(buf.encode('latin-1')) # FieldStorage expects bytes 42 env['REQUEST_METHOD'] = 'POST' 43 env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded' 44 env['CONTENT_LENGTH'] = str(len(buf)) 45 else: 46 raise ValueError("unknown method: %s" % method) 47 try: 48 return cgi.parse(fp, env, strict_parsing=1) 49 except Exception as err: 50 return ComparableException(err) 51 52parse_strict_test_cases = [ 53 ("", ValueError("bad query field: ''")), 54 ("&", ValueError("bad query field: ''")), 55 ("&&", ValueError("bad query field: ''")), 56 (";", ValueError("bad query field: ''")), 57 (";&;", ValueError("bad query field: ''")), 58 # Should the next few really be valid? 59 ("=", {}), 60 ("=&=", {}), 61 ("=;=", {}), 62 # This rest seem to make sense 63 ("=a", {'': ['a']}), 64 ("&=a", ValueError("bad query field: ''")), 65 ("=a&", ValueError("bad query field: ''")), 66 ("=&a", ValueError("bad query field: 'a'")), 67 ("b=a", {'b': ['a']}), 68 ("b+=a", {'b ': ['a']}), 69 ("a=b=a", {'a': ['b=a']}), 70 ("a=+b=a", {'a': [' b=a']}), 71 ("&b=a", ValueError("bad query field: ''")), 72 ("b&=a", ValueError("bad query field: 'b'")), 73 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), 74 ("a=a+b&a=b+a", {'a': ['a b', 'b a']}), 75 ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 76 ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 77 ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 78 ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env", 79 {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'], 80 'cuyer': ['r'], 81 'expire': ['964546263'], 82 'kid': ['130003.300038'], 83 'lobale': ['en-US'], 84 'order_id': ['0bb2e248638833d48cb7fed300000f1b'], 85 'ss': ['env'], 86 'view': ['bustomer'], 87 }), 88 89 ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse", 90 {'SUBMIT': ['Browse'], 91 '_assigned_to': ['31392'], 92 '_category': ['100'], 93 '_status': ['1'], 94 'group_id': ['5470'], 95 'set': ['custom'], 96 }) 97 ] 98 99def norm(seq): 100 return sorted(seq, key=repr) 101 102def first_elts(list): 103 return [p[0] for p in list] 104 105def first_second_elts(list): 106 return [(p[0], p[1][0]) for p in list] 107 108def gen_result(data, environ): 109 encoding = 'latin-1' 110 fake_stdin = BytesIO(data.encode(encoding)) 111 fake_stdin.seek(0) 112 form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding) 113 114 result = {} 115 for k, v in dict(form).items(): 116 result[k] = isinstance(v, list) and form.getlist(k) or v.value 117 118 return result 119 120class CgiTests(unittest.TestCase): 121 122 def test_parse_multipart(self): 123 fp = BytesIO(POSTDATA.encode('latin1')) 124 env = {'boundary': BOUNDARY.encode('latin1'), 125 'CONTENT-LENGTH': '558'} 126 result = cgi.parse_multipart(fp, env) 127 expected = {'submit': [' Add '], 'id': ['1234'], 128 'file': [b'Testing 123.\n'], 'title': ['']} 129 self.assertEqual(result, expected) 130 131 def test_parse_multipart_invalid_encoding(self): 132 BOUNDARY = "JfISa01" 133 POSTDATA = """--JfISa01 134Content-Disposition: form-data; name="submit-name" 135Content-Length: 3 136 137\u2603 138--JfISa01""" 139 fp = BytesIO(POSTDATA.encode('utf8')) 140 env = {'boundary': BOUNDARY.encode('latin1'), 141 'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))} 142 result = cgi.parse_multipart(fp, env, encoding="ascii", 143 errors="surrogateescape") 144 expected = {'submit-name': ["\udce2\udc98\udc83"]} 145 self.assertEqual(result, expected) 146 self.assertEqual("\u2603".encode('utf8'), 147 result["submit-name"][0].encode('utf8', 'surrogateescape')) 148 149 def test_fieldstorage_properties(self): 150 fs = cgi.FieldStorage() 151 self.assertFalse(fs) 152 self.assertIn("FieldStorage", repr(fs)) 153 self.assertEqual(list(fs), list(fs.keys())) 154 fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue')) 155 self.assertTrue(fs) 156 157 def test_fieldstorage_invalid(self): 158 self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj", 159 environ={"REQUEST_METHOD":"PUT"}) 160 self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar") 161 fs = cgi.FieldStorage(headers={'content-type':'text/plain'}) 162 self.assertRaises(TypeError, bool, fs) 163 164 def test_strict(self): 165 for orig, expect in parse_strict_test_cases: 166 # Test basic parsing 167 d = do_test(orig, "GET") 168 self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig)) 169 d = do_test(orig, "POST") 170 self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig)) 171 172 env = {'QUERY_STRING': orig} 173 fs = cgi.FieldStorage(environ=env) 174 if isinstance(expect, dict): 175 # test dict interface 176 self.assertEqual(len(expect), len(fs)) 177 self.assertCountEqual(expect.keys(), fs.keys()) 178 ##self.assertEqual(norm(expect.values()), norm(fs.values())) 179 ##self.assertEqual(norm(expect.items()), norm(fs.items())) 180 self.assertEqual(fs.getvalue("nonexistent field", "default"), "default") 181 # test individual fields 182 for key in expect.keys(): 183 expect_val = expect[key] 184 self.assertIn(key, fs) 185 if len(expect_val) > 1: 186 self.assertEqual(fs.getvalue(key), expect_val) 187 else: 188 self.assertEqual(fs.getvalue(key), expect_val[0]) 189 190 def test_log(self): 191 cgi.log("Testing") 192 193 cgi.logfp = StringIO() 194 cgi.initlog("%s", "Testing initlog 1") 195 cgi.log("%s", "Testing log 2") 196 self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n") 197 if os.path.exists(os.devnull): 198 cgi.logfp = None 199 cgi.logfile = os.devnull 200 cgi.initlog("%s", "Testing log 3") 201 self.addCleanup(cgi.closelog) 202 cgi.log("Testing log 4") 203 204 def test_fieldstorage_readline(self): 205 # FieldStorage uses readline, which has the capacity to read all 206 # contents of the input file into memory; we use readline's size argument 207 # to prevent that for files that do not contain any newlines in 208 # non-GET/HEAD requests 209 class TestReadlineFile: 210 def __init__(self, file): 211 self.file = file 212 self.numcalls = 0 213 214 def readline(self, size=None): 215 self.numcalls += 1 216 if size: 217 return self.file.readline(size) 218 else: 219 return self.file.readline() 220 221 def __getattr__(self, name): 222 file = self.__dict__['file'] 223 a = getattr(file, name) 224 if not isinstance(a, int): 225 setattr(self, name, a) 226 return a 227 228 f = TestReadlineFile(tempfile.TemporaryFile("wb+")) 229 self.addCleanup(f.close) 230 f.write(b'x' * 256 * 1024) 231 f.seek(0) 232 env = {'REQUEST_METHOD':'PUT'} 233 fs = cgi.FieldStorage(fp=f, environ=env) 234 self.addCleanup(fs.file.close) 235 # if we're not chunking properly, readline is only called twice 236 # (by read_binary); if we are chunking properly, it will be called 5 times 237 # as long as the chunksize is 1 << 16. 238 self.assertGreater(f.numcalls, 2) 239 f.close() 240 241 def test_fieldstorage_multipart(self): 242 #Test basic FieldStorage multipart parsing 243 env = { 244 'REQUEST_METHOD': 'POST', 245 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 246 'CONTENT_LENGTH': '558'} 247 fp = BytesIO(POSTDATA.encode('latin-1')) 248 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 249 self.assertEqual(len(fs.list), 4) 250 expect = [{'name':'id', 'filename':None, 'value':'1234'}, 251 {'name':'title', 'filename':None, 'value':''}, 252 {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'}, 253 {'name':'submit', 'filename':None, 'value':' Add '}] 254 for x in range(len(fs.list)): 255 for k, exp in expect[x].items(): 256 got = getattr(fs.list[x], k) 257 self.assertEqual(got, exp) 258 259 def test_fieldstorage_multipart_leading_whitespace(self): 260 env = { 261 'REQUEST_METHOD': 'POST', 262 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 263 'CONTENT_LENGTH': '560'} 264 # Add some leading whitespace to our post data that will cause the 265 # first line to not be the innerboundary. 266 fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1')) 267 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 268 self.assertEqual(len(fs.list), 4) 269 expect = [{'name':'id', 'filename':None, 'value':'1234'}, 270 {'name':'title', 'filename':None, 'value':''}, 271 {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'}, 272 {'name':'submit', 'filename':None, 'value':' Add '}] 273 for x in range(len(fs.list)): 274 for k, exp in expect[x].items(): 275 got = getattr(fs.list[x], k) 276 self.assertEqual(got, exp) 277 278 def test_fieldstorage_multipart_non_ascii(self): 279 #Test basic FieldStorage multipart parsing 280 env = {'REQUEST_METHOD':'POST', 281 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 282 'CONTENT_LENGTH':'558'} 283 for encoding in ['iso-8859-1','utf-8']: 284 fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding)) 285 fs = cgi.FieldStorage(fp, environ=env,encoding=encoding) 286 self.assertEqual(len(fs.list), 1) 287 expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}] 288 for x in range(len(fs.list)): 289 for k, exp in expect[x].items(): 290 got = getattr(fs.list[x], k) 291 self.assertEqual(got, exp) 292 293 def test_fieldstorage_multipart_maxline(self): 294 # Issue #18167 295 maxline = 1 << 16 296 self.maxDiff = None 297 def check(content): 298 data = """---123 299Content-Disposition: form-data; name="upload"; filename="fake.txt" 300Content-Type: text/plain 301 302%s 303---123-- 304""".replace('\n', '\r\n') % content 305 environ = { 306 'CONTENT_LENGTH': str(len(data)), 307 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 308 'REQUEST_METHOD': 'POST', 309 } 310 self.assertEqual(gen_result(data, environ), 311 {'upload': content.encode('latin1')}) 312 check('x' * (maxline - 1)) 313 check('x' * (maxline - 1) + '\r') 314 check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1)) 315 316 def test_fieldstorage_multipart_w3c(self): 317 # Test basic FieldStorage multipart parsing (W3C sample) 318 env = { 319 'REQUEST_METHOD': 'POST', 320 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3), 321 'CONTENT_LENGTH': str(len(POSTDATA_W3))} 322 fp = BytesIO(POSTDATA_W3.encode('latin-1')) 323 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 324 self.assertEqual(len(fs.list), 2) 325 self.assertEqual(fs.list[0].name, 'submit-name') 326 self.assertEqual(fs.list[0].value, 'Larry') 327 self.assertEqual(fs.list[1].name, 'files') 328 files = fs.list[1].value 329 self.assertEqual(len(files), 2) 330 expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'}, 331 {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}] 332 for x in range(len(files)): 333 for k, exp in expect[x].items(): 334 got = getattr(files[x], k) 335 self.assertEqual(got, exp) 336 337 def test_fieldstorage_part_content_length(self): 338 BOUNDARY = "JfISa01" 339 POSTDATA = """--JfISa01 340Content-Disposition: form-data; name="submit-name" 341Content-Length: 5 342 343Larry 344--JfISa01""" 345 env = { 346 'REQUEST_METHOD': 'POST', 347 'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY), 348 'CONTENT_LENGTH': str(len(POSTDATA))} 349 fp = BytesIO(POSTDATA.encode('latin-1')) 350 fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1") 351 self.assertEqual(len(fs.list), 1) 352 self.assertEqual(fs.list[0].name, 'submit-name') 353 self.assertEqual(fs.list[0].value, 'Larry') 354 355 def test_field_storage_multipart_no_content_length(self): 356 fp = BytesIO(b"""--MyBoundary 357Content-Disposition: form-data; name="my-arg"; filename="foo" 358 359Test 360 361--MyBoundary-- 362""") 363 env = { 364 "REQUEST_METHOD": "POST", 365 "CONTENT_TYPE": "multipart/form-data; boundary=MyBoundary", 366 "wsgi.input": fp, 367 } 368 fields = cgi.FieldStorage(fp, environ=env) 369 370 self.assertEqual(len(fields["my-arg"].file.read()), 5) 371 372 def test_fieldstorage_as_context_manager(self): 373 fp = BytesIO(b'x' * 10) 374 env = {'REQUEST_METHOD': 'PUT'} 375 with cgi.FieldStorage(fp=fp, environ=env) as fs: 376 content = fs.file.read() 377 self.assertFalse(fs.file.closed) 378 self.assertTrue(fs.file.closed) 379 self.assertEqual(content, 'x' * 10) 380 with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'): 381 fs.file.read() 382 383 _qs_result = { 384 'key1': 'value1', 385 'key2': ['value2x', 'value2y'], 386 'key3': 'value3', 387 'key4': 'value4' 388 } 389 def testQSAndUrlEncode(self): 390 data = "key2=value2x&key3=value3&key4=value4" 391 environ = { 392 'CONTENT_LENGTH': str(len(data)), 393 'CONTENT_TYPE': 'application/x-www-form-urlencoded', 394 'QUERY_STRING': 'key1=value1&key2=value2y', 395 'REQUEST_METHOD': 'POST', 396 } 397 v = gen_result(data, environ) 398 self.assertEqual(self._qs_result, v) 399 400 def test_max_num_fields(self): 401 # For application/x-www-form-urlencoded 402 data = '&'.join(['a=a']*11) 403 environ = { 404 'CONTENT_LENGTH': str(len(data)), 405 'CONTENT_TYPE': 'application/x-www-form-urlencoded', 406 'REQUEST_METHOD': 'POST', 407 } 408 409 with self.assertRaises(ValueError): 410 cgi.FieldStorage( 411 fp=BytesIO(data.encode()), 412 environ=environ, 413 max_num_fields=10, 414 ) 415 416 # For multipart/form-data 417 data = """---123 418Content-Disposition: form-data; name="a" 419 4203 421---123 422Content-Type: application/x-www-form-urlencoded 423 424a=4 425---123 426Content-Type: application/x-www-form-urlencoded 427 428a=5 429---123-- 430""" 431 environ = { 432 'CONTENT_LENGTH': str(len(data)), 433 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 434 'QUERY_STRING': 'a=1&a=2', 435 'REQUEST_METHOD': 'POST', 436 } 437 438 # 2 GET entities 439 # 1 top level POST entities 440 # 1 entity within the second POST entity 441 # 1 entity within the third POST entity 442 with self.assertRaises(ValueError): 443 cgi.FieldStorage( 444 fp=BytesIO(data.encode()), 445 environ=environ, 446 max_num_fields=4, 447 ) 448 cgi.FieldStorage( 449 fp=BytesIO(data.encode()), 450 environ=environ, 451 max_num_fields=5, 452 ) 453 454 def testQSAndFormData(self): 455 data = """---123 456Content-Disposition: form-data; name="key2" 457 458value2y 459---123 460Content-Disposition: form-data; name="key3" 461 462value3 463---123 464Content-Disposition: form-data; name="key4" 465 466value4 467---123-- 468""" 469 environ = { 470 'CONTENT_LENGTH': str(len(data)), 471 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 472 'QUERY_STRING': 'key1=value1&key2=value2x', 473 'REQUEST_METHOD': 'POST', 474 } 475 v = gen_result(data, environ) 476 self.assertEqual(self._qs_result, v) 477 478 def testQSAndFormDataFile(self): 479 data = """---123 480Content-Disposition: form-data; name="key2" 481 482value2y 483---123 484Content-Disposition: form-data; name="key3" 485 486value3 487---123 488Content-Disposition: form-data; name="key4" 489 490value4 491---123 492Content-Disposition: form-data; name="upload"; filename="fake.txt" 493Content-Type: text/plain 494 495this is the content of the fake file 496 497---123-- 498""" 499 environ = { 500 'CONTENT_LENGTH': str(len(data)), 501 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', 502 'QUERY_STRING': 'key1=value1&key2=value2x', 503 'REQUEST_METHOD': 'POST', 504 } 505 result = self._qs_result.copy() 506 result.update({ 507 'upload': b'this is the content of the fake file\n' 508 }) 509 v = gen_result(data, environ) 510 self.assertEqual(result, v) 511 512 def test_parse_header(self): 513 self.assertEqual( 514 cgi.parse_header("text/plain"), 515 ("text/plain", {})) 516 self.assertEqual( 517 cgi.parse_header("text/vnd.just.made.this.up ; "), 518 ("text/vnd.just.made.this.up", {})) 519 self.assertEqual( 520 cgi.parse_header("text/plain;charset=us-ascii"), 521 ("text/plain", {"charset": "us-ascii"})) 522 self.assertEqual( 523 cgi.parse_header('text/plain ; charset="us-ascii"'), 524 ("text/plain", {"charset": "us-ascii"})) 525 self.assertEqual( 526 cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'), 527 ("text/plain", {"charset": "us-ascii", "another": "opt"})) 528 self.assertEqual( 529 cgi.parse_header('attachment; filename="silly.txt"'), 530 ("attachment", {"filename": "silly.txt"})) 531 self.assertEqual( 532 cgi.parse_header('attachment; filename="strange;name"'), 533 ("attachment", {"filename": "strange;name"})) 534 self.assertEqual( 535 cgi.parse_header('attachment; filename="strange;name";size=123;'), 536 ("attachment", {"filename": "strange;name", "size": "123"})) 537 self.assertEqual( 538 cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'), 539 ("form-data", {"name": "files", "filename": 'fo"o;bar'})) 540 541 def test_all(self): 542 blacklist = {"logfile", "logfp", "initlog", "dolog", "nolog", 543 "closelog", "log", "maxlen", "valid_boundary"} 544 support.check__all__(self, cgi, blacklist=blacklist) 545 546 547BOUNDARY = "---------------------------721837373350705526688164684" 548 549POSTDATA = """-----------------------------721837373350705526688164684 550Content-Disposition: form-data; name="id" 551 5521234 553-----------------------------721837373350705526688164684 554Content-Disposition: form-data; name="title" 555 556 557-----------------------------721837373350705526688164684 558Content-Disposition: form-data; name="file"; filename="test.txt" 559Content-Type: text/plain 560 561Testing 123. 562 563-----------------------------721837373350705526688164684 564Content-Disposition: form-data; name="submit" 565 566 Add\x20 567-----------------------------721837373350705526688164684-- 568""" 569 570POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684 571Content-Disposition: form-data; name="id" 572 573\xe7\xf1\x80 574-----------------------------721837373350705526688164684 575""" 576 577# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4 578BOUNDARY_W3 = "AaB03x" 579POSTDATA_W3 = """--AaB03x 580Content-Disposition: form-data; name="submit-name" 581 582Larry 583--AaB03x 584Content-Disposition: form-data; name="files" 585Content-Type: multipart/mixed; boundary=BbC04y 586 587--BbC04y 588Content-Disposition: file; filename="file1.txt" 589Content-Type: text/plain 590 591... contents of file1.txt ... 592--BbC04y 593Content-Disposition: file; filename="file2.gif" 594Content-Type: image/gif 595Content-Transfer-Encoding: binary 596 597...contents of file2.gif... 598--BbC04y-- 599--AaB03x-- 600""" 601 602if __name__ == '__main__': 603 unittest.main() 604