test_urllib.py - OpenGrok cross reference for /external/python/cpython3/Lib/test/test

Lines Matching +full:url +full:- +full:parse
3 import urllib.parse
43 def urlopen(url, data=None, proxies=None):  argument
44     """urlopen(url [, data]) -> open file-like object"""
54         return opener.open(url)
56         return opener.open(url, data)
88             self.io_refs -= 1
102             # bpo-36918: HTTPConnection destructor calls close() which calls
161         self.quoted_pathname = urllib.parse.quote(self.pathname)
209         self.assertEqual(self.returned_obj.url, self.quoted_pathname)
358         # Issue #11703: geturl() omits fragments in the original URL.
359         url = 'http://docs.python.org/library/urllib.html#OK'
362             fp = urllib.request.urlopen(url)
363             self.assertEqual(fp.geturl(), url)
384                 # test suite.  They use different url opening codepaths.  Plain
386                 # calls urllib.parse.quote() on the URL which makes all of the
387                 # above attempts at injection within the url _path_ safe.
396                 # This code path quotes the URL so there is no injection.
405         host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
410             # test suite.  They use different url opening codepaths.  Plain
412             # calls urllib.parse.quote() on the URL which makes all of the
413             # above attempts at injection within the url _path_ safe.
420             # This code path quotes the URL so there is no injection.
448         host = "localhost\r\nX-injected: header\r\n"
477 Content-Type: text/html; charset=iso-8859-1
491 Content-Type: text/html; charset=iso-8859-1
494             msg = "Redirection to url 'file:"
581             url = "http://{}@python.org/".format(userpass)
585             fp = urlopen(url)
587             self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
590             # the spaces are quoted in URL so no match
591             self.assertNotEqual(fp.geturl(), url)
611     """Test urlopen() opening a data URL."""
617         # text containing URL special- and unicode-characters
627             "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
630             "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
632         # base64 encoded data URL that contains ignorable spaces,
655             [('text/plain', ''), ('charset', 'ISO-8859-1')])
656         self.assertEqual(self.image_url_resp.info()['content-length'],
659             [('text/plain', ''), ('charset', 'US-ASCII')])
723             filePath.encode("utf-8")
840 Content-Length: 100
841 Content-Type: text/html; charset=iso-8859-1
863 Content-Length: 100
864 Content-Type: text/html; charset=iso-8859-1
879     character you write it as '%' + <2 character US-ASCII hex value>.
888     Data characters : letters, digits, and "-_.!~*'()"
890     Control characters : 0x00 - 0x1F, 0x7F
902         # Make sure quote() does not quote letters, digits, and "_,.-"
906                                  "_.-~"])
907         result = urllib.parse.quote(do_not_quote)
910         result = urllib.parse.quote_plus(do_not_quote)
916         self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
921         result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
924         result = urllib.parse.quote_plus(quote_by_default,
930         result = urllib.parse.quote(quote_by_default, safe=b"<>")
933         # "Safe" non-ASCII characters should have no effect
934         # (Since URIs are not allowed to have non-ASCII characters)
935         result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
936         expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
941         result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
942         expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
950         should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
955             result = urllib.parse.quote(char)
960             result = urllib.parse.quote_plus(char)
968         result = urllib.parse.quote(partial_quote)
971         result = urllib.parse.quote_plus(partial_quote)
978         result = urllib.parse.quote(' ')
981         result = urllib.parse.quote_plus(' ')
986         result = urllib.parse.quote(given)
990         result = urllib.parse.quote_plus(given)
995         self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
997         self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
1000         self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
1003         self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
1007         # Bytes should quote directly to percent-encoded values
1010         result = urllib.parse.quote(given)
1014         self.assertRaises(TypeError, urllib.parse.quote, given,
1015                             encoding="latin-1")
1017         result = urllib.parse.quote_from_bytes(given)
1023         # Characters in Latin-1 range, encoded by default in UTF-8
1026         result = urllib.parse.quote(given)
1029         # Characters in Latin-1 range, encoded by with None (default)
1030         result = urllib.parse.quote(given, encoding=None, errors=None)
1033         # Characters in Latin-1 range, encoded with Latin-1
1036         result = urllib.parse.quote(given, encoding="latin-1")
1039         # Characters in BMP, encoded by default in UTF-8
1042         result = urllib.parse.quote(given)
1045         # Characters in BMP, encoded with Latin-1
1047         self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
1048                                     encoding="latin-1")
1049         # Characters in BMP, encoded with Latin-1, with replace error handling
1052         result = urllib.parse.quote(given, encoding="latin-1",
1056         # Characters in BMP, Latin-1, with xmlcharref error handling
1059         result = urllib.parse.quote(given, encoding="latin-1",
1065         # Encoding (latin-1) test for quote_plus
1068         result = urllib.parse.quote_plus(given, encoding="latin-1")
1074         result = urllib.parse.quote_plus(given, encoding="latin-1",
1093             result = urllib.parse.unquote(given)
1096             result = urllib.parse.unquote_plus(given)
1103         result = urllib.parse.unquote(escape_string)
1107         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
1108         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
1111         # Test unquoting on bad percent-escapes
1114         result = urllib.parse.unquote(given)
1119         result = urllib.parse.unquote(given)
1124         result = urllib.parse.unquote(given)
1130         result = urllib.parse.unquote_to_bytes(given)
1135         result = urllib.parse.unquote_to_bytes(given)
1140         result = urllib.parse.unquote_to_bytes(given)
1143         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1144         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
1147         # Test unquoting on mixed-case hex digits in the percent-escapes
1150         result = urllib.parse.unquote_to_bytes(given)
1156         # Make sure unquoting works when have non-quoted characters
1160         result = urllib.parse.unquote(given)
1163         result = urllib.parse.unquote_plus(given)
1171         result = urllib.parse.unquote(given)
1175         result = urllib.parse.unquote_plus(given)
1182         result = urllib.parse.unquote_to_bytes(given)
1186         # Test on a string with unescaped non-ASCII characters
1187         # (Technically an invalid URI; expect those characters to be UTF-8
1189         result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1190         expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
1197         result = urllib.parse.unquote_to_bytes(given)
1201         # Test with a bytes as input, with unescaped non-ASCII bytes
1205         result = urllib.parse.unquote_to_bytes(given)
1211         # Characters in the Latin-1 range, encoded with UTF-8
1214         result = urllib.parse.unquote(given)
1217         # Characters in the Latin-1 range, encoded with None (default)
1218         result = urllib.parse.unquote(given, encoding=None, errors=None)
1222         # Characters in the Latin-1 range, encoded with Latin-1
1223         result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1224                                       encoding="latin-1")
1229         # Characters in BMP, encoded with UTF-8
1232         result = urllib.parse.unquote(given)
1236         # Decode with UTF-8, invalid sequence
1239         result = urllib.parse.unquote(given)
1243         # Decode with UTF-8, invalid sequence, replace errors
1244         result = urllib.parse.unquote(given, errors="replace")
1248         # Decode with UTF-8, invalid sequence, ignoring errors
1251         result = urllib.parse.unquote(given, errors="ignore")
1255         # A mix of non-ASCII and percent-encoded characters, UTF-8
1256         result = urllib.parse.unquote("\u6f22%C3%BC")
1261         # A mix of non-ASCII and percent-encoded characters, Latin-1
1262         # (Note, the string contains non-Latin-1-representable characters)
1263         result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1272         result = urllib.parse.unquote(given)
1276         # A mix of non-ASCII hex-encoded characters and ASCII characters
1279         result = urllib.parse.unquote(given)
1283         # A mix of non-ASCII percent-encoded characters and ASCII characters
1286         result = urllib.parse.unquote(given)
1307         result = urllib.parse.urlencode(given)
1316         on_amp_left = result[amp_location - 1]
1332         # Test passing in a sequence of two-item sequences as an argument.
1334                             "using sequence of two-item tuples as input")
1340         result = urllib.parse.urlencode(given)
1344         result = urllib.parse.urlencode(given)
1350         expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1351         result = urllib.parse.urlencode(given)
1353         result = urllib.parse.urlencode(given, True)
1361         self.assertEqual("", urllib.parse.urlencode({}))
1362         self.assertEqual("", urllib.parse.urlencode([]))
1365         self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1366         self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1369         self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1371                          urllib.parse.urlencode({"a": [None, "a"]}, True))
1374                          urllib.parse.urlencode({"a": data}, True))
1380         result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1383         # Default is UTF-8 encoding.
1386         result = urllib.parse.urlencode(given)
1389         # Latin-1 encoding.
1392         result = urllib.parse.urlencode(given, encoding="latin-1")
1399         result = urllib.parse.urlencode(given, doseq=True,
1406         result = urllib.parse.urlencode(given, True,
1410         # Utf-8
1413         result = urllib.parse.urlencode(given, True)
1418         result = urllib.parse.urlencode(given, True)
1421         # latin-1
1424         result = urllib.parse.urlencode(given, True, encoding="latin-1")
1429         result = urllib.parse.urlencode(given, True, encoding="latin-1")
1435         result = urllib.parse.urlencode(given)
1437         result = urllib.parse.urlencode(given, True)
1443         result = urllib.parse.urlencode(given, True)
1449         # Default utf-8 encoding
1452         result = urllib.parse.urlencode(given, safe=":$")
1457         result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1464         result = urllib.parse.urlencode(given, True, safe=":$")
1467         # Test all above in latin-1 encoding
1470         result = urllib.parse.urlencode(given, safe=":$",
1471                                         encoding="latin-1")
1477         result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1478                                         encoding="latin-1")
1482         result = urllib.parse.urlencode(given, True, safe=":$",
1483                                         encoding="latin-1")
1506         expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
1517         expect = "%s/using_quote" % urllib.parse.quote("make sure")
1552         for url in given:
1553             result = urllib.request.url2pathname(url)
1577             def open_spam(self, url):  argument
1578                 return url
1602         url = "http://www.python.org/file.txt"
1605         filename, _ = urllib.request.URLopener().retrieve(url)
1610         # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
1612             def open_local_file(self, url):  argument
1613                 return url
1614         for url in ('local_file://example', 'local-file://example'):
1615             self.assertRaises(OSError, urllib.request.urlopen, url)
1616             self.assertRaises(OSError, urllib.request.URLopener().open, url)
1617             self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
1618             self.assertRaises(OSError, DummyURLopener().open, url)
1619             self.assertRaises(OSError, DummyURLopener().retrieve, url)