parse.py - OpenGrok cross reference for /external/python/cpython3/Lib/urllib/parse.py

Lines Matching +full:decode +full:- +full:uri +full:- +full:component
5 RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
11 RFC 2396:  "Uniform Resource Identifiers (URI)": Generic Syntax by T.
12 Berners-Lee, R. Fielding, and L. Masinter, August 1998.
19 RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
69 # compatibility.  (They are undocumented, but have a public-looking name.)
85                 '+-.')
103 # presented, we may relax this by using latin-1
117     return tuple(x.decode(encoding, errors) if x else '' for x in args)
120     # Invokes decode if necessary to create str args
123     #   - noop for str inputs
124     #   - encoding function otherwise
127         # We special-case the empty string to support the
130             raise TypeError("Cannot mix str and non-str arguments")
148     def decode(self, encoding='ascii', errors='strict'):  member in _ResultMixinBytes
149         return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))
184                 raise ValueError("Port out of range 0-65535")
259 A 2-tuple that contains the url without fragment identifier and the fragment
274 A 5-tuple that contains the different components of a URL. Similar to
289 The query component, that contains non-hierarchical data, that along with data
290 in path component, identifies a resource in the scope of URI's scheme and
302 A 6-tuple that contains components of a parsed URL.
309 Parameters for last path element used to dereference the URI in order to provide
360 # Set up the encode/decode result pairs
378     The result is a named 6-tuple with fields corresponding to the
382     The username, password, hostname, and port sub-components of netloc
386     component when no scheme is found in url.
389     fragment component from the previous component, which can be either
440 # https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/
443         if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname):
457     The result is a named 5-tuple with fields corresponding to the
461     The username, password, hostname, and port sub-components of netloc
465     component when no scheme is found in url.
468     fragment component from the previous component, which can be either
476     # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
522     complete URL as a string. The data argument can be any five-item iterable.
570     if base_parts[-1] != '':
573         del base_parts[-1]
580         # filter out elements that would cause redundant slashes on re-joining
582         segments[1:-1] = filter(None, segments[1:-1])
599     if segments[-1] in ('.', '..'):
600         # do some post-processing here. if the last segment was a relative dir,
628     """unquote_to_bytes('abc%20def') -> b'abc def'."""
629     # Note: strings are encoded as UTF-8. This is only an issue if it contains
630     # unescaped non-ASCII characters, which URIs should not.
632         # Is it a string-like object?
636         string = string.encode('utf-8')
657 _asciire = re.compile('([\x00-\x7f]+)')
659 def unquote(string, encoding='utf-8', errors='replace'):
660     """Replace %xx escapes by their single-character equivalent. The optional
661     encoding and errors parameters specify how to decode percent-encoded
662     sequences into Unicode characters, as accepted by the bytes.decode()
664     By default, percent-encoded sequences are decoded with UTF-8, and invalid
667     unquote('abc%20def') -> 'abc def'.
670         return unquote_to_bytes(string).decode(encoding, errors)
675         encoding = 'utf-8'
682         append(unquote_to_bytes(bits[i]).decode(encoding, errors))
688              encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
693         qs: percent-encoded query string to be parsed
696             percent-encoded queries should be treated as blank strings.
706         encoding and errors: specify how to decode percent-encoded sequences
707             into Unicode characters, as accepted by the bytes.decode() method.
730               encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
735         qs: percent-encoded query string to be parsed
738             percent-encoded queries should be treated as blank strings.
747         encoding and errors: specify how to decode percent-encoded sequences
748             into Unicode characters, as accepted by the bytes.decode() method.
756         Returns a list, as G-d intended.
781             # Handle case of a control-name with no equal sign
796 def unquote_plus(string, encoding='utf-8', errors='replace'):
800     unquote_plus('%7e/abc+def') -> '~/abc def'
808                          b'_.-~')
823     String values are percent-encoded byte values, unless the key < 128, and
842     """quote('abc def') -> 'abc%20def'
849     RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists
852     unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
853     reserved      = gen-delims / sub-delims
854     gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
855     sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
858     Each of the reserved characters is reserved in some component of a URL,
861     The quote function %-escapes all characters that are neither in the
876     non-ASCII characters, as accepted by the str.encode method.
877     By default, encoding='utf-8' (characters are encoded with UTF-8), and
884             encoding = 'utf-8'
919     not perform string-to-bytes encoding.  It always returns an ASCII string.
920     quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f'
927         # Normalize 'safe' by converting to bytes and removing non-ASCII chars
933         return bs.decode()
939     """Encode a dict or sequence of two-element tuples into a URL query string.
944     If the query arg is a sequence of two-element tuples, the order of the
951     specified by quote_via (encoding and errors only if a component is a str).
957         # It's a bother at times that strings and string-like objects are
960             # non-sequence items should not work with len()
961             # non-empty strings will fail this
964             # Zero-length sequences of all types will get here and succeed,
969             raise TypeError("not a valid non-string sequence "
1000                     # Is this a sufficient test for sequence-ness?
1024     """to_bytes(u"URL") --> 'URL'."""
1030             url = url.encode("ASCII").decode()
1033                                " contains non-ASCII characters")
1043     if url[:1] == '<' and url[-1:] == '>':
1044         url = url[1:-1].strip()
1059     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1080     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1102     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1115     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1127 # splittag('/path#tag') --> '/path', 'tag'
1130     """splitport('host:port') --> 'host', 'port'."""
1133         _portprog = re.compile('(.*):([0-9]*)', re.DOTALL)
1143 def splitnport(host, defport=-1):
1150 def _splitnport(host, defport=-1):
1152     Return given default port if no ':' found; defaults to -1.
1175     """splitquery('/path?query') --> '/path', 'query'."""
1190     """splittag('/path#tag') --> '/path', 'tag'."""
1205     """splitattr('/path;attr1=value1;attr2=value2;...') ->
1219     """splitvalue('attr=value') --> 'attr', 'value'."""