• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 Square, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.squareup.okhttp;
17 
18 import java.net.IDN;
19 import java.net.InetAddress;
20 import java.net.MalformedURLException;
21 import java.net.URI;
22 import java.net.URISyntaxException;
23 import java.net.URL;
24 import java.net.UnknownHostException;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collections;
28 import java.util.LinkedHashSet;
29 import java.util.List;
30 import java.util.Locale;
31 import java.util.Set;
32 import okio.Buffer;
33 
34 /**
35  * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this
36  * class to compose and decompose Internet addresses. For example, this code will compose and print
37  * a URL for Google search: <pre>   {@code
38  *
39  *   HttpUrl url = new HttpUrl.Builder()
40  *       .scheme("https")
41  *       .host("www.google.com")
42  *       .addPathSegment("search")
43  *       .addQueryParameter("q", "polar bears")
44  *       .build();
45  *   System.out.println(url);
46  * }</pre>
47  *
48  * which prints: <pre>   {@code
49  *
50  *     https://www.google.com/search?q=polar%20bears
51  * }</pre>
52  *
53  * As another example, this code prints the human-readable query parameters of a Twitter search:
54  * <pre>   {@code
55  *
56  *   HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images");
57  *   for (int i = 0, size = url.querySize(); i < size; i++) {
58  *     System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i));
59  *   }
60  * }</pre>
61  *
62  * which prints: <pre>   {@code
63  *
64  *   q: cute #puppies
65  *   f: images
66  * }</pre>
67  *
68  * In addition to composing URLs from their component parts and decomposing URLs into their
69  * component parts, this class implements relative URL resolution: what address you'd reach by
70  * clicking a relative link on a specified page. For example: <pre>   {@code
71  *
72  *   HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos");
73  *   HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc");
74  *   System.out.println(link);
75  * }</pre>
76  *
77  * which prints: <pre>   {@code
78  *
79  *   https://www.youtube.com/watch?v=cbP2N1BQdYc
80  * }</pre>
81  *
82  * <h3>What's in a URL?</h3>
83  *
84  * A URL has several components.
85  *
86  * <h4>Scheme</h4>
87  * Sometimes referred to as <i>protocol</i>, a URL's scheme describes what mechanism should be used
88  * to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file}, {@code
89  * ftp}), this class only supports {@code http} and {@code https}. Use {@link URI java.net.URI} for
90  * URLs with arbitrary schemes.
91  *
92  * <h4>Username and Password</h4>
93  * Username and password are either present, or the empty string {@code ""} if absent. This class
94  * offers no mechanism to differentiate empty from absent. Neither of these components is popular
95  * in practice. Typically HTTP applications use other mechanisms for user identification and
96  * authentication.
97  *
98  * <h4>Host</h4>
99  * The host identifies the webserver that serves the URL's resource. It is either a hostname like
100  * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6
101  * address like {@code ::1}.
102  *
103  * <p>Usually a webserver is reachable with multiple identifiers: its IP addresses, registered
104  * domain names, and even {@code localhost} when connecting from the server itself. Each of a
105  * webserver's names is a distinct URL and they are not interchangeable. For example, even if
106  * {@code http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by
107  * the same IP address, the two URLs identify different resources.
108  *
109  * <h4>Port</h4>
110  * The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS. This
111  * class never returns -1 for the port: if no port is explicitly specified in the URL then the
112  * scheme's default is used.
113  *
114  * <h4>Path</h4>
115  * The path identifies a specific resource on the host. Paths have a hierarchical structure like
116  * "/square/okhttp/issues/1486". Each path segment is prefixed with "/". This class offers methods
117  * to compose and decompose paths by segment. If a path's last segment is the empty string, then the
118  * path ends with "/". This class always builds non-empty paths: if the path is omitted it defaults
119  * to "/", which is a path whose only segment is the empty string.
120  *
121  * <h4>Query</h4>
122  * The query is optional: it can be null, empty, or non-empty. For many HTTP URLs the query string
123  * is subdivided into a collection of name-value parameters. This class offers methods to set the
124  * query as the single string, or as individual name-value parameters. With name-value parameters
125  * the values are optional and names may be repeated.
126  *
127  * <h4>Fragment</h4>
128  * The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and query
129  * the fragment is not sent to the webserver: it's private to the client.
130  *
131  * <h3>Encoding</h3>
132  * Each component must be encoded before it is embedded in the complete URL. As we saw above, the
133  * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query
134  * parameter value.
135  *
136  * <h4>Percent encoding</h4>
137  * Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes (like
138  * {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters,
139  * non-ASCII characters, and characters that already have another meaning in a particular context.
140  *
141  * <p>Percent encoding is used in every URL component except for the hostname. But the set of
142  * characters that need to be encoded is different for each component. For example, the path
143  * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the
144  * start of the URL's query. But within the query and fragment components, the {@code ?} character
145  * doesn't delimit anything and doesn't need to be escaped. <pre>   {@code
146  *
147  *   HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder()
148  *       .addPathSegment("_Who?_")
149  *       .query("_Who?_")
150  *       .fragment("_Who?_")
151  *       .build();
152  *   System.out.println(url);
153  * }</pre>
154  *
155  * This prints: <pre>   {@code
156  *
157  *   http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_
158  * }</pre>
159  *
160  * When parsing URLs that lack percent encoding where it is required, this class will percent encode
161  * the offending characters.
162  *
163  * <h4>IDNA Mapping and Punycode encoding</h4>
164  * Hostnames have different requirements and use a different encoding scheme. It consists of IDNA
165  * mapping and Punycode encoding.
166  *
167  * <p>In order to avoid confusion and discourage phishing attacks,
168  * <a href="http://www.unicode.org/reports/tr46/#ToASCII">IDNA Mapping</a> transforms names to avoid
169  * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM}
170  * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the
171  * Unicode trademark sign (™) could be confused for the letters "TM" in {@code http://ho™mail.com}.
172  * To mitigate this, the single character (™) maps to the string (tm). There is similar policy for
173  * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are
174  * not mapped and cannot be used in a hostname.
175  *
176  * <p><a href="http://ietf.org/rfc/rfc3492.txt">Punycode</a> converts a Unicode string to an ASCII
177  * string to make international domain names work everywhere. For example, "σ" encodes as
178  * "xn--4xa". The encoded string is not human readable, but can be used with classes like {@link
179  * InetAddress} to establish connections.
180  *
181  * <h3>Why another URL model?</h3>
182  * Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL
183  * model to address problems that the others don't.
184  *
185  * <h4>Different URLs should be different</h4>
186  * Although they have different content, {@code java.net.URL} considers the following two URLs
187  * equal, and the {@link Object#equals equals()} method between them returns true:
188  * <ul>
189  *   <li>http://square.github.io/
190  *   <li>http://google.github.io/
191  * </ul>
192  * This is because those two hosts share the same IP address. This is an old, bad design decision
193  * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link
194  * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may
195  * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are
196  * hosted.
197  *
198  * <h4>Equal URLs should be equal</h4>
199  * These two URLs are semantically identical, but {@code java.net.URI} disagrees:
200  * <ul>
201  *   <li>http://host:80/
202  *   <li>http://host
203  * </ul>
204  * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /})
205  * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any
206  * application that stores information-per-URL will need to either canonicalize manually, or suffer
207  * unnecessary redundancy for such URLs.
208  *
209  * <p>Because they don't attempt canonical form, these classes are surprisingly difficult to use
210  * securely. Suppose you're building a webservice that checks that incoming paths are prefixed
211  * "/static/images/" before serving the corresponding assets from the filesystem. <pre>   {@code
212  *
213  *   String attack = "http://example.com/static/images/../../../../../etc/passwd";
214  *   System.out.println(new URL(attack).getPath());
215  *   System.out.println(new URI(attack).getPath());
216  *   System.out.println(HttpUrl.parse(attack).path());
217  * }</pre>
218  *
219  * By not canonicalizing the input paths, {@code URL} and {@code URI} are complicit in directory
220  * traversal attacks. Code that checks only the path prefix may suffer! The code above prints:
221  * <pre>   {@code
222  *
223  *    /static/images/../../../../../etc/passwd
224  *    /static/images/../../../../../etc/passwd
225  *    /etc/passwd
226  * }</pre>
227  *
228  * <h4>If it works on the web, it should work in your application</h4>
229  * The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like
230  * "http://example.com/abc|def" because the '|' character is unsupported. This class is more
231  * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef".
232  * This kind of behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with
233  * major web browsers over consistency with obsolete specifications.
234  *
235  * <h4>Paths and Queries should decompose</h4>
236  * Neither of the built-in URL models offers direct access to path segments or query parameters.
237  * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+'
238  * characters get silently replaced with spaces? If a query parameter contains a '&amp;', does that
239  * get escaped? By offering methods to read and write individual query parameters directly,
240  * application developers are saved from the hassles of encoding and decoding.
241  *
242  * <h4>Plus a modern API</h4>
243  * The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping
244  * constructors. For example, there's no API to compose a URI with a custom port without also
245  * providing a query and fragment.
246  *
247  * <p>Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With
248  * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and
249  * path but no hostname. Building APIs that consume such malformed values is difficult!
250  *
251  * <p>This class has a modern API. It avoids punitive checked exceptions: {@link #parse parse()}
252  * returns null if the input is an invalid URL. You can even be explicit about whether each
253  * component has been encoded already.
254  */
255 public final class HttpUrl {
256   private static final char[] HEX_DIGITS =
257       { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
258   static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
259   static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
260   static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#";
261   static final String PATH_SEGMENT_ENCODE_SET_URI = "[]";
262   // ANDROID-CHANGED: http://b/30405333 - we do not encode single quote as %27 in query strings.
263   // static final String QUERY_ENCODE_SET = " \"'<>#";
264   // static final String QUERY_COMPONENT_ENCODE_SET = " \"'<>#&=";
265   static final String QUERY_ENCODE_SET = " \"<>#";
266   static final String QUERY_COMPONENT_ENCODE_SET = " \"<>#&=";
267   // ANDROID-CHANGED end.
268   static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}";
269   static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~";
270   static final String FRAGMENT_ENCODE_SET = "";
271   static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}";
272 
273   /** Either "http" or "https". */
274   private final String scheme;
275 
276   /** Decoded username. */
277   private final String username;
278 
279   /** Decoded password. */
280   private final String password;
281 
282   /** Canonical hostname. */
283   private final String host;
284 
285   /** Either 80, 443 or a user-specified port. In range [1..65535]. */
286   private final int port;
287 
288   /**
289    * A list of canonical path segments. This list always contains at least one element, which may
290    * be the empty string. Each segment is formatted with a leading '/', so if path segments were
291    * ["a", "b", ""], then the encoded path would be "/a/b/".
292    */
293   private final List<String> pathSegments;
294 
295   /**
296    * Alternating, decoded query names and values, or null for no query. Names may be empty or
297    * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or
298    * empty, or non-empty.
299    */
300   private final List<String> queryNamesAndValues;
301 
302   /** Decoded fragment. */
303   private final String fragment;
304 
305   /** Canonical URL. */
306   private final String url;
307 
HttpUrl(Builder builder)308   private HttpUrl(Builder builder) {
309     this.scheme = builder.scheme;
310     this.username = percentDecode(builder.encodedUsername, false);
311     this.password = percentDecode(builder.encodedPassword, false);
312     this.host = builder.host;
313     this.port = builder.effectivePort();
314     this.pathSegments = percentDecode(builder.encodedPathSegments, false);
315     this.queryNamesAndValues = builder.encodedQueryNamesAndValues != null
316         ? percentDecode(builder.encodedQueryNamesAndValues, true)
317         : null;
318     this.fragment = builder.encodedFragment != null
319         ? percentDecode(builder.encodedFragment, false)
320         : null;
321     this.url = builder.toString();
322   }
323 
324   /** Returns this URL as a {@link URL java.net.URL}. */
url()325   public URL url() {
326     try {
327       return new URL(url);
328     } catch (MalformedURLException e) {
329       throw new RuntimeException(e); // Unexpected!
330     }
331   }
332 
333   /**
334    * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this
335    * class, the returned URI may be semantically different from this URL:
336    * <ul>
337    *   <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped.
338    *   <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}.
339    *   <li>Whitespace and control characters in the fragment will be stripped.
340    * </ul>
341    *
342    * <p>These differences may have a significant consequence when the URI is interpretted by a
343    * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided.
344    */
uri()345   public URI uri() {
346     String uri = newBuilder().reencodeForUri().toString();
347     try {
348       return new URI(uri);
349     } catch (URISyntaxException e) {
350       // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry.
351       try {
352         String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", "");
353         return URI.create(stripped);
354       } catch (Exception e1) {
355         throw new RuntimeException(e); // Unexpected!
356       }
357     }
358   }
359 
360   /** Returns either "http" or "https". */
scheme()361   public String scheme() {
362     return scheme;
363   }
364 
isHttps()365   public boolean isHttps() {
366     return scheme.equals("https");
367   }
368 
369   /** Returns the username, or an empty string if none is set. */
encodedUsername()370   public String encodedUsername() {
371     if (username.isEmpty()) return "";
372     int usernameStart = scheme.length() + 3; // "://".length() == 3.
373     int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@");
374     return url.substring(usernameStart, usernameEnd);
375   }
376 
username()377   public String username() {
378     return username;
379   }
380 
381   /** Returns the password, or an empty string if none is set. */
encodedPassword()382   public String encodedPassword() {
383     if (password.isEmpty()) return "";
384     int passwordStart = url.indexOf(':', scheme.length() + 3) + 1;
385     int passwordEnd = url.indexOf('@');
386     return url.substring(passwordStart, passwordEnd);
387   }
388 
389   /** Returns the decoded password, or an empty string if none is present. */
password()390   public String password() {
391     return password;
392   }
393 
394   /**
395    * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May
396    * be:
397    * <ul>
398    *   <li>A regular host name, like {@code android.com}.
399    *   <li>An IPv4 address, like {@code 127.0.0.1}.
400    *   <li>An IPv6 address, like {@code ::1}. Note that there are no square braces.
401    *   <li>An encoded IDN, like {@code xn--n3h.net}.
402    * </ul>
403    */
host()404   public String host() {
405     return host;
406   }
407 
408   /**
409    * Same as {@link #host} except that literal IPv6 addresses are surrounding by square
410    * braces. For example, this method will return {@code [::1]} where {@code host} returns
411    * {@code ::1}.
412    */
rfc2732host()413   public String rfc2732host() {
414     if (host.indexOf(':') == -1) {
415       return host;
416     }
417 
418     return "[" + host + "]";
419   }
420 
421   /**
422    * Returns the explicitly-specified port if one was provided, or the default port for this URL's
423    * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code
424    * https://square.com/}. The result is in {@code [1..65535]}.
425    */
port()426   public int port() {
427     return port;
428   }
429 
430   /**
431    * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1
432    * otherwise.
433    */
defaultPort(String scheme)434   public static int defaultPort(String scheme) {
435     if (scheme.equals("http")) {
436       return 80;
437     } else if (scheme.equals("https")) {
438       return 443;
439     } else {
440       return -1;
441     }
442   }
443 
pathSize()444   public int pathSize() {
445     return pathSegments.size();
446   }
447 
448   /**
449    * Returns the entire path of this URL, encoded for use in HTTP resource resolution.
450    // ANDROID-BEGIN: http://b/29983827
451    //   * The returned path is always nonempty and is prefixed with {@code /}.
452    // ANDROID-END: http://b/29983827
453    */
encodedPath()454   public String encodedPath() {
455     int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3.
456     // ANDROID-BEGIN: http://b/29983827
457     if (pathStart == -1) {
458       return "";
459     }
460     // ANDROID-END: http://b/29983827
461     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
462     return url.substring(pathStart, pathEnd);
463   }
464 
pathSegmentsToString(StringBuilder out, List<String> pathSegments)465   static void pathSegmentsToString(StringBuilder out, List<String> pathSegments) {
466     for (int i = 0, size = pathSegments.size(); i < size; i++) {
467       out.append('/');
468       out.append(pathSegments.get(i));
469     }
470   }
471 
encodedPathSegments()472   public List<String> encodedPathSegments() {
473     int pathStart = url.indexOf('/', scheme.length() + 3);
474     // ANDROID-BEGIN: http://b/29983827
475     if (pathStart == -1) {
476       return new ArrayList<>();
477     }
478     // ANDROID-END: http://b/29983827
479 
480     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
481     List<String> result = new ArrayList<>();
482     for (int i = pathStart; i < pathEnd; ) {
483       i++; // Skip the '/'.
484       int segmentEnd = delimiterOffset(url, i, pathEnd, "/");
485       result.add(url.substring(i, segmentEnd));
486       i = segmentEnd;
487     }
488     return result;
489   }
490 
pathSegments()491   public List<String> pathSegments() {
492     return pathSegments;
493   }
494 
495   /**
496    * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string
497    * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all
498    * other URLs).
499    */
encodedQuery()500   public String encodedQuery() {
501     if (queryNamesAndValues == null) return null; // No query.
502     int queryStart = url.indexOf('?') + 1;
503     int queryEnd = delimiterOffset(url, queryStart + 1, url.length(), "#");
504     return url.substring(queryStart, queryEnd);
505   }
506 
namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues)507   static void namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues) {
508     for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {
509       String name = namesAndValues.get(i);
510       String value = namesAndValues.get(i + 1);
511       if (i > 0) out.append('&');
512       out.append(name);
513       if (value != null) {
514         out.append('=');
515         out.append(value);
516       }
517     }
518   }
519 
520   /**
521    * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a
522    * query string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject",
523    * "math", "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain
524    * '=' characters.
525    */
queryStringToNamesAndValues(String encodedQuery)526   static List<String> queryStringToNamesAndValues(String encodedQuery) {
527     List<String> result = new ArrayList<>();
528     for (int pos = 0; pos <= encodedQuery.length(); ) {
529       int ampersandOffset = encodedQuery.indexOf('&', pos);
530       if (ampersandOffset == -1) ampersandOffset = encodedQuery.length();
531 
532       int equalsOffset = encodedQuery.indexOf('=', pos);
533       if (equalsOffset == -1 || equalsOffset > ampersandOffset) {
534         result.add(encodedQuery.substring(pos, ampersandOffset));
535         result.add(null); // No value for this name.
536       } else {
537         result.add(encodedQuery.substring(pos, equalsOffset));
538         result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset));
539       }
540       pos = ampersandOffset + 1;
541     }
542     return result;
543   }
544 
query()545   public String query() {
546     if (queryNamesAndValues == null) return null; // No query.
547     StringBuilder result = new StringBuilder();
548     namesAndValuesToQueryString(result, queryNamesAndValues);
549     return result.toString();
550   }
551 
querySize()552   public int querySize() {
553     return queryNamesAndValues != null ? queryNamesAndValues.size() / 2 : 0;
554   }
555 
556   /**
557    * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is
558    * no such query parameter.
559    */
queryParameter(String name)560   public String queryParameter(String name) {
561     if (queryNamesAndValues == null) return null;
562     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
563       if (name.equals(queryNamesAndValues.get(i))) {
564         return queryNamesAndValues.get(i + 1);
565       }
566     }
567     return null;
568   }
569 
queryParameterNames()570   public Set<String> queryParameterNames() {
571     if (queryNamesAndValues == null) return Collections.emptySet();
572     Set<String> result = new LinkedHashSet<>();
573     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
574       result.add(queryNamesAndValues.get(i));
575     }
576     return Collections.unmodifiableSet(result);
577   }
578 
queryParameterValues(String name)579   public List<String> queryParameterValues(String name) {
580     if (queryNamesAndValues == null) return Collections.emptyList();
581     List<String> result = new ArrayList<>();
582     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
583       if (name.equals(queryNamesAndValues.get(i))) {
584         result.add(queryNamesAndValues.get(i + 1));
585       }
586     }
587     return Collections.unmodifiableList(result);
588   }
589 
queryParameterName(int index)590   public String queryParameterName(int index) {
591     return queryNamesAndValues.get(index * 2);
592   }
593 
queryParameterValue(int index)594   public String queryParameterValue(int index) {
595     return queryNamesAndValues.get(index * 2 + 1);
596   }
597 
encodedFragment()598   public String encodedFragment() {
599     if (fragment == null) return null;
600     int fragmentStart = url.indexOf('#') + 1;
601     return url.substring(fragmentStart);
602   }
603 
fragment()604   public String fragment() {
605     return fragment;
606   }
607 
608   /** Returns the URL that would be retrieved by following {@code link} from this URL. */
resolve(String link)609   public HttpUrl resolve(String link) {
610     // ANDROID-BEGIN: http://b/29983827
611     // Builder builder = new Builder();
612     Builder builder = new Builder(false);
613     // ANDROID-END: http://b/29983827
614     Builder.ParseResult result = builder.parse(this, link);
615     return result == Builder.ParseResult.SUCCESS ? builder.build() : null;
616   }
617 
newBuilder()618   public Builder newBuilder() {
619     // ANDROID-BEGIN: http://b/29983827
620     // Builder builder = new Builder();
621     Builder result = new Builder(false);
622     // ANDROID-END: http://b/29983827
623     result.scheme = scheme;
624     result.encodedUsername = encodedUsername();
625     result.encodedPassword = encodedPassword();
626     result.host = host;
627     // If we're set to a default port, unset it in case of a scheme change.
628     result.port = port != defaultPort(scheme) ? port : -1;
629     result.encodedPathSegments.clear();
630     result.encodedPathSegments.addAll(encodedPathSegments());
631     result.encodedQuery(encodedQuery());
632     result.encodedFragment = encodedFragment();
633     return result;
634   }
635 
636   /**
637    * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS
638    * URL, or null if it isn't.
639    */
parse(String url)640   public static HttpUrl parse(String url) {
641     // ANDROID-BEGIN: http://b/29983827
642     // Builder builder = new Builder();
643     Builder builder = new Builder(false);
644     // ANDROID-END: http://b/29983827
645     Builder.ParseResult result = builder.parse(null, url);
646     return result == Builder.ParseResult.SUCCESS ? builder.build() : null;
647   }
648 
649   /**
650    * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or
651    * null if it has any other protocol.
652    */
get(URL url)653   public static HttpUrl get(URL url) {
654     return parse(url.toString());
655   }
656 
657   /**
658    * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS
659    * URL, or throws an exception if it isn't.
660    *
661    * @throws MalformedURLException if there was a non-host related URL issue
662    * @throws UnknownHostException if the host was invalid
663    */
getChecked(String url)664   static HttpUrl getChecked(String url) throws MalformedURLException, UnknownHostException {
665     // ANDROID-END: http://b/29983827
666     // Builder builder = new Builder();
667     Builder builder = new Builder(false);
668     // ANDROID-END: http://b/29983827
669     Builder.ParseResult result = builder.parse(null, url);
670     switch (result) {
671       case SUCCESS:
672         return builder.build();
673       case INVALID_HOST:
674         throw new UnknownHostException("Invalid host: " + url);
675       case UNSUPPORTED_SCHEME:
676       case MISSING_SCHEME:
677       case INVALID_PORT:
678       default:
679         throw new MalformedURLException("Invalid URL: " + result + " for " + url);
680     }
681   }
682 
get(URI uri)683   public static HttpUrl get(URI uri) {
684     return parse(uri.toString());
685   }
686 
equals(Object o)687   @Override public boolean equals(Object o) {
688     return o instanceof HttpUrl && ((HttpUrl) o).url.equals(url);
689   }
690 
hashCode()691   @Override public int hashCode() {
692     return url.hashCode();
693   }
694 
toString()695   @Override public String toString() {
696     return url;
697   }
698 
699   public static final class Builder {
700     String scheme;
701     String encodedUsername = "";
702     String encodedPassword = "";
703     String host;
704     int port = -1;
705     final List<String> encodedPathSegments = new ArrayList<>();
706     List<String> encodedQueryNamesAndValues;
707     String encodedFragment;
708 
709     // ANDROID-BEGIN: http://b/29983827
710     // public Builder() {
711     //   encodedPathSegments.add(""); // The default path is '/' which needs a trailing space.
712     // }
713 
Builder()714     public Builder() {
715       this(true); // // The default path is '/' which needs a trailing space.
716     }
717 
Builder(boolean startWithSlash)718     private Builder(boolean startWithSlash) {
719       if (startWithSlash) {
720         encodedPathSegments.add("");
721       }
722     }
723     // ANDROID-END: http://b/29983827
724 
scheme(String scheme)725     public Builder scheme(String scheme) {
726       if (scheme == null) {
727         throw new IllegalArgumentException("scheme == null");
728       } else if (scheme.equalsIgnoreCase("http")) {
729         this.scheme = "http";
730       } else if (scheme.equalsIgnoreCase("https")) {
731         this.scheme = "https";
732       } else {
733         throw new IllegalArgumentException("unexpected scheme: " + scheme);
734       }
735       return this;
736     }
737 
username(String username)738     public Builder username(String username) {
739       if (username == null) throw new IllegalArgumentException("username == null");
740       this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true);
741       return this;
742     }
743 
encodedUsername(String encodedUsername)744     public Builder encodedUsername(String encodedUsername) {
745       if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null");
746       this.encodedUsername = canonicalize(
747           encodedUsername, USERNAME_ENCODE_SET, true, false, false, true);
748       return this;
749     }
750 
password(String password)751     public Builder password(String password) {
752       if (password == null) throw new IllegalArgumentException("password == null");
753       this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true);
754       return this;
755     }
756 
encodedPassword(String encodedPassword)757     public Builder encodedPassword(String encodedPassword) {
758       if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null");
759       this.encodedPassword = canonicalize(
760           encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true);
761       return this;
762     }
763 
764     /**
765      * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6
766      *     address.
767      */
host(String host)768     public Builder host(String host) {
769       if (host == null) throw new IllegalArgumentException("host == null");
770       String encoded = canonicalizeHost(host, 0, host.length());
771       if (encoded == null) throw new IllegalArgumentException("unexpected host: " + host);
772       this.host = encoded;
773       return this;
774     }
775 
port(int port)776     public Builder port(int port) {
777       if (port <= 0 || port > 65535) throw new IllegalArgumentException("unexpected port: " + port);
778       this.port = port;
779       return this;
780     }
781 
effectivePort()782     int effectivePort() {
783       return port != -1 ? port : defaultPort(scheme);
784     }
785 
addPathSegment(String pathSegment)786     public Builder addPathSegment(String pathSegment) {
787       if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
788       push(pathSegment, 0, pathSegment.length(), false, false);
789       return this;
790     }
791 
addEncodedPathSegment(String encodedPathSegment)792     public Builder addEncodedPathSegment(String encodedPathSegment) {
793       if (encodedPathSegment == null) {
794         throw new IllegalArgumentException("encodedPathSegment == null");
795       }
796       push(encodedPathSegment, 0, encodedPathSegment.length(), false, true);
797       return this;
798     }
799 
setPathSegment(int index, String pathSegment)800     public Builder setPathSegment(int index, String pathSegment) {
801       if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
802       String canonicalPathSegment = canonicalize(
803           pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true);
804       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
805         throw new IllegalArgumentException("unexpected path segment: " + pathSegment);
806       }
807       encodedPathSegments.set(index, canonicalPathSegment);
808       return this;
809     }
810 
setEncodedPathSegment(int index, String encodedPathSegment)811     public Builder setEncodedPathSegment(int index, String encodedPathSegment) {
812       if (encodedPathSegment == null) {
813         throw new IllegalArgumentException("encodedPathSegment == null");
814       }
815       String canonicalPathSegment = canonicalize(encodedPathSegment,
816           0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true);
817       encodedPathSegments.set(index, canonicalPathSegment);
818       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
819         throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment);
820       }
821       return this;
822     }
823 
removePathSegment(int index)824     public Builder removePathSegment(int index) {
825       encodedPathSegments.remove(index);
826       // ANDROID-BEGIN: http://b/29983827. Note this method only used from tests.
827       // Only changed for consistency.
828       //      if (encodedPathSegments.isEmpty()) {
829       //        encodedPathSegments.add(""); // Always leave at least one '/'.
830       //      }
831       // ANDROID-END: http://b/29983827 - only used from tests
832       return this;
833     }
834 
encodedPath(String encodedPath)835     public Builder encodedPath(String encodedPath) {
836       if (encodedPath == null) throw new IllegalArgumentException("encodedPath == null");
837       if (!encodedPath.startsWith("/")) {
838         throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath);
839       }
840       resolvePath(encodedPath, 0, encodedPath.length());
841       return this;
842     }
843 
query(String query)844     public Builder query(String query) {
845       this.encodedQueryNamesAndValues = query != null
846           ? queryStringToNamesAndValues(canonicalize(
847               query, QUERY_ENCODE_SET, false, false, true, true))
848           : null;
849       return this;
850     }
851 
encodedQuery(String encodedQuery)852     public Builder encodedQuery(String encodedQuery) {
853       this.encodedQueryNamesAndValues = encodedQuery != null
854           ? queryStringToNamesAndValues(
855               canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true))
856           : null;
857       return this;
858     }
859 
860     /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */
addQueryParameter(String name, String value)861     public Builder addQueryParameter(String name, String value) {
862       if (name == null) throw new IllegalArgumentException("name == null");
863       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
864       encodedQueryNamesAndValues.add(
865           canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true));
866       encodedQueryNamesAndValues.add(value != null
867           ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)
868           : null);
869       return this;
870     }
871 
872     /** Adds the pre-encoded query parameter to this URL's query string. */
addEncodedQueryParameter(String encodedName, String encodedValue)873     public Builder addEncodedQueryParameter(String encodedName, String encodedValue) {
874       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
875       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
876       encodedQueryNamesAndValues.add(
877           canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
878       encodedQueryNamesAndValues.add(encodedValue != null
879           ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)
880           : null);
881       return this;
882     }
883 
setQueryParameter(String name, String value)884     public Builder setQueryParameter(String name, String value) {
885       removeAllQueryParameters(name);
886       addQueryParameter(name, value);
887       return this;
888     }
889 
setEncodedQueryParameter(String encodedName, String encodedValue)890     public Builder setEncodedQueryParameter(String encodedName, String encodedValue) {
891       removeAllEncodedQueryParameters(encodedName);
892       addEncodedQueryParameter(encodedName, encodedValue);
893       return this;
894     }
895 
removeAllQueryParameters(String name)896     public Builder removeAllQueryParameters(String name) {
897       if (name == null) throw new IllegalArgumentException("name == null");
898       if (encodedQueryNamesAndValues == null) return this;
899       String nameToRemove = canonicalize(
900           name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true);
901       removeAllCanonicalQueryParameters(nameToRemove);
902       return this;
903     }
904 
removeAllEncodedQueryParameters(String encodedName)905     public Builder removeAllEncodedQueryParameters(String encodedName) {
906       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
907       if (encodedQueryNamesAndValues == null) return this;
908       removeAllCanonicalQueryParameters(
909           canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
910       return this;
911     }
912 
removeAllCanonicalQueryParameters(String canonicalName)913     private void removeAllCanonicalQueryParameters(String canonicalName) {
914       for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) {
915         if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) {
916           encodedQueryNamesAndValues.remove(i + 1);
917           encodedQueryNamesAndValues.remove(i);
918           if (encodedQueryNamesAndValues.isEmpty()) {
919             encodedQueryNamesAndValues = null;
920             return;
921           }
922         }
923       }
924     }
925 
fragment(String fragment)926     public Builder fragment(String fragment) {
927       this.encodedFragment = fragment != null
928           ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false)
929           : null;
930       return this;
931     }
932 
encodedFragment(String encodedFragment)933     public Builder encodedFragment(String encodedFragment) {
934       this.encodedFragment = encodedFragment != null
935           ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false)
936           : null;
937       return this;
938     }
939 
940     /**
941      * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is
942      * particularly strict for certain components.
943      */
reencodeForUri()944     Builder reencodeForUri() {
945       for (int i = 0, size = encodedPathSegments.size(); i < size; i++) {
946         String pathSegment = encodedPathSegments.get(i);
947         encodedPathSegments.set(i,
948             canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true));
949       }
950       if (encodedQueryNamesAndValues != null) {
951         for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) {
952           String component = encodedQueryNamesAndValues.get(i);
953           if (component != null) {
954             encodedQueryNamesAndValues.set(i,
955                 canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true));
956           }
957         }
958       }
959       if (encodedFragment != null) {
960         encodedFragment = canonicalize(
961             encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false);
962       }
963       return this;
964     }
965 
build()966     public HttpUrl build() {
967       if (scheme == null) throw new IllegalStateException("scheme == null");
968       if (host == null) throw new IllegalStateException("host == null");
969       return new HttpUrl(this);
970     }
971 
toString()972     @Override public String toString() {
973       StringBuilder result = new StringBuilder();
974       result.append(scheme);
975       result.append("://");
976 
977       if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) {
978         result.append(encodedUsername);
979         if (!encodedPassword.isEmpty()) {
980           result.append(':');
981           result.append(encodedPassword);
982         }
983         result.append('@');
984       }
985 
986       if (host.indexOf(':') != -1) {
987         // Host is an IPv6 address.
988         result.append('[');
989         result.append(host);
990         result.append(']');
991       } else {
992         result.append(host);
993       }
994 
995       int effectivePort = effectivePort();
996       if (effectivePort != defaultPort(scheme)) {
997         result.append(':');
998         result.append(effectivePort);
999       }
1000 
1001       pathSegmentsToString(result, encodedPathSegments);
1002 
1003       if (encodedQueryNamesAndValues != null) {
1004         result.append('?');
1005         namesAndValuesToQueryString(result, encodedQueryNamesAndValues);
1006       }
1007 
1008       if (encodedFragment != null) {
1009         result.append('#');
1010         result.append(encodedFragment);
1011       }
1012 
1013       return result.toString();
1014     }
1015 
    /** Outcome of {@code parse(HttpUrl, String)}. */
    enum ParseResult {
      /** The input was parsed and the builder's fields were populated. */
      SUCCESS,
      /** The input has no scheme and no base URL was supplied to take one from. */
      MISSING_SCHEME,
      /** The input has a scheme, but it is neither {@code http} nor {@code https}. */
      UNSUPPORTED_SCHEME,
      /** The authority contains a port that {@code parsePort} rejected. */
      INVALID_PORT,
      /** The authority contains a host that {@code canonicalizeHost} rejected. */
      INVALID_HOST,
    }
1023 
    /**
     * Parses {@code input} into this builder's fields, resolving it against {@code base} when the
     * input does not carry its own scheme and/or authority.
     *
     * <p>Leading and trailing ASCII whitespace is ignored. The components are consumed in order:
     * scheme, authority (user info, host, port), path, query, fragment.
     *
     * <p>On any result other than {@code SUCCESS} the method returns early; fields assigned before
     * the failure was detected keep their new values, so the builder may be partially populated.
     *
     * @param base the URL that relative input is resolved against, or null when {@code input}
     *     must be absolute.
     * @param input the URL text to parse.
     * @return {@code SUCCESS}, or the reason parsing stopped: {@code UNSUPPORTED_SCHEME} when a
     *     scheme is present but is not http/https, {@code MISSING_SCHEME} when there is no scheme
     *     and no base, {@code INVALID_PORT} when {@code parsePort} returns -1, or {@code
     *     INVALID_HOST} when {@code canonicalizeHost} returns null.
     */
    ParseResult parse(HttpUrl base, String input) {
      int pos = skipLeadingAsciiWhitespace(input, 0, input.length());
      int limit = skipTrailingAsciiWhitespace(input, pos, input.length());

      // Scheme.
      int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit);
      if (schemeDelimiterOffset != -1) {
        // The comparison ignores case; the stored scheme is always lowercase.
        if (input.regionMatches(true, pos, "https:", 0, 6)) {
          this.scheme = "https";
          pos += "https:".length();
        } else if (input.regionMatches(true, pos, "http:", 0, 5)) {
          this.scheme = "http";
          pos += "http:".length();
        } else {
          return ParseResult.UNSUPPORTED_SCHEME; // Not an HTTP scheme.
        }
      } else if (base != null) {
        this.scheme = base.scheme;
      } else {
        return ParseResult.MISSING_SCHEME; // No scheme.
      }

      // Authority.
      boolean hasUsername = false;
      boolean hasPassword = false;
      int slashCount = slashCount(input, pos, limit);
      if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) {
        // Read an authority if either:
        //  * The input starts with 2 or more slashes. These follow the scheme if it exists.
        //  * The input scheme exists and is different from the base URL's scheme.
        //
        // The structure of an authority is:
        //   username:password@host:port
        //
        // Username, password and port are optional.
        //   [username[:password]@]host[:port]
        pos += slashCount;
        authority:
        while (true) {
          int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#");
          // -1 stands for "ran into limit without seeing a delimiter".
          int c = componentDelimiterOffset != limit
              ? input.charAt(componentDelimiterOffset)
              : -1;
          switch (c) {
            case '@':
              // User info precedes.
              if (!hasPassword) {
                int passwordColonOffset = delimiterOffset(
                    input, pos, componentDelimiterOffset, ":");
                String canonicalUsername = canonicalize(
                    input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true);
                // A second '@' before any password extends the username; the earlier '@' is kept
                // as its percent-encoded form "%40".
                this.encodedUsername = hasUsername
                    ? this.encodedUsername + "%40" + canonicalUsername
                    : canonicalUsername;
                if (passwordColonOffset != componentDelimiterOffset) {
                  hasPassword = true;
                  this.encodedPassword = canonicalize(input, passwordColonOffset + 1,
                      componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
                }
                hasUsername = true;
              } else {
                // Once a password has started, any further '@'-terminated text extends it.
                this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos,
                    componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
              }
              pos = componentDelimiterOffset + 1;
              break;

            case -1:
            case '/':
            case '\\':
            case '?':
            case '#':
              // Host info precedes.
              int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset);
              if (portColonOffset + 1 < componentDelimiterOffset) {
                // At least one character follows the ':' so there is a port to parse.
                this.host = canonicalizeHost(input, pos, portColonOffset);
                this.port = parsePort(input, portColonOffset + 1, componentDelimiterOffset);
                if (this.port == -1) return ParseResult.INVALID_PORT; // Invalid port.
              } else {
                this.host = canonicalizeHost(input, pos, portColonOffset);
                this.port = defaultPort(this.scheme);
              }
              if (this.host == null) return ParseResult.INVALID_HOST; // Invalid host.
              pos = componentDelimiterOffset;
              break authority;
          }
        }
      } else {
        // This is a relative link. Copy over all authority components. Also maybe the path & query.
        this.encodedUsername = base.encodedUsername();
        this.encodedPassword = base.encodedPassword();
        this.host = base.host;
        this.port = base.port;
        this.encodedPathSegments.clear();
        this.encodedPathSegments.addAll(base.encodedPathSegments());
        // The base query is inherited only when the input is empty or is just a fragment.
        if (pos == limit || input.charAt(pos) == '#') {
          encodedQuery(base.encodedQuery());
        }
      }

      // Resolve the relative path.
      int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");
      resolvePath(input, pos, pathDelimiterOffset);
      pos = pathDelimiterOffset;

      // Query.
      if (pos < limit && input.charAt(pos) == '?') {
        int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#");
        this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(
            input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true));
        pos = queryDelimiterOffset;
      }

      // Fragment.
      if (pos < limit && input.charAt(pos) == '#') {
        this.encodedFragment = canonicalize(
            input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false);
      }

      return ParseResult.SUCCESS;
    }
1145 
resolvePath(String input, int pos, int limit)1146     private void resolvePath(String input, int pos, int limit) {
1147       // Read a delimiter.
1148       if (pos == limit) {
1149         // Empty path: keep the base path as-is.
1150         return;
1151       }
1152       char c = input.charAt(pos);
1153       if (c == '/' || c == '\\') {
1154         // Absolute path: reset to the default "/".
1155         encodedPathSegments.clear();
1156         encodedPathSegments.add("");
1157         pos++;
1158       } else {
1159         // ANDROID-BEGIN: http://b/29983827
1160         // // Relative path: clear everything after the last '/'.
1161         // encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1162         // Relative path: clear everything after the last '/' (if there is one).
1163         if (!encodedPathSegments.isEmpty()) {
1164           encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1165         }
1166         // ANDROID-END: http://b/29983827
1167       }
1168 
1169       // Read path segments.
1170       for (int i = pos; i < limit; ) {
1171         int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\");
1172         boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit;
1173         push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true);
1174         i = pathSegmentDelimiterOffset;
1175         if (segmentHasTrailingSlash) i++;
1176       }
1177     }
1178 
1179     /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */
1180     private void push(String input, int pos, int limit, boolean addTrailingSlash,
1181         boolean alreadyEncoded) {
1182       String segment = canonicalize(
1183           input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true);
1184       if (isDot(segment)) {
1185         return; // Skip '.' path segments.
1186       }
1187       if (isDotDot(segment)) {
1188         pop();
1189         return;
1190       }
1191 
1192       // ANDROID-BEGIN: http://b/29983827
1193       // If the encodedPathSegments doesn't even include "/" then add the leading "/" before
1194       // pushing more segments or modifying existing segments.
1195       if (encodedPathSegments.isEmpty()) {
1196         encodedPathSegments.add("");
1197       }
1198       // ANDROID-END: http://b/29983827
1199 
1200       if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) {
1201         encodedPathSegments.set(encodedPathSegments.size() - 1, segment);
1202       } else {
1203         encodedPathSegments.add(segment);
1204       }
1205       if (addTrailingSlash) {
1206         encodedPathSegments.add("");
1207       }
1208     }
1209 
1210     private boolean isDot(String input) {
1211       return input.equals(".") || input.equalsIgnoreCase("%2e");
1212     }
1213 
1214     private boolean isDotDot(String input) {
1215       return input.equals("..")
1216           || input.equalsIgnoreCase("%2e.")
1217           || input.equalsIgnoreCase(".%2e")
1218           || input.equalsIgnoreCase("%2e%2e");
1219     }
1220 
1221     /**
1222      * Removes a path segment. When this method returns the last segment is always "", which means
1223      * the encoded path will have a trailing '/'.
1224      *
1225      * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from
1226      * ["a", "b", "c", ""] to ["a", "b", ""].
1227      *
1228      * <p>Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"]
1229      * to ["a", "b", ""].
1230      */
1231     private void pop() {
1232       // ANDROID-BEGIN: http://b/29983827
1233       // Cannot pop() if there isn't even a "/". Leave the path as is. This method is only used
1234       // from push(). push() handles the empty case explicitly.
1235       if (encodedPathSegments.isEmpty()) {
1236         return;
1237       }
1238       // ANDROID-END: http://b/29983827
1239 
1240       String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1);
1241 
1242       // Make sure the path ends with a '/' by either adding an empty string or clearing a segment.
1243       if (removed.isEmpty() && !encodedPathSegments.isEmpty()) {
1244         encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1245       } else {
1246         encodedPathSegments.add("");
1247       }
1248     }
1249 
1250     /**
1251      * Increments {@code pos} until {@code input[pos]} is not ASCII whitespace. Stops at {@code
1252      * limit}.
1253      */
1254     private int skipLeadingAsciiWhitespace(String input, int pos, int limit) {
1255       for (int i = pos; i < limit; i++) {
1256         switch (input.charAt(i)) {
1257           case '\t':
1258           case '\n':
1259           case '\f':
1260           case '\r':
1261           case ' ':
1262             continue;
1263           default:
1264             return i;
1265         }
1266       }
1267       return limit;
1268     }
1269 
1270     /**
1271      * Decrements {@code limit} until {@code input[limit - 1]} is not ASCII whitespace. Stops at
1272      * {@code pos}.
1273      */
1274     private int skipTrailingAsciiWhitespace(String input, int pos, int limit) {
1275       for (int i = limit - 1; i >= pos; i--) {
1276         switch (input.charAt(i)) {
1277           case '\t':
1278           case '\n':
1279           case '\f':
1280           case '\r':
1281           case ' ':
1282             continue;
1283           default:
1284             return i + 1;
1285         }
1286       }
1287       return pos;
1288     }
1289 
1290     /**
1291      * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if
1292      * {@code input} does not have a scheme that starts at {@code pos}.
1293      */
1294     private static int schemeDelimiterOffset(String input, int pos, int limit) {
1295       if (limit - pos < 2) return -1;
1296 
1297       char c0 = input.charAt(pos);
1298       if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char.
1299 
1300       for (int i = pos + 1; i < limit; i++) {
1301         char c = input.charAt(i);
1302 
1303         if ((c >= 'a' && c <= 'z')
1304             || (c >= 'A' && c <= 'Z')
1305             || (c >= '0' && c <= '9')
1306             || c == '+'
1307             || c == '-'
1308             || c == '.') {
1309           continue; // Scheme character. Keep going.
1310         } else if (c == ':') {
1311           return i; // Scheme prefix!
1312         } else {
1313           return -1; // Non-scheme character before the first ':'.
1314         }
1315       }
1316 
1317       return -1; // No ':'; doesn't start with a scheme.
1318     }
1319 
1320     /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */
1321     private static int slashCount(String input, int pos, int limit) {
1322       int slashCount = 0;
1323       while (pos < limit) {
1324         char c = input.charAt(pos);
1325         if (c == '\\' || c == '/') {
1326           slashCount++;
1327           pos++;
1328         } else {
1329           break;
1330         }
1331       }
1332       return slashCount;
1333     }
1334 
1335     /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */
1336     private static int portColonOffset(String input, int pos, int limit) {
1337       for (int i = pos; i < limit; i++) {
1338         switch (input.charAt(i)) {
1339           case '[':
1340             while (++i < limit) {
1341               if (input.charAt(i) == ']') break;
1342             }
1343             break;
1344           case ':':
1345             return i;
1346         }
1347       }
1348       return limit; // No colon.
1349     }
1350 
1351     private static String canonicalizeHost(String input, int pos, int limit) {
1352       // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've
1353       // checked for IPv6 square braces. But Chrome does it first, and that's more lenient.
1354       String percentDecoded = percentDecode(input, pos, limit, false);
1355 
1356       // If the input is encased in square braces "[...]", drop 'em. We have an IPv6 address.
1357       if (percentDecoded.startsWith("[") && percentDecoded.endsWith("]")) {
1358         InetAddress inetAddress = decodeIpv6(percentDecoded, 1, percentDecoded.length() - 1);
1359         if (inetAddress == null) return null;
1360         byte[] address = inetAddress.getAddress();
1361         if (address.length == 16) return inet6AddressToAscii(address);
1362         throw new AssertionError();
1363       }
1364 
1365       return domainToAscii(percentDecoded);
1366     }
1367 
    /**
     * Decodes an IPv6 address like 1111:2222:3333:4444:5555:6666:7777:8888 or ::1.
     *
     * <p>Groups are one to four hex digits separated by ':'. A single "::" may stand in for a
     * run of zero bytes, and the address may end with a dotted-decimal IPv4 suffix, which is
     * handed to {@code decodeIpv4Suffix}.
     *
     * @param input text containing the address, without surrounding square braces.
     * @param pos index of the first character to read (inclusive).
     * @param limit index at which to stop reading (exclusive).
     * @return the decoded address, or null if any of the checks below rejects the input.
     */
    private static InetAddress decodeIpv6(String input, int pos, int limit) {
      byte[] address = new byte[16];
      int b = 0; // Next byte of 'address' to write.
      int compress = -1; // Byte offset where "::" was seen, or -1 if it hasn't been.
      int groupOffset = -1; // Input index where the most recently read group started.

      for (int i = pos; i < limit; ) {
        if (b == address.length) return null; // Too many groups.

        // Read a delimiter.
        if (i + 2 <= limit && input.regionMatches(i, "::", 0, 2)) {
          // Compression "::" delimiter, which is anywhere in the input, including its prefix.
          if (compress != -1) return null; // Multiple "::" delimiters.
          i += 2;
          b += 2;
          compress = b;
          if (i == limit) break;
        } else if (b != 0) {
          // Group separator ":" delimiter.
          if (input.regionMatches(i, ":", 0, 1)) {
            i++;
          } else if (input.regionMatches(i, ".", 0, 1)) {
            // If we see a '.', rewind to the beginning of the previous group and parse as IPv4.
            if (!decodeIpv4Suffix(input, groupOffset, limit, address, b - 2)) return null;
            b += 2; // We rewound two bytes and then added four.
            break;
          } else {
            return null; // Wrong delimiter.
          }
        }

        // Read a group, one to four hex digits.
        int value = 0;
        groupOffset = i;
        for (; i < limit; i++) {
          char c = input.charAt(i);
          int hexDigit = decodeHexDigit(c);
          if (hexDigit == -1) break;
          value = (value << 4) + hexDigit;
        }
        int groupLength = i - groupOffset;
        if (groupLength == 0 || groupLength > 4) return null; // Group is the wrong size.

        // We've successfully read a group. Assign its value to our byte array.
        address[b++] = (byte) ((value >>> 8) & 0xff);
        address[b++] = (byte) (value & 0xff);
      }

      // All done. If compression happened, we need to move bytes to the right place in the
      // address. Here's a sample:
      //
      //      input: "1111:2222:3333::7777:8888"
      //     before: { 11, 11, 22, 22, 33, 33, 00, 00, 77, 77, 88, 88, 00, 00, 00, 00  }
      //   compress: 6
      //          b: 10
      //      after: { 11, 11, 22, 22, 33, 33, 00, 00, 00, 00, 00, 00, 77, 77, 88, 88 }
      //
      if (b != address.length) {
        if (compress == -1) return null; // Address didn't have compression or enough groups.
        // Shift the bytes written after "::" to the end of the array, then zero the gap.
        System.arraycopy(address, compress, address, address.length - (b - compress), b - compress);
        Arrays.fill(address, compress, compress + (address.length - b), (byte) 0);
      }

      try {
        return InetAddress.getByAddress(address);
      } catch (UnknownHostException e) {
        // getByAddress only rejects arrays of illegal length; this one is always 16 bytes.
        throw new AssertionError();
      }
    }
1438 
1439     /** Decodes an IPv4 address suffix of an IPv6 address, like 1111::5555:6666:192.168.0.1. */
1440     private static boolean decodeIpv4Suffix(
1441         String input, int pos, int limit, byte[] address, int addressOffset) {
1442       int b = addressOffset;
1443 
1444       for (int i = pos; i < limit; ) {
1445         if (b == address.length) return false; // Too many groups.
1446 
1447         // Read a delimiter.
1448         if (b != addressOffset) {
1449           if (input.charAt(i) != '.') return false; // Wrong delimiter.
1450           i++;
1451         }
1452 
1453         // Read 1 or more decimal digits for a value in 0..255.
1454         int value = 0;
1455         int groupOffset = i;
1456         for (; i < limit; i++) {
1457           char c = input.charAt(i);
1458           if (c < '0' || c > '9') break;
1459           if (value == 0 && groupOffset != i) return false; // Reject unnecessary leading '0's.
1460           value = (value * 10) + c - '0';
1461           if (value > 255) return false; // Value out of range.
1462         }
1463         int groupLength = i - groupOffset;
1464         if (groupLength == 0) return false; // No digits.
1465 
1466         // We've successfully read a byte.
1467         address[b++] = (byte) value;
1468       }
1469 
1470       if (b != addressOffset + 4) return false; // Too few groups. We wanted exactly four.
1471       return true; // Success.
1472     }
1473 
1474     /**
1475      * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts
1476      * {@code ☃.net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}.
1477      * {@code null} will be returned if the input cannot be ToASCII encoded or if the result
1478      * contains unsupported ASCII characters.
1479      */
1480     private static String domainToAscii(String input) {
1481       try {
1482         String result = IDN.toASCII(input).toLowerCase(Locale.US);
1483         if (result.isEmpty()) return null;
1484 
1485         // Confirm that the IDN ToASCII result doesn't contain any illegal characters.
1486         if (containsInvalidHostnameAsciiCodes(result)) {
1487           return null;
1488         }
1489         // TODO: implement all label limits.
1490         return result;
1491       } catch (IllegalArgumentException e) {
1492         return null;
1493       }
1494     }
1495 
1496     private static boolean containsInvalidHostnameAsciiCodes(String hostnameAscii) {
1497       for (int i = 0; i < hostnameAscii.length(); i++) {
1498         char c = hostnameAscii.charAt(i);
1499         // The WHATWG Host parsing rules accepts some character codes which are invalid by
1500         // definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here
1501         // we rule out characters that would cause problems in host headers.
1502         if (c <= '\u001f' || c >= '\u007f') {
1503           return true;
1504         }
1505         // Check for the characters mentioned in the WHATWG Host parsing spec:
1506         // U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]"
1507         // (excluding the characters covered above).
1508         if (" #%/:?@[\\]".indexOf(c) != -1) {
1509           return true;
1510         }
1511       }
1512       return false;
1513     }
1514 
1515     private static String inet6AddressToAscii(byte[] address) {
1516       // Go through the address looking for the longest run of 0s. Each group is 2-bytes.
1517       int longestRunOffset = -1;
1518       int longestRunLength = 0;
1519       for (int i = 0; i < address.length; i += 2) {
1520         int currentRunOffset = i;
1521         while (i < 16 && address[i] == 0 && address[i + 1] == 0) {
1522           i += 2;
1523         }
1524         int currentRunLength = i - currentRunOffset;
1525         if (currentRunLength > longestRunLength) {
1526           longestRunOffset = currentRunOffset;
1527           longestRunLength = currentRunLength;
1528         }
1529       }
1530 
1531       // Emit each 2-byte group in hex, separated by ':'. The longest run of zeroes is "::".
1532       Buffer result = new Buffer();
1533       for (int i = 0; i < address.length; ) {
1534         if (i == longestRunOffset) {
1535           result.writeByte(':');
1536           i += longestRunLength;
1537           if (i == 16) result.writeByte(':');
1538         } else {
1539           if (i > 0) result.writeByte(':');
1540           int group = (address[i] & 0xff) << 8 | address[i + 1] & 0xff;
1541           result.writeHexadecimalUnsignedLong(group);
1542           i += 2;
1543         }
1544       }
1545       return result.readUtf8();
1546     }
1547 
1548     private static int parsePort(String input, int pos, int limit) {
1549       try {
1550         // Canonicalize the port string to skip '\n' etc.
1551         String portString = canonicalize(input, pos, limit, "", false, false, false, true);
1552         int i = Integer.parseInt(portString);
1553         if (i > 0 && i <= 65535) return i;
1554         return -1;
1555       } catch (NumberFormatException e) {
1556         return -1; // Invalid port.
1557       }
1558     }
1559   }
1560 
1561   /**
1562    * Returns the index of the first character in {@code input} that contains a character in {@code
1563    * delimiters}. Returns limit if there is no such character.
1564    */
1565   private static int delimiterOffset(String input, int pos, int limit, String delimiters) {
1566     for (int i = pos; i < limit; i++) {
1567       if (delimiters.indexOf(input.charAt(i)) != -1) return i;
1568     }
1569     return limit;
1570   }
1571 
1572   static String percentDecode(String encoded, boolean plusIsSpace) {
1573     return percentDecode(encoded, 0, encoded.length(), plusIsSpace);
1574   }
1575 
1576   private List<String> percentDecode(List<String> list, boolean plusIsSpace) {
1577     List<String> result = new ArrayList<>(list.size());
1578     for (String s : list) {
1579       result.add(s != null ? percentDecode(s, plusIsSpace) : null);
1580     }
1581     return Collections.unmodifiableList(result);
1582   }
1583 
1584   static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) {
1585     for (int i = pos; i < limit; i++) {
1586       char c = encoded.charAt(i);
1587       if (c == '%' || (c == '+' && plusIsSpace)) {
1588         // Slow path: the character at i requires decoding!
1589         Buffer out = new Buffer();
1590         out.writeUtf8(encoded, pos, i);
1591         percentDecode(out, encoded, i, limit, plusIsSpace);
1592         return out.readUtf8();
1593       }
1594     }
1595 
1596     // Fast path: no characters in [pos..limit) required decoding.
1597     return encoded.substring(pos, limit);
1598   }
1599 
1600   static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
1601     int codePoint;
1602     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1603       codePoint = encoded.codePointAt(i);
1604       if (codePoint == '%' && i + 2 < limit) {
1605         int d1 = decodeHexDigit(encoded.charAt(i + 1));
1606         int d2 = decodeHexDigit(encoded.charAt(i + 2));
1607         if (d1 != -1 && d2 != -1) {
1608           out.writeByte((d1 << 4) + d2);
1609           i += 2;
1610           continue;
1611         }
1612       } else if (codePoint == '+' && plusIsSpace) {
1613         out.writeByte(' ');
1614         continue;
1615       }
1616       out.writeUtf8CodePoint(codePoint);
1617     }
1618   }
1619 
1620   static boolean percentEncoded(String encoded, int pos, int limit) {
1621     return pos + 2 < limit
1622         && encoded.charAt(pos) == '%'
1623         && decodeHexDigit(encoded.charAt(pos + 1)) != -1
1624         && decodeHexDigit(encoded.charAt(pos + 2)) != -1;
1625   }
1626 
1627   static int decodeHexDigit(char c) {
1628     if (c >= '0' && c <= '9') return c - '0';
1629     if (c >= 'a' && c <= 'f') return c - 'a' + 10;
1630     if (c >= 'A' && c <= 'F') return c - 'A' + 10;
1631     return -1;
1632   }
1633 
1634   /**
1635    * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following
1636    * transformations:
1637    * <ul>
1638    *   <li>Tabs, newlines, form feeds and carriage returns are skipped.
1639    *   <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".
1640    *   <li>Characters in {@code encodeSet} are percent-encoded.
1641    *   <li>Control characters and non-ASCII characters are percent-encoded.
1642    *   <li>All other characters are copied without transformation.
1643    * </ul>
1644    *
1645    * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
1646    * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.
1647    * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded
1648    * @param asciiOnly true to encode all non-ASCII codepoints.
1649    */
1650   static String canonicalize(String input, int pos, int limit, String encodeSet,
1651       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
1652     int codePoint;
1653     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1654       codePoint = input.codePointAt(i);
1655       if (codePoint < 0x20
1656           || codePoint == 0x7f
1657           || codePoint >= 0x80 && asciiOnly
1658           || encodeSet.indexOf(codePoint) != -1
1659           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))
1660           || codePoint == '+' && plusIsSpace) {
1661         // Slow path: the character at i requires encoding!
1662         Buffer out = new Buffer();
1663         out.writeUtf8(input, pos, i);
1664         canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace,
1665             asciiOnly);
1666         return out.readUtf8();
1667       }
1668     }
1669 
1670     // Fast path: no characters in [pos..limit) required encoding.
1671     return input.substring(pos, limit);
1672   }
1673 
1674   static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet,
1675       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
1676     Buffer utf8Buffer = null; // Lazily allocated.
1677     int codePoint;
1678     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1679       codePoint = input.codePointAt(i);
1680       if (alreadyEncoded
1681           && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) {
1682         // Skip this character.
1683       } else if (codePoint == '+' && plusIsSpace) {
1684         // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
1685         out.writeUtf8(alreadyEncoded ? "+" : "%2B");
1686       } else if (codePoint < 0x20
1687           || codePoint == 0x7f
1688           || codePoint >= 0x80 && asciiOnly
1689           || encodeSet.indexOf(codePoint) != -1
1690           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) {
1691         // Percent encode this character.
1692         if (utf8Buffer == null) {
1693           utf8Buffer = new Buffer();
1694         }
1695         utf8Buffer.writeUtf8CodePoint(codePoint);
1696         while (!utf8Buffer.exhausted()) {
1697           int b = utf8Buffer.readByte() & 0xff;
1698           out.writeByte('%');
1699           out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]);
1700           out.writeByte(HEX_DIGITS[b & 0xf]);
1701         }
1702       } else {
1703         // This character doesn't need encoding. Just copy it over.
1704         out.writeUtf8CodePoint(codePoint);
1705       }
1706     }
1707   }
1708 
1709   static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,
1710       boolean plusIsSpace, boolean asciiOnly) {
1711     return canonicalize(
1712         input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly);
1713   }
1714 }
1715