• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 Square, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.squareup.okhttp;
17 
18 import java.net.IDN;
19 import java.net.InetAddress;
20 import java.net.MalformedURLException;
21 import java.net.URI;
22 import java.net.URISyntaxException;
23 import java.net.URL;
24 import java.net.UnknownHostException;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collections;
28 import java.util.LinkedHashSet;
29 import java.util.List;
30 import java.util.Locale;
31 import java.util.Set;
32 import okio.Buffer;
33 
34 /**
35  * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this
36  * class to compose and decompose Internet addresses. For example, this code will compose and print
37  * a URL for Google search: <pre>   {@code
38  *
39  *   HttpUrl url = new HttpUrl.Builder()
40  *       .scheme("https")
41  *       .host("www.google.com")
42  *       .addPathSegment("search")
43  *       .addQueryParameter("q", "polar bears")
44  *       .build();
45  *   System.out.println(url);
46  * }</pre>
47  *
48  * which prints: <pre>   {@code
49  *
50  *     https://www.google.com/search?q=polar%20bears
51  * }</pre>
52  *
53  * As another example, this code prints the human-readable query parameters of a Twitter search:
54  * <pre>   {@code
55  *
56  *   HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images");
57  *   for (int i = 0, size = url.querySize(); i < size; i++) {
58  *     System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i));
59  *   }
60  * }</pre>
61  *
62  * which prints: <pre>   {@code
63  *
64  *   q: cute #puppies
65  *   f: images
66  * }</pre>
67  *
68  * In addition to composing URLs from their component parts and decomposing URLs into their
69  * component parts, this class implements relative URL resolution: what address you'd reach by
70  * clicking a relative link on a specified page. For example: <pre>   {@code
71  *
72  *   HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos");
73  *   HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc");
74  *   System.out.println(link);
75  * }</pre>
76  *
77  * which prints: <pre>   {@code
78  *
79  *   https://www.youtube.com/watch?v=cbP2N1BQdYc
80  * }</pre>
81  *
82  * <h3>What's in a URL?</h3>
83  *
84  * A URL has several components.
85  *
86  * <h4>Scheme</h4>
 * Sometimes referred to as <i>protocol</i>, a URL's scheme describes what mechanism should be used
 * to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file}, {@code
 * ftp}), this class only supports {@code http} and {@code https}. Use {@link URI java.net.URI} for
 * URLs with arbitrary schemes.
91  *
92  * <h4>Username and Password</h4>
93  * Username and password are either present, or the empty string {@code ""} if absent. This class
94  * offers no mechanism to differentiate empty from absent. Neither of these components are popular
95  * in practice. Typically HTTP applications use other mechanisms for user identification and
96  * authentication.
97  *
98  * <h4>Host</h4>
99  * The host identifies the webserver that serves the URL's resource. It is either a hostname like
100  * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6
101  * address like {@code ::1}.
102  *
103  * <p>Usually a webserver is reachable with multiple identifiers: its IP addresses, registered
104  * domain names, and even {@code localhost} when connecting from the server itself. Each of a
105  * webserver's names is a distinct URL and they are not interchangeable. For example, even if
106  * {@code http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by
107  * the same IP address, the two URLs identify different resources.
108  *
109  * <h4>Port</h4>
110  * The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS. This
111  * class never returns -1 for the port: if no port is explicitly specified in the URL then the
112  * scheme's default is used.
113  *
114  * <h4>Path</h4>
115  * The path identifies a specific resource on the host. Paths have a hierarchical structure like
116  * "/square/okhttp/issues/1486". Each path segment is prefixed with "/". This class offers methods
117  * to compose and decompose paths by segment. If a path's last segment is the empty string, then the
118  * path ends with "/". This class always builds non-empty paths: if the path is omitted it defaults
119  * to "/", which is a path whose only segment is the empty string.
120  *
121  * <h4>Query</h4>
122  * The query is optional: it can be null, empty, or non-empty. For many HTTP URLs the query string
123  * is subdivided into a collection of name-value parameters. This class offers methods to set the
124  * query as the single string, or as individual name-value parameters. With name-value parameters
125  * the values are optional and names may be repeated.
126  *
127  * <h4>Fragment</h4>
128  * The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and query
129  * the fragment is not sent to the webserver: it's private to the client.
130  *
131  * <h3>Encoding</h3>
132  * Each component must be encoded before it is embedded in the complete URL. As we saw above, the
133  * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query
134  * parameter value.
135  *
136  * <h4>Percent encoding</h4>
137  * Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes (like
138  * {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters,
139  * non-ASCII characters, and characters that already have another meaning in a particular context.
140  *
141  * <p>Percent encoding is used in every URL component except for the hostname. But the set of
142  * characters that need to be encoded is different for each component. For example, the path
143  * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the
144  * start of the URL's query. But within the query and fragment components, the {@code ?} character
145  * doesn't delimit anything and doesn't need to be escaped. <pre>   {@code
146  *
147  *   HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder()
148  *       .addPathSegment("_Who?_")
149  *       .query("_Who?_")
150  *       .fragment("_Who?_")
151  *       .build();
152  *   System.out.println(url);
153  * }</pre>
154  *
155  * This prints: <pre>   {@code
156  *
157  *   http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_
158  * }</pre>
159  *
160  * When parsing URLs that lack percent encoding where it is required, this class will percent encode
161  * the offending characters.
162  *
163  * <h4>IDNA Mapping and Punycode encoding</h4>
164  * Hostnames have different requirements and use a different encoding scheme. It consists of IDNA
165  * mapping and Punycode encoding.
166  *
167  * <p>In order to avoid confusion and discourage phishing attacks,
168  * <a href="http://www.unicode.org/reports/tr46/#ToASCII">IDNA Mapping</a> transforms names to avoid
169  * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM}
170  * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the
171  * Unicode trademark sign (™) could be confused for the letters "TM" in {@code http://ho™mail.com}.
172  * To mitigate this, the single character (™) maps to the string (tm). There is similar policy for
173  * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are
174  * not mapped and cannot be used in a hostname.
175  *
176  * <p><a href="http://ietf.org/rfc/rfc3492.txt">Punycode</a> converts a Unicode string to an ASCII
177  * string to make international domain names work everywhere. For example, "σ" encodes as
178  * "xn--4xa". The encoded string is not human readable, but can be used with classes like {@link
179  * InetAddress} to establish connections.
180  *
181  * <h3>Why another URL model?</h3>
182  * Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL
183  * model to address problems that the others don't.
184  *
185  * <h4>Different URLs should be different</h4>
186  * Although they have different content, {@code java.net.URL} considers the following two URLs
187  * equal, and the {@link Object#equals equals()} method between them returns true:
188  * <ul>
189  *   <li>http://square.github.io/
190  *   <li>http://google.github.io/
191  * </ul>
192  * This is because those two hosts share the same IP address. This is an old, bad design decision
193  * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link
194  * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may
195  * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are
196  * hosted.
197  *
198  * <h4>Equal URLs should be equal</h4>
199  * These two URLs are semantically identical, but {@code java.net.URI} disagrees:
200  * <ul>
201  *   <li>http://host:80/
202  *   <li>http://host
203  * </ul>
204  * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /})
205  * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any
206  * application that stores information-per-URL will need to either canonicalize manually, or suffer
207  * unnecessary redundancy for such URLs.
208  *
209  * <p>Because they don't attempt canonical form, these classes are surprisingly difficult to use
210  * securely. Suppose you're building a webservice that checks that incoming paths are prefixed
211  * "/static/images/" before serving the corresponding assets from the filesystem. <pre>   {@code
212  *
213  *   String attack = "http://example.com/static/images/../../../../../etc/passwd";
214  *   System.out.println(new URL(attack).getPath());
215  *   System.out.println(new URI(attack).getPath());
216  *   System.out.println(HttpUrl.parse(attack).path());
217  * }</pre>
218  *
 * By not canonicalizing the input paths, {@code URL} and {@code URI} are complicit in directory
 * traversal attacks. Code that checks only the path prefix may suffer! The three calls print:
221  * <pre>   {@code
222  *
223  *    /static/images/../../../../../etc/passwd
224  *    /static/images/../../../../../etc/passwd
225  *    /etc/passwd
226  * }</pre>
227  *
228  * <h4>If it works on the web, it should work in your application</h4>
229  * The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like
230  * "http://example.com/abc|def" because the '|' character is unsupported. This class is more
231  * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef".
 * This kind of behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with
 * major web browsers over consistency with obsolete specifications.
234  *
235  * <h4>Paths and Queries should decompose</h4>
236  * Neither of the built-in URL models offer direct access to path segments or query parameters.
237  * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+'
238  * characters get silently replaced with spaces? If a query parameter contains a '&amp;', does that
239  * get escaped? By offering methods to read and write individual query parameters directly,
240  * application developers are saved from the hassles of encoding and decoding.
241  *
242  * <h4>Plus a modern API</h4>
243  * The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping
244  * constructors. For example, there's no API to compose a URI with a custom port without also
245  * providing a query and fragment.
246  *
247  * <p>Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With
248  * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and
249  * path but no hostname. Building APIs that consume such malformed values is difficult!
250  *
251  * <p>This class has a modern API. It avoids punitive checked exceptions: {@link #parse parse()}
252  * returns null if the input is an invalid URL. You can even be explicit about whether each
253  * component has been encoded already.
254  */
255 public final class HttpUrl {
256   private static final char[] HEX_DIGITS =
257       { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
258   static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
259   static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
260   static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#";
261   static final String PATH_SEGMENT_ENCODE_SET_URI = "[]";
262   static final String QUERY_ENCODE_SET = " \"'<>#";
263   static final String QUERY_COMPONENT_ENCODE_SET = " \"'<>#&=";
264   static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}";
265   static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~";
266   static final String FRAGMENT_ENCODE_SET = "";
267   static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}";
268 
269   /** Either "http" or "https". */
270   private final String scheme;
271 
272   /** Decoded username. */
273   private final String username;
274 
275   /** Decoded password. */
276   private final String password;
277 
278   /** Canonical hostname. */
279   private final String host;
280 
281   /** Either 80, 443 or a user-specified port. In range [1..65535]. */
282   private final int port;
283 
284   /**
285    * A list of canonical path segments. This list always contains at least one element, which may
286    * be the empty string. Each segment is formatted with a leading '/', so if path segments were
287    * ["a", "b", ""], then the encoded path would be "/a/b/".
288    */
289   private final List<String> pathSegments;
290 
291   /**
292    * Alternating, decoded query names and values, or null for no query. Names may be empty or
293    * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or
294    * empty, or non-empty.
295    */
296   private final List<String> queryNamesAndValues;
297 
298   /** Decoded fragment. */
299   private final String fragment;
300 
301   /** Canonical URL. */
302   private final String url;
303 
/**
 * Snapshots {@code builder} into an immutable URL. The encoded username, password, path
 * segments, query and fragment are percent-decoded for the accessor methods, while the canonical
 * string form is taken from {@code builder.toString()}.
 */
private HttpUrl(Builder builder) {
  this.scheme = builder.scheme;
  this.username = percentDecode(builder.encodedUsername, false);
  this.password = percentDecode(builder.encodedPassword, false);
  this.host = builder.host;
  this.port = builder.effectivePort();
  this.pathSegments = percentDecode(builder.encodedPathSegments, false);

  // A null query or fragment means "absent" and must stay null rather than become empty.
  if (builder.encodedQueryNamesAndValues != null) {
    this.queryNamesAndValues = percentDecode(builder.encodedQueryNamesAndValues, true);
  } else {
    this.queryNamesAndValues = null;
  }
  if (builder.encodedFragment != null) {
    this.fragment = percentDecode(builder.encodedFragment, false);
  } else {
    this.fragment = null;
  }

  this.url = builder.toString();
}
319 
320   /** Returns this URL as a {@link URL java.net.URL}. */
url()321   public URL url() {
322     try {
323       return new URL(url);
324     } catch (MalformedURLException e) {
325       throw new RuntimeException(e); // Unexpected!
326     }
327   }
328 
329   /**
330    * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this
331    * class, the returned URI may be semantically different from this URL:
332    * <ul>
333    *   <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped.
334    *   <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}.
335    *   <li>Whitespace and control characters in the fragment will be stripped.
336    * </ul>
337    *
338    * <p>These differences may have a significant consequence when the URI is interpretted by a
339    * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided.
340    */
uri()341   public URI uri() {
342     String uri = newBuilder().reencodeForUri().toString();
343     try {
344       return new URI(uri);
345     } catch (URISyntaxException e) {
346       // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry.
347       try {
348         String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", "");
349         return URI.create(stripped);
350       } catch (Exception e1) {
351         throw new RuntimeException(e); // Unexpected!
352       }
353     }
354   }
355 
356   /** Returns either "http" or "https". */
scheme()357   public String scheme() {
358     return scheme;
359   }
360 
isHttps()361   public boolean isHttps() {
362     return scheme.equals("https");
363   }
364 
365   /** Returns the username, or an empty string if none is set. */
encodedUsername()366   public String encodedUsername() {
367     if (username.isEmpty()) return "";
368     int usernameStart = scheme.length() + 3; // "://".length() == 3.
369     int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@");
370     return url.substring(usernameStart, usernameEnd);
371   }
372 
username()373   public String username() {
374     return username;
375   }
376 
377   /** Returns the password, or an empty string if none is set. */
encodedPassword()378   public String encodedPassword() {
379     if (password.isEmpty()) return "";
380     int passwordStart = url.indexOf(':', scheme.length() + 3) + 1;
381     int passwordEnd = url.indexOf('@');
382     return url.substring(passwordStart, passwordEnd);
383   }
384 
385   /** Returns the decoded password, or an empty string if none is present. */
password()386   public String password() {
387     return password;
388   }
389 
390   /**
391    * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May
392    * be:
393    * <ul>
394    *   <li>A regular host name, like {@code android.com}.
395    *   <li>An IPv4 address, like {@code 127.0.0.1}.
396    *   <li>An IPv6 address, like {@code ::1}. Note that there are no square braces.
397    *   <li>An encoded IDN, like {@code xn--n3h.net}.
398    * </ul>
399    */
host()400   public String host() {
401     return host;
402   }
403 
404   /**
405    * Same as {@link #host} except that literal IPv6 addresses are surrounding by square
406    * braces. For example, this method will return {@code [::1]} where {@code host} returns
407    * {@code ::1}.
408    */
rfc2732host()409   public String rfc2732host() {
410     if (host.indexOf(':') == -1) {
411       return host;
412     }
413 
414     return "[" + host + "]";
415   }
416 
417   /**
418    * Returns the explicitly-specified port if one was provided, or the default port for this URL's
419    * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code
420    * https://square.com/}. The result is in {@code [1..65535]}.
421    */
port()422   public int port() {
423     return port;
424   }
425 
426   /**
427    * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1
428    * otherwise.
429    */
defaultPort(String scheme)430   public static int defaultPort(String scheme) {
431     if (scheme.equals("http")) {
432       return 80;
433     } else if (scheme.equals("https")) {
434       return 443;
435     } else {
436       return -1;
437     }
438   }
439 
pathSize()440   public int pathSize() {
441     return pathSegments.size();
442   }
443 
444   /**
445    * Returns the entire path of this URL, encoded for use in HTTP resource resolution.
446    // ANDROID-BEGIN: http://b/29983827
447    //   * The returned path is always nonempty and is prefixed with {@code /}.
448    // ANDROID-END: http://b/29983827
449    */
encodedPath()450   public String encodedPath() {
451     int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3.
452     // ANDROID-BEGIN: http://b/29983827
453     if (pathStart == -1) {
454       return "";
455     }
456     // ANDROID-END: http://b/29983827
457     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
458     return url.substring(pathStart, pathEnd);
459   }
460 
pathSegmentsToString(StringBuilder out, List<String> pathSegments)461   static void pathSegmentsToString(StringBuilder out, List<String> pathSegments) {
462     for (int i = 0, size = pathSegments.size(); i < size; i++) {
463       out.append('/');
464       out.append(pathSegments.get(i));
465     }
466   }
467 
encodedPathSegments()468   public List<String> encodedPathSegments() {
469     int pathStart = url.indexOf('/', scheme.length() + 3);
470     // ANDROID-BEGIN: http://b/29983827
471     if (pathStart == -1) {
472       return new ArrayList<>();
473     }
474     // ANDROID-END: http://b/29983827
475 
476     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
477     List<String> result = new ArrayList<>();
478     for (int i = pathStart; i < pathEnd; ) {
479       i++; // Skip the '/'.
480       int segmentEnd = delimiterOffset(url, i, pathEnd, "/");
481       result.add(url.substring(i, segmentEnd));
482       i = segmentEnd;
483     }
484     return result;
485   }
486 
pathSegments()487   public List<String> pathSegments() {
488     return pathSegments;
489   }
490 
491   /**
492    * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string
493    * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all
494    * other URLs).
495    */
encodedQuery()496   public String encodedQuery() {
497     if (queryNamesAndValues == null) return null; // No query.
498     int queryStart = url.indexOf('?') + 1;
499     int queryEnd = delimiterOffset(url, queryStart + 1, url.length(), "#");
500     return url.substring(queryStart, queryEnd);
501   }
502 
namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues)503   static void namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues) {
504     for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {
505       String name = namesAndValues.get(i);
506       String value = namesAndValues.get(i + 1);
507       if (i > 0) out.append('&');
508       out.append(name);
509       if (value != null) {
510         out.append('=');
511         out.append(value);
512       }
513     }
514   }
515 
516   /**
517    * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a
518    * query string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject",
519    * "math", "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain
520    * '=' characters.
521    */
queryStringToNamesAndValues(String encodedQuery)522   static List<String> queryStringToNamesAndValues(String encodedQuery) {
523     List<String> result = new ArrayList<>();
524     for (int pos = 0; pos <= encodedQuery.length(); ) {
525       int ampersandOffset = encodedQuery.indexOf('&', pos);
526       if (ampersandOffset == -1) ampersandOffset = encodedQuery.length();
527 
528       int equalsOffset = encodedQuery.indexOf('=', pos);
529       if (equalsOffset == -1 || equalsOffset > ampersandOffset) {
530         result.add(encodedQuery.substring(pos, ampersandOffset));
531         result.add(null); // No value for this name.
532       } else {
533         result.add(encodedQuery.substring(pos, equalsOffset));
534         result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset));
535       }
536       pos = ampersandOffset + 1;
537     }
538     return result;
539   }
540 
query()541   public String query() {
542     if (queryNamesAndValues == null) return null; // No query.
543     StringBuilder result = new StringBuilder();
544     namesAndValuesToQueryString(result, queryNamesAndValues);
545     return result.toString();
546   }
547 
querySize()548   public int querySize() {
549     return queryNamesAndValues != null ? queryNamesAndValues.size() / 2 : 0;
550   }
551 
552   /**
553    * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is
554    * no such query parameter.
555    */
queryParameter(String name)556   public String queryParameter(String name) {
557     if (queryNamesAndValues == null) return null;
558     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
559       if (name.equals(queryNamesAndValues.get(i))) {
560         return queryNamesAndValues.get(i + 1);
561       }
562     }
563     return null;
564   }
565 
queryParameterNames()566   public Set<String> queryParameterNames() {
567     if (queryNamesAndValues == null) return Collections.emptySet();
568     Set<String> result = new LinkedHashSet<>();
569     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
570       result.add(queryNamesAndValues.get(i));
571     }
572     return Collections.unmodifiableSet(result);
573   }
574 
queryParameterValues(String name)575   public List<String> queryParameterValues(String name) {
576     if (queryNamesAndValues == null) return Collections.emptyList();
577     List<String> result = new ArrayList<>();
578     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
579       if (name.equals(queryNamesAndValues.get(i))) {
580         result.add(queryNamesAndValues.get(i + 1));
581       }
582     }
583     return Collections.unmodifiableList(result);
584   }
585 
queryParameterName(int index)586   public String queryParameterName(int index) {
587     return queryNamesAndValues.get(index * 2);
588   }
589 
queryParameterValue(int index)590   public String queryParameterValue(int index) {
591     return queryNamesAndValues.get(index * 2 + 1);
592   }
593 
encodedFragment()594   public String encodedFragment() {
595     if (fragment == null) return null;
596     int fragmentStart = url.indexOf('#') + 1;
597     return url.substring(fragmentStart);
598   }
599 
fragment()600   public String fragment() {
601     return fragment;
602   }
603 
604   /** Returns the URL that would be retrieved by following {@code link} from this URL. */
resolve(String link)605   public HttpUrl resolve(String link) {
606     // ANDROID-BEGIN: http://b/29983827
607     // Builder builder = new Builder();
608     Builder builder = new Builder(false);
609     // ANDROID-END: http://b/29983827
610     Builder.ParseResult result = builder.parse(this, link);
611     return result == Builder.ParseResult.SUCCESS ? builder.build() : null;
612   }
613 
newBuilder()614   public Builder newBuilder() {
615     // ANDROID-BEGIN: http://b/29983827
616     // Builder builder = new Builder();
617     Builder result = new Builder(false);
618     // ANDROID-END: http://b/29983827
619     result.scheme = scheme;
620     result.encodedUsername = encodedUsername();
621     result.encodedPassword = encodedPassword();
622     result.host = host;
623     // If we're set to a default port, unset it in case of a scheme change.
624     result.port = port != defaultPort(scheme) ? port : -1;
625     result.encodedPathSegments.clear();
626     result.encodedPathSegments.addAll(encodedPathSegments());
627     result.encodedQuery(encodedQuery());
628     result.encodedFragment = encodedFragment();
629     return result;
630   }
631 
632   /**
633    * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS
634    * URL, or null if it isn't.
635    */
parse(String url)636   public static HttpUrl parse(String url) {
637     // ANDROID-BEGIN: http://b/29983827
638     // Builder builder = new Builder();
639     Builder builder = new Builder(false);
640     // ANDROID-END: http://b/29983827
641     Builder.ParseResult result = builder.parse(null, url);
642     return result == Builder.ParseResult.SUCCESS ? builder.build() : null;
643   }
644 
645   /**
646    * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or
647    * null if it has any other protocol.
648    */
get(URL url)649   public static HttpUrl get(URL url) {
650     return parse(url.toString());
651   }
652 
653   /**
654    * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS
655    * URL, or throws an exception if it isn't.
656    *
657    * @throws MalformedURLException if there was a non-host related URL issue
658    * @throws UnknownHostException if the host was invalid
659    */
getChecked(String url)660   static HttpUrl getChecked(String url) throws MalformedURLException, UnknownHostException {
661     // ANDROID-END: http://b/29983827
662     // Builder builder = new Builder();
663     Builder builder = new Builder(false);
664     // ANDROID-END: http://b/29983827
665     Builder.ParseResult result = builder.parse(null, url);
666     switch (result) {
667       case SUCCESS:
668         return builder.build();
669       case INVALID_HOST:
670         throw new UnknownHostException("Invalid host: " + url);
671       case UNSUPPORTED_SCHEME:
672       case MISSING_SCHEME:
673       case INVALID_PORT:
674       default:
675         throw new MalformedURLException("Invalid URL: " + result + " for " + url);
676     }
677   }
678 
get(URI uri)679   public static HttpUrl get(URI uri) {
680     return parse(uri.toString());
681   }
682 
equals(Object o)683   @Override public boolean equals(Object o) {
684     return o instanceof HttpUrl && ((HttpUrl) o).url.equals(url);
685   }
686 
hashCode()687   @Override public int hashCode() {
688     return url.hashCode();
689   }
690 
toString()691   @Override public String toString() {
692     return url;
693   }
694 
695   public static final class Builder {
696     String scheme;
697     String encodedUsername = "";
698     String encodedPassword = "";
699     String host;
700     int port = -1;
701     final List<String> encodedPathSegments = new ArrayList<>();
702     List<String> encodedQueryNamesAndValues;
703     String encodedFragment;
704 
705     // ANDROID-BEGIN: http://b/29983827
706     // public Builder() {
707     //   encodedPathSegments.add(""); // The default path is '/' which needs a trailing space.
708     // }
709 
/** Creates a builder whose path starts as the default {@code "/"}: one empty path segment. */
public Builder() {
  this(true); // The default path is '/', which is represented by a single empty segment.
}
713 
/**
 * Creates a builder, optionally seeding the path.
 *
 * @param startWithSlash if true the path starts with one empty segment (the path "/"); if
 *     false the segment list starts out empty
 */
private Builder(boolean startWithSlash) {
  if (!startWithSlash) {
    return;
  }
  encodedPathSegments.add("");
}
719     // ANDROID-END: http://b/29983827
720 
scheme(String scheme)721     public Builder scheme(String scheme) {
722       if (scheme == null) {
723         throw new IllegalArgumentException("scheme == null");
724       } else if (scheme.equalsIgnoreCase("http")) {
725         this.scheme = "http";
726       } else if (scheme.equalsIgnoreCase("https")) {
727         this.scheme = "https";
728       } else {
729         throw new IllegalArgumentException("unexpected scheme: " + scheme);
730       }
731       return this;
732     }
733 
username(String username)734     public Builder username(String username) {
735       if (username == null) throw new IllegalArgumentException("username == null");
736       this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true);
737       return this;
738     }
739 
encodedUsername(String encodedUsername)740     public Builder encodedUsername(String encodedUsername) {
741       if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null");
742       this.encodedUsername = canonicalize(
743           encodedUsername, USERNAME_ENCODE_SET, true, false, false, true);
744       return this;
745     }
746 
password(String password)747     public Builder password(String password) {
748       if (password == null) throw new IllegalArgumentException("password == null");
749       this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true);
750       return this;
751     }
752 
encodedPassword(String encodedPassword)753     public Builder encodedPassword(String encodedPassword) {
754       if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null");
755       this.encodedPassword = canonicalize(
756           encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true);
757       return this;
758     }
759 
760     /**
761      * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6
762      *     address.
763      */
host(String host)764     public Builder host(String host) {
765       if (host == null) throw new IllegalArgumentException("host == null");
766       String encoded = canonicalizeHost(host, 0, host.length());
767       if (encoded == null) throw new IllegalArgumentException("unexpected host: " + host);
768       this.host = encoded;
769       return this;
770     }
771 
port(int port)772     public Builder port(int port) {
773       if (port <= 0 || port > 65535) throw new IllegalArgumentException("unexpected port: " + port);
774       this.port = port;
775       return this;
776     }
777 
effectivePort()778     int effectivePort() {
779       return port != -1 ? port : defaultPort(scheme);
780     }
781 
addPathSegment(String pathSegment)782     public Builder addPathSegment(String pathSegment) {
783       if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
784       push(pathSegment, 0, pathSegment.length(), false, false);
785       return this;
786     }
787 
addEncodedPathSegment(String encodedPathSegment)788     public Builder addEncodedPathSegment(String encodedPathSegment) {
789       if (encodedPathSegment == null) {
790         throw new IllegalArgumentException("encodedPathSegment == null");
791       }
792       push(encodedPathSegment, 0, encodedPathSegment.length(), false, true);
793       return this;
794     }
795 
setPathSegment(int index, String pathSegment)796     public Builder setPathSegment(int index, String pathSegment) {
797       if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
798       String canonicalPathSegment = canonicalize(
799           pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true);
800       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
801         throw new IllegalArgumentException("unexpected path segment: " + pathSegment);
802       }
803       encodedPathSegments.set(index, canonicalPathSegment);
804       return this;
805     }
806 
setEncodedPathSegment(int index, String encodedPathSegment)807     public Builder setEncodedPathSegment(int index, String encodedPathSegment) {
808       if (encodedPathSegment == null) {
809         throw new IllegalArgumentException("encodedPathSegment == null");
810       }
811       String canonicalPathSegment = canonicalize(encodedPathSegment,
812           0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true);
813       encodedPathSegments.set(index, canonicalPathSegment);
814       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
815         throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment);
816       }
817       return this;
818     }
819 
removePathSegment(int index)820     public Builder removePathSegment(int index) {
821       encodedPathSegments.remove(index);
822       // ANDROID-BEGIN: http://b/29983827. Note this method only used from tests.
823       // Only changed for consistency.
824       //      if (encodedPathSegments.isEmpty()) {
825       //        encodedPathSegments.add(""); // Always leave at least one '/'.
826       //      }
827       // ANDROID-END: http://b/29983827 - only used from tests
828       return this;
829     }
830 
encodedPath(String encodedPath)831     public Builder encodedPath(String encodedPath) {
832       if (encodedPath == null) throw new IllegalArgumentException("encodedPath == null");
833       if (!encodedPath.startsWith("/")) {
834         throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath);
835       }
836       resolvePath(encodedPath, 0, encodedPath.length());
837       return this;
838     }
839 
query(String query)840     public Builder query(String query) {
841       this.encodedQueryNamesAndValues = query != null
842           ? queryStringToNamesAndValues(canonicalize(
843               query, QUERY_ENCODE_SET, false, false, true, true))
844           : null;
845       return this;
846     }
847 
encodedQuery(String encodedQuery)848     public Builder encodedQuery(String encodedQuery) {
849       this.encodedQueryNamesAndValues = encodedQuery != null
850           ? queryStringToNamesAndValues(
851               canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true))
852           : null;
853       return this;
854     }
855 
856     /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */
addQueryParameter(String name, String value)857     public Builder addQueryParameter(String name, String value) {
858       if (name == null) throw new IllegalArgumentException("name == null");
859       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
860       encodedQueryNamesAndValues.add(
861           canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true));
862       encodedQueryNamesAndValues.add(value != null
863           ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)
864           : null);
865       return this;
866     }
867 
868     /** Adds the pre-encoded query parameter to this URL's query string. */
addEncodedQueryParameter(String encodedName, String encodedValue)869     public Builder addEncodedQueryParameter(String encodedName, String encodedValue) {
870       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
871       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
872       encodedQueryNamesAndValues.add(
873           canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
874       encodedQueryNamesAndValues.add(encodedValue != null
875           ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)
876           : null);
877       return this;
878     }
879 
setQueryParameter(String name, String value)880     public Builder setQueryParameter(String name, String value) {
881       removeAllQueryParameters(name);
882       addQueryParameter(name, value);
883       return this;
884     }
885 
setEncodedQueryParameter(String encodedName, String encodedValue)886     public Builder setEncodedQueryParameter(String encodedName, String encodedValue) {
887       removeAllEncodedQueryParameters(encodedName);
888       addEncodedQueryParameter(encodedName, encodedValue);
889       return this;
890     }
891 
removeAllQueryParameters(String name)892     public Builder removeAllQueryParameters(String name) {
893       if (name == null) throw new IllegalArgumentException("name == null");
894       if (encodedQueryNamesAndValues == null) return this;
895       String nameToRemove = canonicalize(
896           name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true);
897       removeAllCanonicalQueryParameters(nameToRemove);
898       return this;
899     }
900 
removeAllEncodedQueryParameters(String encodedName)901     public Builder removeAllEncodedQueryParameters(String encodedName) {
902       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
903       if (encodedQueryNamesAndValues == null) return this;
904       removeAllCanonicalQueryParameters(
905           canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
906       return this;
907     }
908 
removeAllCanonicalQueryParameters(String canonicalName)909     private void removeAllCanonicalQueryParameters(String canonicalName) {
910       for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) {
911         if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) {
912           encodedQueryNamesAndValues.remove(i + 1);
913           encodedQueryNamesAndValues.remove(i);
914           if (encodedQueryNamesAndValues.isEmpty()) {
915             encodedQueryNamesAndValues = null;
916             return;
917           }
918         }
919       }
920     }
921 
fragment(String fragment)922     public Builder fragment(String fragment) {
923       this.encodedFragment = fragment != null
924           ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false)
925           : null;
926       return this;
927     }
928 
encodedFragment(String encodedFragment)929     public Builder encodedFragment(String encodedFragment) {
930       this.encodedFragment = encodedFragment != null
931           ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false)
932           : null;
933       return this;
934     }
935 
936     /**
937      * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is
938      * particularly strict for certain components.
939      */
reencodeForUri()940     Builder reencodeForUri() {
941       for (int i = 0, size = encodedPathSegments.size(); i < size; i++) {
942         String pathSegment = encodedPathSegments.get(i);
943         encodedPathSegments.set(i,
944             canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true));
945       }
946       if (encodedQueryNamesAndValues != null) {
947         for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) {
948           String component = encodedQueryNamesAndValues.get(i);
949           if (component != null) {
950             encodedQueryNamesAndValues.set(i,
951                 canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true));
952           }
953         }
954       }
955       if (encodedFragment != null) {
956         encodedFragment = canonicalize(
957             encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false);
958       }
959       return this;
960     }
961 
build()962     public HttpUrl build() {
963       if (scheme == null) throw new IllegalStateException("scheme == null");
964       if (host == null) throw new IllegalStateException("host == null");
965       return new HttpUrl(this);
966     }
967 
toString()968     @Override public String toString() {
969       StringBuilder result = new StringBuilder();
970       result.append(scheme);
971       result.append("://");
972 
973       if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) {
974         result.append(encodedUsername);
975         if (!encodedPassword.isEmpty()) {
976           result.append(':');
977           result.append(encodedPassword);
978         }
979         result.append('@');
980       }
981 
982       if (host.indexOf(':') != -1) {
983         // Host is an IPv6 address.
984         result.append('[');
985         result.append(host);
986         result.append(']');
987       } else {
988         result.append(host);
989       }
990 
991       int effectivePort = effectivePort();
992       if (effectivePort != defaultPort(scheme)) {
993         result.append(':');
994         result.append(effectivePort);
995       }
996 
997       pathSegmentsToString(result, encodedPathSegments);
998 
999       if (encodedQueryNamesAndValues != null) {
1000         result.append('?');
1001         namesAndValuesToQueryString(result, encodedQueryNamesAndValues);
1002       }
1003 
1004       if (encodedFragment != null) {
1005         result.append('#');
1006         result.append(encodedFragment);
1007       }
1008 
1009       return result.toString();
1010     }
1011 
    /** Outcome of {@link #parse}: either success or the reason the input was rejected. */
    enum ParseResult {
      /** Every component of the input was accepted. */
      SUCCESS,
      /** The input has no scheme and there is no base URL to take one from. */
      MISSING_SCHEME,
      /** The input has a scheme, but it is neither {@code http} nor {@code https}. */
      UNSUPPORTED_SCHEME,
      /** The text after the host's ':' could not be parsed as a port. */
      INVALID_PORT,
      /** The host could not be canonicalized. */
      INVALID_HOST,
    }
1019 
    /**
     * Parses {@code input} into this builder's components, taking any components the input lacks
     * (scheme, authority, path, query) from {@code base}.
     *
     * @param base the URL that a relative {@code input} is resolved against, or null if none.
     * @param input the URL text. Leading and trailing ASCII whitespace is ignored.
     * @return {@link ParseResult#SUCCESS} if all components were accepted, otherwise the result
     *     describing the first component that was rejected.
     */
    ParseResult parse(HttpUrl base, String input) {
      int pos = skipLeadingAsciiWhitespace(input, 0, input.length());
      int limit = skipTrailingAsciiWhitespace(input, pos, input.length());

      // Scheme.
      int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit);
      if (schemeDelimiterOffset != -1) {
        if (input.regionMatches(true, pos, "https:", 0, 6)) {
          this.scheme = "https";
          pos += "https:".length();
        } else if (input.regionMatches(true, pos, "http:", 0, 5)) {
          this.scheme = "http";
          pos += "http:".length();
        } else {
          return ParseResult.UNSUPPORTED_SCHEME; // Not an HTTP scheme.
        }
      } else if (base != null) {
        // No scheme in the input: inherit the base URL's.
        this.scheme = base.scheme;
      } else {
        return ParseResult.MISSING_SCHEME; // No scheme.
      }

      // Authority.
      boolean hasUsername = false;
      boolean hasPassword = false;
      int slashCount = slashCount(input, pos, limit);
      if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) {
        // Read an authority if either:
        //  * The input starts with 2 or more slashes. These follow the scheme if it exists.
        //  * There is no base URL to copy an authority from.
        //  * The input scheme exists and is different from the base URL's scheme.
        //
        // The structure of an authority is:
        //   username:password@host:port
        //
        // Username, password and port are optional.
        //   [username[:password]@]host[:port]
        pos += slashCount;
        authority:
        while (true) {
          int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#");
          // -1 stands for "ran into limit without finding a delimiter".
          int c = componentDelimiterOffset != limit
              ? input.charAt(componentDelimiterOffset)
              : -1;
          switch (c) {
            case '@':
              // User info precedes.
              if (!hasPassword) {
                int passwordColonOffset = delimiterOffset(
                    input, pos, componentDelimiterOffset, ":");
                String canonicalUsername = canonicalize(
                    input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true);
                // A second '@' means the previous one belonged to the username: rejoin the
                // pieces with an encoded '@'.
                this.encodedUsername = hasUsername
                    ? this.encodedUsername + "%40" + canonicalUsername
                    : canonicalUsername;
                if (passwordColonOffset != componentDelimiterOffset) {
                  hasPassword = true;
                  this.encodedPassword = canonicalize(input, passwordColonOffset + 1,
                      componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
                }
                hasUsername = true;
              } else {
                // Already reading the password, so this '@' and what preceded it extend it.
                this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos,
                    componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
              }
              pos = componentDelimiterOffset + 1;
              break;

            case -1:
            case '/':
            case '\\':
            case '?':
            case '#':
              // Host info precedes.
              int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset);
              if (portColonOffset + 1 < componentDelimiterOffset) {
                // At least one character follows the ':', so there is a port to parse.
                this.host = canonicalizeHost(input, pos, portColonOffset);
                this.port = parsePort(input, portColonOffset + 1, componentDelimiterOffset);
                if (this.port == -1) return ParseResult.INVALID_PORT; // Invalid port.
              } else {
                // No ':' at all, or nothing after it: use the scheme's default port.
                this.host = canonicalizeHost(input, pos, portColonOffset);
                this.port = defaultPort(this.scheme);
              }
              if (this.host == null) return ParseResult.INVALID_HOST; // Invalid host.
              pos = componentDelimiterOffset;
              break authority;
          }
        }
      } else {
        // This is a relative link. Copy over all authority components. Also maybe the path & query.
        this.encodedUsername = base.encodedUsername();
        this.encodedPassword = base.encodedPassword();
        this.host = base.host;
        this.port = base.port;
        this.encodedPathSegments.clear();
        this.encodedPathSegments.addAll(base.encodedPathSegments());
        // The input has neither a path nor a query of its own, so keep the base URL's query.
        if (pos == limit || input.charAt(pos) == '#') {
          encodedQuery(base.encodedQuery());
        }
      }

      // Resolve the relative path.
      int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");
      resolvePath(input, pos, pathDelimiterOffset);
      pos = pathDelimiterOffset;

      // Query.
      if (pos < limit && input.charAt(pos) == '?') {
        int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#");
        this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(
            input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true));
        pos = queryDelimiterOffset;
      }

      // Fragment.
      if (pos < limit && input.charAt(pos) == '#') {
        this.encodedFragment = canonicalize(
            input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false);
      }

      return ParseResult.SUCCESS;
    }
1141 
resolvePath(String input, int pos, int limit)1142     private void resolvePath(String input, int pos, int limit) {
1143       // Read a delimiter.
1144       if (pos == limit) {
1145         // Empty path: keep the base path as-is.
1146         return;
1147       }
1148       char c = input.charAt(pos);
1149       if (c == '/' || c == '\\') {
1150         // Absolute path: reset to the default "/".
1151         encodedPathSegments.clear();
1152         encodedPathSegments.add("");
1153         pos++;
1154       } else {
1155         // ANDROID-BEGIN: http://b/29983827
1156         // // Relative path: clear everything after the last '/'.
1157         // encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1158         // Relative path: clear everything after the last '/' (if there is one).
1159         if (!encodedPathSegments.isEmpty()) {
1160           encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1161         }
1162         // ANDROID-END: http://b/29983827
1163       }
1164 
1165       // Read path segments.
1166       for (int i = pos; i < limit; ) {
1167         int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\");
1168         boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit;
1169         push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true);
1170         i = pathSegmentDelimiterOffset;
1171         if (segmentHasTrailingSlash) i++;
1172       }
1173     }
1174 
1175     /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */
1176     private void push(String input, int pos, int limit, boolean addTrailingSlash,
1177         boolean alreadyEncoded) {
1178       String segment = canonicalize(
1179           input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true);
1180       if (isDot(segment)) {
1181         return; // Skip '.' path segments.
1182       }
1183       if (isDotDot(segment)) {
1184         pop();
1185         return;
1186       }
1187 
1188       // ANDROID-BEGIN: http://b/29983827
1189       // If the encodedPathSegments doesn't even include "/" then add the leading "/" before
1190       // pushing more segments or modifying existing segments.
1191       if (encodedPathSegments.isEmpty()) {
1192         encodedPathSegments.add("");
1193       }
1194       // ANDROID-END: http://b/29983827
1195 
1196       if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) {
1197         encodedPathSegments.set(encodedPathSegments.size() - 1, segment);
1198       } else {
1199         encodedPathSegments.add(segment);
1200       }
1201       if (addTrailingSlash) {
1202         encodedPathSegments.add("");
1203       }
1204     }
1205 
1206     private boolean isDot(String input) {
1207       return input.equals(".") || input.equalsIgnoreCase("%2e");
1208     }
1209 
1210     private boolean isDotDot(String input) {
1211       return input.equals("..")
1212           || input.equalsIgnoreCase("%2e.")
1213           || input.equalsIgnoreCase(".%2e")
1214           || input.equalsIgnoreCase("%2e%2e");
1215     }
1216 
1217     /**
1218      * Removes a path segment. When this method returns the last segment is always "", which means
1219      * the encoded path will have a trailing '/'.
1220      *
1221      * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from
1222      * ["a", "b", "c", ""] to ["a", "b", ""].
1223      *
1224      * <p>Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"]
1225      * to ["a", "b", ""].
1226      */
1227     private void pop() {
1228       // ANDROID-BEGIN: http://b/29983827
1229       // Cannot pop() if there isn't even a "/". Leave the path as is. This method is only used
1230       // from push(). push() handles the empty case explicitly.
1231       if (encodedPathSegments.isEmpty()) {
1232         return;
1233       }
1234       // ANDROID-END: http://b/29983827
1235 
1236       String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1);
1237 
1238       // Make sure the path ends with a '/' by either adding an empty string or clearing a segment.
1239       if (removed.isEmpty() && !encodedPathSegments.isEmpty()) {
1240         encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1241       } else {
1242         encodedPathSegments.add("");
1243       }
1244     }
1245 
1246     /**
1247      * Increments {@code pos} until {@code input[pos]} is not ASCII whitespace. Stops at {@code
1248      * limit}.
1249      */
1250     private int skipLeadingAsciiWhitespace(String input, int pos, int limit) {
1251       for (int i = pos; i < limit; i++) {
1252         switch (input.charAt(i)) {
1253           case '\t':
1254           case '\n':
1255           case '\f':
1256           case '\r':
1257           case ' ':
1258             continue;
1259           default:
1260             return i;
1261         }
1262       }
1263       return limit;
1264     }
1265 
1266     /**
1267      * Decrements {@code limit} until {@code input[limit - 1]} is not ASCII whitespace. Stops at
1268      * {@code pos}.
1269      */
1270     private int skipTrailingAsciiWhitespace(String input, int pos, int limit) {
1271       for (int i = limit - 1; i >= pos; i--) {
1272         switch (input.charAt(i)) {
1273           case '\t':
1274           case '\n':
1275           case '\f':
1276           case '\r':
1277           case ' ':
1278             continue;
1279           default:
1280             return i + 1;
1281         }
1282       }
1283       return pos;
1284     }
1285 
1286     /**
1287      * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if
1288      * {@code input} does not have a scheme that starts at {@code pos}.
1289      */
1290     private static int schemeDelimiterOffset(String input, int pos, int limit) {
1291       if (limit - pos < 2) return -1;
1292 
1293       char c0 = input.charAt(pos);
1294       if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char.
1295 
1296       for (int i = pos + 1; i < limit; i++) {
1297         char c = input.charAt(i);
1298 
1299         if ((c >= 'a' && c <= 'z')
1300             || (c >= 'A' && c <= 'Z')
1301             || (c >= '0' && c <= '9')
1302             || c == '+'
1303             || c == '-'
1304             || c == '.') {
1305           continue; // Scheme character. Keep going.
1306         } else if (c == ':') {
1307           return i; // Scheme prefix!
1308         } else {
1309           return -1; // Non-scheme character before the first ':'.
1310         }
1311       }
1312 
1313       return -1; // No ':'; doesn't start with a scheme.
1314     }
1315 
1316     /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */
1317     private static int slashCount(String input, int pos, int limit) {
1318       int slashCount = 0;
1319       while (pos < limit) {
1320         char c = input.charAt(pos);
1321         if (c == '\\' || c == '/') {
1322           slashCount++;
1323           pos++;
1324         } else {
1325           break;
1326         }
1327       }
1328       return slashCount;
1329     }
1330 
1331     /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */
1332     private static int portColonOffset(String input, int pos, int limit) {
1333       for (int i = pos; i < limit; i++) {
1334         switch (input.charAt(i)) {
1335           case '[':
1336             while (++i < limit) {
1337               if (input.charAt(i) == ']') break;
1338             }
1339             break;
1340           case ':':
1341             return i;
1342         }
1343       }
1344       return limit; // No colon.
1345     }
1346 
1347     private static String canonicalizeHost(String input, int pos, int limit) {
1348       // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've
1349       // checked for IPv6 square braces. But Chrome does it first, and that's more lenient.
1350       String percentDecoded = percentDecode(input, pos, limit, false);
1351 
1352       // If the input is encased in square braces "[...]", drop 'em. We have an IPv6 address.
1353       if (percentDecoded.startsWith("[") && percentDecoded.endsWith("]")) {
1354         InetAddress inetAddress = decodeIpv6(percentDecoded, 1, percentDecoded.length() - 1);
1355         if (inetAddress == null) return null;
1356         byte[] address = inetAddress.getAddress();
1357         if (address.length == 16) return inet6AddressToAscii(address);
1358         throw new AssertionError();
1359       }
1360 
1361       return domainToAscii(percentDecoded);
1362     }
1363 
    /**
     * Decodes an IPv6 address like 1111:2222:3333:4444:5555:6666:7777:8888 or ::1.
     *
     * <p>Accepts at most one "::" compression and an optional dotted-decimal IPv4 suffix.
     *
     * @param input text containing the address, without enclosing square braces.
     * @param pos index of the first character of the address.
     * @param limit index after the last character of the address.
     * @return the decoded address, or null if {@code input[pos..limit)} is not a valid address.
     */
    private static InetAddress decodeIpv6(String input, int pos, int limit) {
      byte[] address = new byte[16];
      int b = 0; // Number of address bytes filled so far.
      int compress = -1; // Byte offset just after the "::" gap, or -1 if none seen.
      int groupOffset = -1; // Input index where the most recently read group started.

      for (int i = pos; i < limit; ) {
        if (b == address.length) return null; // Too many groups.

        // Read a delimiter.
        if (i + 2 <= limit && input.regionMatches(i, "::", 0, 2)) {
          // Compression "::" delimiter, which is anywhere in the input, including its prefix.
          if (compress != -1) return null; // Multiple "::" delimiters.
          i += 2;
          b += 2; // The gap stands for at least one all-zero group.
          compress = b;
          if (i == limit) break;
        } else if (b != 0) {
          // Group separator ":" delimiter.
          if (input.regionMatches(i, ":", 0, 1)) {
            i++;
          } else if (input.regionMatches(i, ".", 0, 1)) {
            // If we see a '.', rewind to the beginning of the previous group and parse as IPv4.
            if (!decodeIpv4Suffix(input, groupOffset, limit, address, b - 2)) return null;
            b += 2; // We rewound two bytes and then added four.
            break;
          } else {
            return null; // Wrong delimiter.
          }
        }

        // Read a group, one to four hex digits.
        int value = 0;
        groupOffset = i;
        for (; i < limit; i++) {
          char c = input.charAt(i);
          int hexDigit = decodeHexDigit(c);
          if (hexDigit == -1) break;
          value = (value << 4) + hexDigit;
        }
        int groupLength = i - groupOffset;
        if (groupLength == 0 || groupLength > 4) return null; // Group is the wrong size.

        // We've successfully read a group. Assign its value to our byte array.
        address[b++] = (byte) ((value >>> 8) & 0xff);
        address[b++] = (byte) (value & 0xff);
      }

      // All done. If compression happened, we need to move bytes to the right place in the
      // address. Here's a sample:
      //
      //      input: "1111:2222:3333::7777:8888"
      //     before: { 11, 11, 22, 22, 33, 33, 00, 00, 77, 77, 88, 88, 00, 00, 00, 00  }
      //   compress: 6
      //          b: 10
      //      after: { 11, 11, 22, 22, 33, 33, 00, 00, 00, 00, 00, 00, 77, 77, 88, 88 }
      //
      if (b != address.length) {
        if (compress == -1) return null; // Address didn't have compression or enough groups.
        // Shift everything after the gap to the end of the array, then zero the widened gap.
        System.arraycopy(address, compress, address, address.length - (b - compress), b - compress);
        Arrays.fill(address, compress, compress + (address.length - b), (byte) 0);
      }

      try {
        return InetAddress.getByAddress(address);
      } catch (UnknownHostException e) {
        // getByAddress() only rejects arrays of illegal length; ours is always 16 bytes.
        throw new AssertionError();
      }
    }
1434 
1435     /** Decodes an IPv4 address suffix of an IPv6 address, like 1111::5555:6666:192.168.0.1. */
1436     private static boolean decodeIpv4Suffix(
1437         String input, int pos, int limit, byte[] address, int addressOffset) {
1438       int b = addressOffset;
1439 
1440       for (int i = pos; i < limit; ) {
1441         if (b == address.length) return false; // Too many groups.
1442 
1443         // Read a delimiter.
1444         if (b != addressOffset) {
1445           if (input.charAt(i) != '.') return false; // Wrong delimiter.
1446           i++;
1447         }
1448 
1449         // Read 1 or more decimal digits for a value in 0..255.
1450         int value = 0;
1451         int groupOffset = i;
1452         for (; i < limit; i++) {
1453           char c = input.charAt(i);
1454           if (c < '0' || c > '9') break;
1455           if (value == 0 && groupOffset != i) return false; // Reject unnecessary leading '0's.
1456           value = (value * 10) + c - '0';
1457           if (value > 255) return false; // Value out of range.
1458         }
1459         int groupLength = i - groupOffset;
1460         if (groupLength == 0) return false; // No digits.
1461 
1462         // We've successfully read a byte.
1463         address[b++] = (byte) value;
1464       }
1465 
1466       if (b != addressOffset + 4) return false; // Too few groups. We wanted exactly four.
1467       return true; // Success.
1468     }
1469 
1470     /**
1471      * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts
1472      * {@code ☃.net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}.
1473      * {@code null} will be returned if the input cannot be ToASCII encoded or if the result
1474      * contains unsupported ASCII characters.
1475      */
1476     private static String domainToAscii(String input) {
1477       try {
1478         String result = IDN.toASCII(input).toLowerCase(Locale.US);
1479         if (result.isEmpty()) return null;
1480 
1481         // Confirm that the IDN ToASCII result doesn't contain any illegal characters.
1482         if (containsInvalidHostnameAsciiCodes(result)) {
1483           return null;
1484         }
1485         // TODO: implement all label limits.
1486         return result;
1487       } catch (IllegalArgumentException e) {
1488         return null;
1489       }
1490     }
1491 
1492     private static boolean containsInvalidHostnameAsciiCodes(String hostnameAscii) {
1493       for (int i = 0; i < hostnameAscii.length(); i++) {
1494         char c = hostnameAscii.charAt(i);
1495         // The WHATWG Host parsing rules accepts some character codes which are invalid by
1496         // definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here
1497         // we rule out characters that would cause problems in host headers.
1498         if (c <= '\u001f' || c >= '\u007f') {
1499           return true;
1500         }
1501         // Check for the characters mentioned in the WHATWG Host parsing spec:
1502         // U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]"
1503         // (excluding the characters covered above).
1504         if (" #%/:?@[\\]".indexOf(c) != -1) {
1505           return true;
1506         }
1507       }
1508       return false;
1509     }
1510 
1511     private static String inet6AddressToAscii(byte[] address) {
1512       // Go through the address looking for the longest run of 0s. Each group is 2-bytes.
1513       int longestRunOffset = -1;
1514       int longestRunLength = 0;
1515       for (int i = 0; i < address.length; i += 2) {
1516         int currentRunOffset = i;
1517         while (i < 16 && address[i] == 0 && address[i + 1] == 0) {
1518           i += 2;
1519         }
1520         int currentRunLength = i - currentRunOffset;
1521         if (currentRunLength > longestRunLength) {
1522           longestRunOffset = currentRunOffset;
1523           longestRunLength = currentRunLength;
1524         }
1525       }
1526 
1527       // Emit each 2-byte group in hex, separated by ':'. The longest run of zeroes is "::".
1528       Buffer result = new Buffer();
1529       for (int i = 0; i < address.length; ) {
1530         if (i == longestRunOffset) {
1531           result.writeByte(':');
1532           i += longestRunLength;
1533           if (i == 16) result.writeByte(':');
1534         } else {
1535           if (i > 0) result.writeByte(':');
1536           int group = (address[i] & 0xff) << 8 | address[i + 1] & 0xff;
1537           result.writeHexadecimalUnsignedLong(group);
1538           i += 2;
1539         }
1540       }
1541       return result.readUtf8();
1542     }
1543 
1544     private static int parsePort(String input, int pos, int limit) {
1545       try {
1546         // Canonicalize the port string to skip '\n' etc.
1547         String portString = canonicalize(input, pos, limit, "", false, false, false, true);
1548         int i = Integer.parseInt(portString);
1549         if (i > 0 && i <= 65535) return i;
1550         return -1;
1551       } catch (NumberFormatException e) {
1552         return -1; // Invalid port.
1553       }
1554     }
1555   }
1556 
1557   /**
1558    * Returns the index of the first character in {@code input} that contains a character in {@code
1559    * delimiters}. Returns limit if there is no such character.
1560    */
1561   private static int delimiterOffset(String input, int pos, int limit, String delimiters) {
1562     for (int i = pos; i < limit; i++) {
1563       if (delimiters.indexOf(input.charAt(i)) != -1) return i;
1564     }
1565     return limit;
1566   }
1567 
1568   static String percentDecode(String encoded, boolean plusIsSpace) {
1569     return percentDecode(encoded, 0, encoded.length(), plusIsSpace);
1570   }
1571 
1572   private List<String> percentDecode(List<String> list, boolean plusIsSpace) {
1573     List<String> result = new ArrayList<>(list.size());
1574     for (String s : list) {
1575       result.add(s != null ? percentDecode(s, plusIsSpace) : null);
1576     }
1577     return Collections.unmodifiableList(result);
1578   }
1579 
1580   static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) {
1581     for (int i = pos; i < limit; i++) {
1582       char c = encoded.charAt(i);
1583       if (c == '%' || (c == '+' && plusIsSpace)) {
1584         // Slow path: the character at i requires decoding!
1585         Buffer out = new Buffer();
1586         out.writeUtf8(encoded, pos, i);
1587         percentDecode(out, encoded, i, limit, plusIsSpace);
1588         return out.readUtf8();
1589       }
1590     }
1591 
1592     // Fast path: no characters in [pos..limit) required decoding.
1593     return encoded.substring(pos, limit);
1594   }
1595 
1596   static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
1597     int codePoint;
1598     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1599       codePoint = encoded.codePointAt(i);
1600       if (codePoint == '%' && i + 2 < limit) {
1601         int d1 = decodeHexDigit(encoded.charAt(i + 1));
1602         int d2 = decodeHexDigit(encoded.charAt(i + 2));
1603         if (d1 != -1 && d2 != -1) {
1604           out.writeByte((d1 << 4) + d2);
1605           i += 2;
1606           continue;
1607         }
1608       } else if (codePoint == '+' && plusIsSpace) {
1609         out.writeByte(' ');
1610         continue;
1611       }
1612       out.writeUtf8CodePoint(codePoint);
1613     }
1614   }
1615 
1616   static boolean percentEncoded(String encoded, int pos, int limit) {
1617     return pos + 2 < limit
1618         && encoded.charAt(pos) == '%'
1619         && decodeHexDigit(encoded.charAt(pos + 1)) != -1
1620         && decodeHexDigit(encoded.charAt(pos + 2)) != -1;
1621   }
1622 
1623   static int decodeHexDigit(char c) {
1624     if (c >= '0' && c <= '9') return c - '0';
1625     if (c >= 'a' && c <= 'f') return c - 'a' + 10;
1626     if (c >= 'A' && c <= 'F') return c - 'A' + 10;
1627     return -1;
1628   }
1629 
1630   /**
1631    * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following
1632    * transformations:
1633    * <ul>
1634    *   <li>Tabs, newlines, form feeds and carriage returns are skipped.
1635    *   <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".
1636    *   <li>Characters in {@code encodeSet} are percent-encoded.
1637    *   <li>Control characters and non-ASCII characters are percent-encoded.
1638    *   <li>All other characters are copied without transformation.
1639    * </ul>
1640    *
1641    * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
1642    * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.
1643    * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded
1644    * @param asciiOnly true to encode all non-ASCII codepoints.
1645    */
1646   static String canonicalize(String input, int pos, int limit, String encodeSet,
1647       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
1648     int codePoint;
1649     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1650       codePoint = input.codePointAt(i);
1651       if (codePoint < 0x20
1652           || codePoint == 0x7f
1653           || codePoint >= 0x80 && asciiOnly
1654           || encodeSet.indexOf(codePoint) != -1
1655           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))
1656           || codePoint == '+' && plusIsSpace) {
1657         // Slow path: the character at i requires encoding!
1658         Buffer out = new Buffer();
1659         out.writeUtf8(input, pos, i);
1660         canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace,
1661             asciiOnly);
1662         return out.readUtf8();
1663       }
1664     }
1665 
1666     // Fast path: no characters in [pos..limit) required encoding.
1667     return input.substring(pos, limit);
1668   }
1669 
1670   static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet,
1671       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
1672     Buffer utf8Buffer = null; // Lazily allocated.
1673     int codePoint;
1674     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1675       codePoint = input.codePointAt(i);
1676       if (alreadyEncoded
1677           && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) {
1678         // Skip this character.
1679       } else if (codePoint == '+' && plusIsSpace) {
1680         // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
1681         out.writeUtf8(alreadyEncoded ? "+" : "%2B");
1682       } else if (codePoint < 0x20
1683           || codePoint == 0x7f
1684           || codePoint >= 0x80 && asciiOnly
1685           || encodeSet.indexOf(codePoint) != -1
1686           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) {
1687         // Percent encode this character.
1688         if (utf8Buffer == null) {
1689           utf8Buffer = new Buffer();
1690         }
1691         utf8Buffer.writeUtf8CodePoint(codePoint);
1692         while (!utf8Buffer.exhausted()) {
1693           int b = utf8Buffer.readByte() & 0xff;
1694           out.writeByte('%');
1695           out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]);
1696           out.writeByte(HEX_DIGITS[b & 0xf]);
1697         }
1698       } else {
1699         // This character doesn't need encoding. Just copy it over.
1700         out.writeUtf8CodePoint(codePoint);
1701       }
1702     }
1703   }
1704 
1705   static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,
1706       boolean plusIsSpace, boolean asciiOnly) {
1707     return canonicalize(
1708         input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly);
1709   }
1710 }
1711