1 /* 2 * Copyright (C) 2015 Square, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.squareup.okhttp; 17 18 import java.net.IDN; 19 import java.net.InetAddress; 20 import java.net.MalformedURLException; 21 import java.net.URI; 22 import java.net.URISyntaxException; 23 import java.net.URL; 24 import java.net.UnknownHostException; 25 import java.util.ArrayList; 26 import java.util.Arrays; 27 import java.util.Collections; 28 import java.util.LinkedHashSet; 29 import java.util.List; 30 import java.util.Locale; 31 import java.util.Set; 32 import okio.Buffer; 33 34 /** 35 * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this 36 * class to compose and decompose Internet addresses. 
For example, this code will compose and print 37 * a URL for Google search: <pre> {@code 38 * 39 * HttpUrl url = new HttpUrl.Builder() 40 * .scheme("https") 41 * .host("www.google.com") 42 * .addPathSegment("search") 43 * .addQueryParameter("q", "polar bears") 44 * .build(); 45 * System.out.println(url); 46 * }</pre> 47 * 48 * which prints: <pre> {@code 49 * 50 * https://www.google.com/search?q=polar%20bears 51 * }</pre> 52 * 53 * As another example, this code prints the human-readable query parameters of a Twitter search: 54 * <pre> {@code 55 * 56 * HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images"); 57 * for (int i = 0, size = url.querySize(); i < size; i++) { 58 * System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i)); 59 * } 60 * }</pre> 61 * 62 * which prints: <pre> {@code 63 * 64 * q: cute #puppies 65 * f: images 66 * }</pre> 67 * 68 * In addition to composing URLs from their component parts and decomposing URLs into their 69 * component parts, this class implements relative URL resolution: what address you'd reach by 70 * clicking a relative link on a specified page. For example: <pre> {@code 71 * 72 * HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos"); 73 * HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc"); 74 * System.out.println(link); 75 * }</pre> 76 * 77 * which prints: <pre> {@code 78 * 79 * https://www.youtube.com/watch?v=cbP2N1BQdYc 80 * }</pre> 81 * 82 * <h3>What's in a URL?</h3> 83 * 84 * A URL has several components. 85 * 86 * <h4>Scheme</h4> 87 * Sometimes referred to as <i>protocol</i>, a URL's scheme describes what mechanism should be used 88 * to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file}, {@code 89 * ftp}), this class only supports {@code http} and {@code https}. Use {@link URI java.net.URI} for 90 * URLs with arbitrary schemes. 
91 * 92 * <h4>Username and Password</h4> 93 * Username and password are either present, or the empty string {@code ""} if absent. This class 94 * offers no mechanism to differentiate empty from absent. Neither of these components are popular 95 * in practice. Typically HTTP applications use other mechanisms for user identification and 96 * authentication. 97 * 98 * <h4>Host</h4> 99 * The host identifies the webserver that serves the URL's resource. It is either a hostname like 100 * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6 101 * address like {@code ::1}. 102 * 103 * <p>Usually a webserver is reachable with multiple identifiers: its IP addresses, registered 104 * domain names, and even {@code localhost} when connecting from the server itself. Each of a 105 * webserver's names is a distinct URL and they are not interchangeable. For example, even if 106 * {@code http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by 107 * the same IP address, the two URLs identify different resources. 108 * 109 * <h4>Port</h4> 110 * The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS. This 111 * class never returns -1 for the port: if no port is explicitly specified in the URL then the 112 * scheme's default is used. 113 * 114 * <h4>Path</h4> 115 * The path identifies a specific resource on the host. Paths have a hierarchical structure like 116 * "/square/okhttp/issues/1486". Each path segment is prefixed with "/". This class offers methods 117 * to compose and decompose paths by segment. If a path's last segment is the empty string, then the 118 * path ends with "/". This class always builds non-empty paths: if the path is omitted it defaults 119 * to "/", which is a path whose only segment is the empty string. 120 * 121 * <h4>Query</h4> 122 * The query is optional: it can be null, empty, or non-empty. 
For many HTTP URLs the query string 123 * is subdivided into a collection of name-value parameters. This class offers methods to set the 124 * query as the single string, or as individual name-value parameters. With name-value parameters 125 * the values are optional and names may be repeated. 126 * 127 * <h4>Fragment</h4> 128 * The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and query 129 * the fragment is not sent to the webserver: it's private to the client. 130 * 131 * <h3>Encoding</h3> 132 * Each component must be encoded before it is embedded in the complete URL. As we saw above, the 133 * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query 134 * parameter value. 135 * 136 * <h4>Percent encoding</h4> 137 * Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes (like 138 * {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters, 139 * non-ASCII characters, and characters that already have another meaning in a particular context. 140 * 141 * <p>Percent encoding is used in every URL component except for the hostname. But the set of 142 * characters that need to be encoded is different for each component. For example, the path 143 * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the 144 * start of the URL's query. But within the query and fragment components, the {@code ?} character 145 * doesn't delimit anything and doesn't need to be escaped. 
<pre> {@code 146 * 147 * HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder() 148 * .addPathSegment("_Who?_") 149 * .query("_Who?_") 150 * .fragment("_Who?_") 151 * .build(); 152 * System.out.println(url); 153 * }</pre> 154 * 155 * This prints: <pre> {@code 156 * 157 * http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_ 158 * }</pre> 159 * 160 * When parsing URLs that lack percent encoding where it is required, this class will percent encode 161 * the offending characters. 162 * 163 * <h4>IDNA Mapping and Punycode encoding</h4> 164 * Hostnames have different requirements and use a different encoding scheme. It consists of IDNA 165 * mapping and Punycode encoding. 166 * 167 * <p>In order to avoid confusion and discourage phishing attacks, 168 * <a href="http://www.unicode.org/reports/tr46/#ToASCII">IDNA Mapping</a> transforms names to avoid 169 * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM} 170 * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the 171 * Unicode trademark sign (™) could be confused for the letters "TM" in {@code http://ho™mail.com}. 172 * To mitigate this, the single character (™) maps to the string (tm). There is similar policy for 173 * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are 174 * not mapped and cannot be used in a hostname. 175 * 176 * <p><a href="http://ietf.org/rfc/rfc3492.txt">Punycode</a> converts a Unicode string to an ASCII 177 * string to make international domain names work everywhere. For example, "σ" encodes as 178 * "xn--4xa". The encoded string is not human readable, but can be used with classes like {@link 179 * InetAddress} to establish connections. 180 * 181 * <h3>Why another URL model?</h3> 182 * Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL 183 * model to address problems that the others don't. 
184 * 185 * <h4>Different URLs should be different</h4> 186 * Although they have different content, {@code java.net.URL} considers the following two URLs 187 * equal, and the {@link Object#equals equals()} method between them returns true: 188 * <ul> 189 * <li>http://square.github.io/ 190 * <li>http://google.github.io/ 191 * </ul> 192 * This is because those two hosts share the same IP address. This is an old, bad design decision 193 * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link 194 * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may 195 * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are 196 * hosted. 197 * 198 * <h4>Equal URLs should be equal</h4> 199 * These two URLs are semantically identical, but {@code java.net.URI} disagrees: 200 * <ul> 201 * <li>http://host:80/ 202 * <li>http://host 203 * </ul> 204 * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /}) 205 * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any 206 * application that stores information-per-URL will need to either canonicalize manually, or suffer 207 * unnecessary redundancy for such URLs. 208 * 209 * <p>Because they don't attempt canonical form, these classes are surprisingly difficult to use 210 * securely. Suppose you're building a webservice that checks that incoming paths are prefixed 211 * "/static/images/" before serving the corresponding assets from the filesystem. <pre> {@code 212 * 213 * String attack = "http://example.com/static/images/../../../../../etc/passwd"; 214 * System.out.println(new URL(attack).getPath()); 215 * System.out.println(new URI(attack).getPath()); 216 * System.out.println(HttpUrl.parse(attack).encodedPath()); 217 * }</pre> 218 * 219 * By not canonicalizing the input paths, they are complicit in directory traversal attacks. 
Code that 220 * checks only the path prefix may suffer! 221 * <pre> {@code 222 * 223 * /static/images/../../../../../etc/passwd 224 * /static/images/../../../../../etc/passwd 225 * /etc/passwd 226 * }</pre> 227 * 228 * <h4>If it works on the web, it should work in your application</h4> 229 * The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like 230 * "http://example.com/abc|def" because the '|' character is unsupported. This class is more 231 * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef". 232 * This kind behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with 233 * major web browsers over consistency with obsolete specifications. 234 * 235 * <h4>Paths and Queries should decompose</h4> 236 * Neither of the built-in URL models offer direct access to path segments or query parameters. 237 * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+' 238 * characters get silently replaced with spaces? If a query parameter contains a '&', does that 239 * get escaped? By offering methods to read and write individual query parameters directly, 240 * application developers are saved from the hassles of encoding and decoding. 241 * 242 * <h4>Plus a modern API</h4> 243 * The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping 244 * constructors. For example, there's no API to compose a URI with a custom port without also 245 * providing a query and fragment. 246 * 247 * <p>Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With 248 * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and 249 * path but no hostname. Building APIs that consume such malformed values is difficult! 250 * 251 * <p>This class has a modern API. 
It avoids punitive checked exceptions: {@link #parse parse()} 252 * returns null if the input is an invalid URL. You can even be explicit about whether each 253 * component has been encoded already. 254 */ 255 public final class HttpUrl { 256 private static final char[] HEX_DIGITS = 257 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; 258 static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"; 259 static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#"; 260 static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#"; 261 static final String PATH_SEGMENT_ENCODE_SET_URI = "[]"; 262 // ANDROID-CHANGED: http://b/30405333 - we do not encode single quote as %27 in query strings. 263 // static final String QUERY_ENCODE_SET = " \"'<>#"; 264 // static final String QUERY_COMPONENT_ENCODE_SET = " \"'<>#&="; 265 static final String QUERY_ENCODE_SET = " \"<>#"; 266 static final String QUERY_COMPONENT_ENCODE_SET = " \"<>#&="; 267 // ANDROID-CHANGED end. 268 static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}"; 269 static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~"; 270 static final String FRAGMENT_ENCODE_SET = ""; 271 static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}"; 272 273 /** Either "http" or "https". */ 274 private final String scheme; 275 276 /** Decoded username. */ 277 private final String username; 278 279 /** Decoded password. */ 280 private final String password; 281 282 /** Canonical hostname. */ 283 private final String host; 284 285 /** Either 80, 443 or a user-specified port. In range [1..65535]. */ 286 private final int port; 287 288 /** 289 * A list of canonical path segments. This list always contains at least one element, which may 290 * be the empty string. Each segment is formatted with a leading '/', so if path segments were 291 * ["a", "b", ""], then the encoded path would be "/a/b/". 
292 */ 293 private final List<String> pathSegments; 294 295 /** 296 * Alternating, decoded query names and values, or null for no query. Names may be empty or 297 * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or 298 * empty, or non-empty. 299 */ 300 private final List<String> queryNamesAndValues; 301 302 /** Decoded fragment. */ 303 private final String fragment; 304 305 /** Canonical URL. */ 306 private final String url; 307 HttpUrl(Builder builder)308 private HttpUrl(Builder builder) { 309 this.scheme = builder.scheme; 310 this.username = percentDecode(builder.encodedUsername, false); 311 this.password = percentDecode(builder.encodedPassword, false); 312 this.host = builder.host; 313 this.port = builder.effectivePort(); 314 this.pathSegments = percentDecode(builder.encodedPathSegments, false); 315 this.queryNamesAndValues = builder.encodedQueryNamesAndValues != null 316 ? percentDecode(builder.encodedQueryNamesAndValues, true) 317 : null; 318 this.fragment = builder.encodedFragment != null 319 ? percentDecode(builder.encodedFragment, false) 320 : null; 321 this.url = builder.toString(); 322 } 323 324 /** Returns this URL as a {@link URL java.net.URL}. */ url()325 public URL url() { 326 try { 327 return new URL(url); 328 } catch (MalformedURLException e) { 329 throw new RuntimeException(e); // Unexpected! 330 } 331 } 332 333 /** 334 * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this 335 * class, the returned URI may be semantically different from this URL: 336 * <ul> 337 * <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped. 338 * <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}. 339 * <li>Whitespace and control characters in the fragment will be stripped. 340 * </ul> 341 * 342 * <p>These differences may have a significant consequence when the URI is interpretted by a 343 * webserver. 
For this reason the {@linkplain URI URI class} and this method should be avoided. 344 */ uri()345 public URI uri() { 346 String uri = newBuilder().reencodeForUri().toString(); 347 try { 348 return new URI(uri); 349 } catch (URISyntaxException e) { 350 // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry. 351 try { 352 String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", ""); 353 return URI.create(stripped); 354 } catch (Exception e1) { 355 throw new RuntimeException(e); // Unexpected! 356 } 357 } 358 } 359 360 /** Returns either "http" or "https". */ scheme()361 public String scheme() { 362 return scheme; 363 } 364 isHttps()365 public boolean isHttps() { 366 return scheme.equals("https"); 367 } 368 369 /** Returns the username, or an empty string if none is set. */ encodedUsername()370 public String encodedUsername() { 371 if (username.isEmpty()) return ""; 372 int usernameStart = scheme.length() + 3; // "://".length() == 3. 373 int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@"); 374 return url.substring(usernameStart, usernameEnd); 375 } 376 username()377 public String username() { 378 return username; 379 } 380 381 /** Returns the password, or an empty string if none is set. */ encodedPassword()382 public String encodedPassword() { 383 if (password.isEmpty()) return ""; 384 int passwordStart = url.indexOf(':', scheme.length() + 3) + 1; 385 int passwordEnd = url.indexOf('@'); 386 return url.substring(passwordStart, passwordEnd); 387 } 388 389 /** Returns the decoded password, or an empty string if none is present. */ password()390 public String password() { 391 return password; 392 } 393 394 /** 395 * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May 396 * be: 397 * <ul> 398 * <li>A regular host name, like {@code android.com}. 399 * <li>An IPv4 address, like {@code 127.0.0.1}. 400 * <li>An IPv6 address, like {@code ::1}. 
Note that there are no square braces. 401 * <li>An encoded IDN, like {@code xn--n3h.net}. 402 * </ul> 403 */ host()404 public String host() { 405 return host; 406 } 407 408 /** 409 * Same as {@link #host} except that literal IPv6 addresses are surrounding by square 410 * braces. For example, this method will return {@code [::1]} where {@code host} returns 411 * {@code ::1}. 412 */ rfc2732host()413 public String rfc2732host() { 414 if (host.indexOf(':') == -1) { 415 return host; 416 } 417 418 return "[" + host + "]"; 419 } 420 421 /** 422 * Returns the explicitly-specified port if one was provided, or the default port for this URL's 423 * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code 424 * https://square.com/}. The result is in {@code [1..65535]}. 425 */ port()426 public int port() { 427 return port; 428 } 429 430 /** 431 * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1 432 * otherwise. 433 */ defaultPort(String scheme)434 public static int defaultPort(String scheme) { 435 if (scheme.equals("http")) { 436 return 80; 437 } else if (scheme.equals("https")) { 438 return 443; 439 } else { 440 return -1; 441 } 442 } 443 pathSize()444 public int pathSize() { 445 return pathSegments.size(); 446 } 447 448 /** 449 * Returns the entire path of this URL, encoded for use in HTTP resource resolution. 450 // ANDROID-BEGIN: http://b/29983827 451 // * The returned path is always nonempty and is prefixed with {@code /}. 452 // ANDROID-END: http://b/29983827 453 */ encodedPath()454 public String encodedPath() { 455 int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3. 
456 // ANDROID-BEGIN: http://b/29983827 457 if (pathStart == -1) { 458 return ""; 459 } 460 // ANDROID-END: http://b/29983827 461 int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#"); 462 return url.substring(pathStart, pathEnd); 463 } 464 pathSegmentsToString(StringBuilder out, List<String> pathSegments)465 static void pathSegmentsToString(StringBuilder out, List<String> pathSegments) { 466 for (int i = 0, size = pathSegments.size(); i < size; i++) { 467 out.append('/'); 468 out.append(pathSegments.get(i)); 469 } 470 } 471 encodedPathSegments()472 public List<String> encodedPathSegments() { 473 int pathStart = url.indexOf('/', scheme.length() + 3); 474 // ANDROID-BEGIN: http://b/29983827 475 if (pathStart == -1) { 476 return new ArrayList<>(); 477 } 478 // ANDROID-END: http://b/29983827 479 480 int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#"); 481 List<String> result = new ArrayList<>(); 482 for (int i = pathStart; i < pathEnd; ) { 483 i++; // Skip the '/'. 484 int segmentEnd = delimiterOffset(url, i, pathEnd, "/"); 485 result.add(url.substring(i, segmentEnd)); 486 i = segmentEnd; 487 } 488 return result; 489 } 490 pathSegments()491 public List<String> pathSegments() { 492 return pathSegments; 493 } 494 495 /** 496 * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string 497 * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all 498 * other URLs). 499 */ encodedQuery()500 public String encodedQuery() { 501 if (queryNamesAndValues == null) return null; // No query. 
502 int queryStart = url.indexOf('?') + 1; 503 int queryEnd = delimiterOffset(url, queryStart + 1, url.length(), "#"); 504 return url.substring(queryStart, queryEnd); 505 } 506 namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues)507 static void namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues) { 508 for (int i = 0, size = namesAndValues.size(); i < size; i += 2) { 509 String name = namesAndValues.get(i); 510 String value = namesAndValues.get(i + 1); 511 if (i > 0) out.append('&'); 512 out.append(name); 513 if (value != null) { 514 out.append('='); 515 out.append(value); 516 } 517 } 518 } 519 520 /** 521 * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a 522 * query string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject", 523 * "math", "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain 524 * '=' characters. 525 */ queryStringToNamesAndValues(String encodedQuery)526 static List<String> queryStringToNamesAndValues(String encodedQuery) { 527 List<String> result = new ArrayList<>(); 528 for (int pos = 0; pos <= encodedQuery.length(); ) { 529 int ampersandOffset = encodedQuery.indexOf('&', pos); 530 if (ampersandOffset == -1) ampersandOffset = encodedQuery.length(); 531 532 int equalsOffset = encodedQuery.indexOf('=', pos); 533 if (equalsOffset == -1 || equalsOffset > ampersandOffset) { 534 result.add(encodedQuery.substring(pos, ampersandOffset)); 535 result.add(null); // No value for this name. 536 } else { 537 result.add(encodedQuery.substring(pos, equalsOffset)); 538 result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset)); 539 } 540 pos = ampersandOffset + 1; 541 } 542 return result; 543 } 544 query()545 public String query() { 546 if (queryNamesAndValues == null) return null; // No query. 
547 StringBuilder result = new StringBuilder(); 548 namesAndValuesToQueryString(result, queryNamesAndValues); 549 return result.toString(); 550 } 551 querySize()552 public int querySize() { 553 return queryNamesAndValues != null ? queryNamesAndValues.size() / 2 : 0; 554 } 555 556 /** 557 * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is 558 * no such query parameter. 559 */ queryParameter(String name)560 public String queryParameter(String name) { 561 if (queryNamesAndValues == null) return null; 562 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) { 563 if (name.equals(queryNamesAndValues.get(i))) { 564 return queryNamesAndValues.get(i + 1); 565 } 566 } 567 return null; 568 } 569 queryParameterNames()570 public Set<String> queryParameterNames() { 571 if (queryNamesAndValues == null) return Collections.emptySet(); 572 Set<String> result = new LinkedHashSet<>(); 573 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) { 574 result.add(queryNamesAndValues.get(i)); 575 } 576 return Collections.unmodifiableSet(result); 577 } 578 queryParameterValues(String name)579 public List<String> queryParameterValues(String name) { 580 if (queryNamesAndValues == null) return Collections.emptyList(); 581 List<String> result = new ArrayList<>(); 582 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) { 583 if (name.equals(queryNamesAndValues.get(i))) { 584 result.add(queryNamesAndValues.get(i + 1)); 585 } 586 } 587 return Collections.unmodifiableList(result); 588 } 589 queryParameterName(int index)590 public String queryParameterName(int index) { 591 return queryNamesAndValues.get(index * 2); 592 } 593 queryParameterValue(int index)594 public String queryParameterValue(int index) { 595 return queryNamesAndValues.get(index * 2 + 1); 596 } 597 encodedFragment()598 public String encodedFragment() { 599 if (fragment == null) return null; 600 int fragmentStart = url.indexOf('#') + 1; 601 
return url.substring(fragmentStart); 602 } 603 fragment()604 public String fragment() { 605 return fragment; 606 } 607 608 /** Returns the URL that would be retrieved by following {@code link} from this URL. */ resolve(String link)609 public HttpUrl resolve(String link) { 610 // ANDROID-BEGIN: http://b/29983827 611 // Builder builder = new Builder(); 612 Builder builder = new Builder(false); 613 // ANDROID-END: http://b/29983827 614 Builder.ParseResult result = builder.parse(this, link); 615 return result == Builder.ParseResult.SUCCESS ? builder.build() : null; 616 } 617 newBuilder()618 public Builder newBuilder() { 619 // ANDROID-BEGIN: http://b/29983827 620 // Builder builder = new Builder(); 621 Builder result = new Builder(false); 622 // ANDROID-END: http://b/29983827 623 result.scheme = scheme; 624 result.encodedUsername = encodedUsername(); 625 result.encodedPassword = encodedPassword(); 626 result.host = host; 627 // If we're set to a default port, unset it in case of a scheme change. 628 result.port = port != defaultPort(scheme) ? port : -1; 629 result.encodedPathSegments.clear(); 630 result.encodedPathSegments.addAll(encodedPathSegments()); 631 result.encodedQuery(encodedQuery()); 632 result.encodedFragment = encodedFragment(); 633 return result; 634 } 635 636 /** 637 * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS 638 * URL, or null if it isn't. 639 */ parse(String url)640 public static HttpUrl parse(String url) { 641 // ANDROID-BEGIN: http://b/29983827 642 // Builder builder = new Builder(); 643 Builder builder = new Builder(false); 644 // ANDROID-END: http://b/29983827 645 Builder.ParseResult result = builder.parse(null, url); 646 return result == Builder.ParseResult.SUCCESS ? builder.build() : null; 647 } 648 649 /** 650 * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or 651 * null if it has any other protocol. 
652 */ get(URL url)653 public static HttpUrl get(URL url) { 654 return parse(url.toString()); 655 } 656 657 /** 658 * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS 659 * URL, or throws an exception if it isn't. 660 * 661 * @throws MalformedURLException if there was a non-host related URL issue 662 * @throws UnknownHostException if the host was invalid 663 */ getChecked(String url)664 static HttpUrl getChecked(String url) throws MalformedURLException, UnknownHostException { 665 // ANDROID-END: http://b/29983827 666 // Builder builder = new Builder(); 667 Builder builder = new Builder(false); 668 // ANDROID-END: http://b/29983827 669 Builder.ParseResult result = builder.parse(null, url); 670 switch (result) { 671 case SUCCESS: 672 return builder.build(); 673 case INVALID_HOST: 674 throw new UnknownHostException("Invalid host: " + url); 675 case UNSUPPORTED_SCHEME: 676 case MISSING_SCHEME: 677 case INVALID_PORT: 678 default: 679 throw new MalformedURLException("Invalid URL: " + result + " for " + url); 680 } 681 } 682 get(URI uri)683 public static HttpUrl get(URI uri) { 684 return parse(uri.toString()); 685 } 686 equals(Object o)687 @Override public boolean equals(Object o) { 688 return o instanceof HttpUrl && ((HttpUrl) o).url.equals(url); 689 } 690 hashCode()691 @Override public int hashCode() { 692 return url.hashCode(); 693 } 694 toString()695 @Override public String toString() { 696 return url; 697 } 698 699 public static final class Builder { 700 String scheme; 701 String encodedUsername = ""; 702 String encodedPassword = ""; 703 String host; 704 int port = -1; 705 final List<String> encodedPathSegments = new ArrayList<>(); 706 List<String> encodedQueryNamesAndValues; 707 String encodedFragment; 708 709 // ANDROID-BEGIN: http://b/29983827 710 // public Builder() { 711 // encodedPathSegments.add(""); // The default path is '/' which needs a trailing space. 
712 // } 713 Builder()714 public Builder() { 715 this(true); // // The default path is '/' which needs a trailing space. 716 } 717 Builder(boolean startWithSlash)718 private Builder(boolean startWithSlash) { 719 if (startWithSlash) { 720 encodedPathSegments.add(""); 721 } 722 } 723 // ANDROID-END: http://b/29983827 724 scheme(String scheme)725 public Builder scheme(String scheme) { 726 if (scheme == null) { 727 throw new IllegalArgumentException("scheme == null"); 728 } else if (scheme.equalsIgnoreCase("http")) { 729 this.scheme = "http"; 730 } else if (scheme.equalsIgnoreCase("https")) { 731 this.scheme = "https"; 732 } else { 733 throw new IllegalArgumentException("unexpected scheme: " + scheme); 734 } 735 return this; 736 } 737 username(String username)738 public Builder username(String username) { 739 if (username == null) throw new IllegalArgumentException("username == null"); 740 this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true); 741 return this; 742 } 743 encodedUsername(String encodedUsername)744 public Builder encodedUsername(String encodedUsername) { 745 if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null"); 746 this.encodedUsername = canonicalize( 747 encodedUsername, USERNAME_ENCODE_SET, true, false, false, true); 748 return this; 749 } 750 password(String password)751 public Builder password(String password) { 752 if (password == null) throw new IllegalArgumentException("password == null"); 753 this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true); 754 return this; 755 } 756 encodedPassword(String encodedPassword)757 public Builder encodedPassword(String encodedPassword) { 758 if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null"); 759 this.encodedPassword = canonicalize( 760 encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true); 761 return this; 762 } 763 764 /** 765 * @param host either 
a regular hostname, International Domain Name, IPv4 address, or IPv6 766 * address. 767 */ host(String host)768 public Builder host(String host) { 769 if (host == null) throw new IllegalArgumentException("host == null"); 770 String encoded = canonicalizeHost(host, 0, host.length()); 771 if (encoded == null) throw new IllegalArgumentException("unexpected host: " + host); 772 this.host = encoded; 773 return this; 774 } 775 port(int port)776 public Builder port(int port) { 777 if (port <= 0 || port > 65535) throw new IllegalArgumentException("unexpected port: " + port); 778 this.port = port; 779 return this; 780 } 781 effectivePort()782 int effectivePort() { 783 return port != -1 ? port : defaultPort(scheme); 784 } 785 addPathSegment(String pathSegment)786 public Builder addPathSegment(String pathSegment) { 787 if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null"); 788 push(pathSegment, 0, pathSegment.length(), false, false); 789 return this; 790 } 791 addEncodedPathSegment(String encodedPathSegment)792 public Builder addEncodedPathSegment(String encodedPathSegment) { 793 if (encodedPathSegment == null) { 794 throw new IllegalArgumentException("encodedPathSegment == null"); 795 } 796 push(encodedPathSegment, 0, encodedPathSegment.length(), false, true); 797 return this; 798 } 799 setPathSegment(int index, String pathSegment)800 public Builder setPathSegment(int index, String pathSegment) { 801 if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null"); 802 String canonicalPathSegment = canonicalize( 803 pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true); 804 if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { 805 throw new IllegalArgumentException("unexpected path segment: " + pathSegment); 806 } 807 encodedPathSegments.set(index, canonicalPathSegment); 808 return this; 809 } 810 setEncodedPathSegment(int index, String encodedPathSegment)811 public Builder 
setEncodedPathSegment(int index, String encodedPathSegment) { 812 if (encodedPathSegment == null) { 813 throw new IllegalArgumentException("encodedPathSegment == null"); 814 } 815 String canonicalPathSegment = canonicalize(encodedPathSegment, 816 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true); 817 encodedPathSegments.set(index, canonicalPathSegment); 818 if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { 819 throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment); 820 } 821 return this; 822 } 823 removePathSegment(int index)824 public Builder removePathSegment(int index) { 825 encodedPathSegments.remove(index); 826 // ANDROID-BEGIN: http://b/29983827. Note this method only used from tests. 827 // Only changed for consistency. 828 // if (encodedPathSegments.isEmpty()) { 829 // encodedPathSegments.add(""); // Always leave at least one '/'. 830 // } 831 // ANDROID-END: http://b/29983827 - only used from tests 832 return this; 833 } 834 encodedPath(String encodedPath)835 public Builder encodedPath(String encodedPath) { 836 if (encodedPath == null) throw new IllegalArgumentException("encodedPath == null"); 837 if (!encodedPath.startsWith("/")) { 838 throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath); 839 } 840 resolvePath(encodedPath, 0, encodedPath.length()); 841 return this; 842 } 843 query(String query)844 public Builder query(String query) { 845 this.encodedQueryNamesAndValues = query != null 846 ? queryStringToNamesAndValues(canonicalize( 847 query, QUERY_ENCODE_SET, false, false, true, true)) 848 : null; 849 return this; 850 } 851 encodedQuery(String encodedQuery)852 public Builder encodedQuery(String encodedQuery) { 853 this.encodedQueryNamesAndValues = encodedQuery != null 854 ? 
queryStringToNamesAndValues( 855 canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true)) 856 : null; 857 return this; 858 } 859 860 /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */ addQueryParameter(String name, String value)861 public Builder addQueryParameter(String name, String value) { 862 if (name == null) throw new IllegalArgumentException("name == null"); 863 if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); 864 encodedQueryNamesAndValues.add( 865 canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)); 866 encodedQueryNamesAndValues.add(value != null 867 ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true) 868 : null); 869 return this; 870 } 871 872 /** Adds the pre-encoded query parameter to this URL's query string. */ addEncodedQueryParameter(String encodedName, String encodedValue)873 public Builder addEncodedQueryParameter(String encodedName, String encodedValue) { 874 if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); 875 if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); 876 encodedQueryNamesAndValues.add( 877 canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); 878 encodedQueryNamesAndValues.add(encodedValue != null 879 ? 
canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true) 880 : null); 881 return this; 882 } 883 setQueryParameter(String name, String value)884 public Builder setQueryParameter(String name, String value) { 885 removeAllQueryParameters(name); 886 addQueryParameter(name, value); 887 return this; 888 } 889 setEncodedQueryParameter(String encodedName, String encodedValue)890 public Builder setEncodedQueryParameter(String encodedName, String encodedValue) { 891 removeAllEncodedQueryParameters(encodedName); 892 addEncodedQueryParameter(encodedName, encodedValue); 893 return this; 894 } 895 removeAllQueryParameters(String name)896 public Builder removeAllQueryParameters(String name) { 897 if (name == null) throw new IllegalArgumentException("name == null"); 898 if (encodedQueryNamesAndValues == null) return this; 899 String nameToRemove = canonicalize( 900 name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true); 901 removeAllCanonicalQueryParameters(nameToRemove); 902 return this; 903 } 904 removeAllEncodedQueryParameters(String encodedName)905 public Builder removeAllEncodedQueryParameters(String encodedName) { 906 if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); 907 if (encodedQueryNamesAndValues == null) return this; 908 removeAllCanonicalQueryParameters( 909 canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); 910 return this; 911 } 912 removeAllCanonicalQueryParameters(String canonicalName)913 private void removeAllCanonicalQueryParameters(String canonicalName) { 914 for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) { 915 if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) { 916 encodedQueryNamesAndValues.remove(i + 1); 917 encodedQueryNamesAndValues.remove(i); 918 if (encodedQueryNamesAndValues.isEmpty()) { 919 encodedQueryNamesAndValues = null; 920 return; 921 } 922 } 923 } 924 } 925 fragment(String fragment)926 public Builder fragment(String 
fragment) { 927 this.encodedFragment = fragment != null 928 ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false) 929 : null; 930 return this; 931 } 932 encodedFragment(String encodedFragment)933 public Builder encodedFragment(String encodedFragment) { 934 this.encodedFragment = encodedFragment != null 935 ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false) 936 : null; 937 return this; 938 } 939 940 /** 941 * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is 942 * particularly strict for certain components. 943 */ reencodeForUri()944 Builder reencodeForUri() { 945 for (int i = 0, size = encodedPathSegments.size(); i < size; i++) { 946 String pathSegment = encodedPathSegments.get(i); 947 encodedPathSegments.set(i, 948 canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true)); 949 } 950 if (encodedQueryNamesAndValues != null) { 951 for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) { 952 String component = encodedQueryNamesAndValues.get(i); 953 if (component != null) { 954 encodedQueryNamesAndValues.set(i, 955 canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true)); 956 } 957 } 958 } 959 if (encodedFragment != null) { 960 encodedFragment = canonicalize( 961 encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false); 962 } 963 return this; 964 } 965 build()966 public HttpUrl build() { 967 if (scheme == null) throw new IllegalStateException("scheme == null"); 968 if (host == null) throw new IllegalStateException("host == null"); 969 return new HttpUrl(this); 970 } 971 toString()972 @Override public String toString() { 973 StringBuilder result = new StringBuilder(); 974 result.append(scheme); 975 result.append("://"); 976 977 if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) { 978 result.append(encodedUsername); 979 if (!encodedPassword.isEmpty()) { 980 result.append(':'); 981 
result.append(encodedPassword); 982 } 983 result.append('@'); 984 } 985 986 if (host.indexOf(':') != -1) { 987 // Host is an IPv6 address. 988 result.append('['); 989 result.append(host); 990 result.append(']'); 991 } else { 992 result.append(host); 993 } 994 995 int effectivePort = effectivePort(); 996 if (effectivePort != defaultPort(scheme)) { 997 result.append(':'); 998 result.append(effectivePort); 999 } 1000 1001 pathSegmentsToString(result, encodedPathSegments); 1002 1003 if (encodedQueryNamesAndValues != null) { 1004 result.append('?'); 1005 namesAndValuesToQueryString(result, encodedQueryNamesAndValues); 1006 } 1007 1008 if (encodedFragment != null) { 1009 result.append('#'); 1010 result.append(encodedFragment); 1011 } 1012 1013 return result.toString(); 1014 } 1015 1016 enum ParseResult { 1017 SUCCESS, 1018 MISSING_SCHEME, 1019 UNSUPPORTED_SCHEME, 1020 INVALID_PORT, 1021 INVALID_HOST, 1022 } 1023 parse(HttpUrl base, String input)1024 ParseResult parse(HttpUrl base, String input) { 1025 int pos = skipLeadingAsciiWhitespace(input, 0, input.length()); 1026 int limit = skipTrailingAsciiWhitespace(input, pos, input.length()); 1027 1028 // Scheme. 1029 int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit); 1030 if (schemeDelimiterOffset != -1) { 1031 if (input.regionMatches(true, pos, "https:", 0, 6)) { 1032 this.scheme = "https"; 1033 pos += "https:".length(); 1034 } else if (input.regionMatches(true, pos, "http:", 0, 5)) { 1035 this.scheme = "http"; 1036 pos += "http:".length(); 1037 } else { 1038 return ParseResult.UNSUPPORTED_SCHEME; // Not an HTTP scheme. 1039 } 1040 } else if (base != null) { 1041 this.scheme = base.scheme; 1042 } else { 1043 return ParseResult.MISSING_SCHEME; // No scheme. 1044 } 1045 1046 // Authority. 
1047 boolean hasUsername = false; 1048 boolean hasPassword = false; 1049 int slashCount = slashCount(input, pos, limit); 1050 if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) { 1051 // Read an authority if either: 1052 // * The input starts with 2 or more slashes. These follow the scheme if it exists. 1053 // * The input scheme exists and is different from the base URL's scheme. 1054 // 1055 // The structure of an authority is: 1056 // username:password@host:port 1057 // 1058 // Username, password and port are optional. 1059 // [username[:password]@]host[:port] 1060 pos += slashCount; 1061 authority: 1062 while (true) { 1063 int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#"); 1064 int c = componentDelimiterOffset != limit 1065 ? input.charAt(componentDelimiterOffset) 1066 : -1; 1067 switch (c) { 1068 case '@': 1069 // User info precedes. 1070 if (!hasPassword) { 1071 int passwordColonOffset = delimiterOffset( 1072 input, pos, componentDelimiterOffset, ":"); 1073 String canonicalUsername = canonicalize( 1074 input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true); 1075 this.encodedUsername = hasUsername 1076 ? this.encodedUsername + "%40" + canonicalUsername 1077 : canonicalUsername; 1078 if (passwordColonOffset != componentDelimiterOffset) { 1079 hasPassword = true; 1080 this.encodedPassword = canonicalize(input, passwordColonOffset + 1, 1081 componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); 1082 } 1083 hasUsername = true; 1084 } else { 1085 this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos, 1086 componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); 1087 } 1088 pos = componentDelimiterOffset + 1; 1089 break; 1090 1091 case -1: 1092 case '/': 1093 case '\\': 1094 case '?': 1095 case '#': 1096 // Host info precedes. 
1097 int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset); 1098 if (portColonOffset + 1 < componentDelimiterOffset) { 1099 this.host = canonicalizeHost(input, pos, portColonOffset); 1100 this.port = parsePort(input, portColonOffset + 1, componentDelimiterOffset); 1101 if (this.port == -1) return ParseResult.INVALID_PORT; // Invalid port. 1102 } else { 1103 this.host = canonicalizeHost(input, pos, portColonOffset); 1104 this.port = defaultPort(this.scheme); 1105 } 1106 if (this.host == null) return ParseResult.INVALID_HOST; // Invalid host. 1107 pos = componentDelimiterOffset; 1108 break authority; 1109 } 1110 } 1111 } else { 1112 // This is a relative link. Copy over all authority components. Also maybe the path & query. 1113 this.encodedUsername = base.encodedUsername(); 1114 this.encodedPassword = base.encodedPassword(); 1115 this.host = base.host; 1116 this.port = base.port; 1117 this.encodedPathSegments.clear(); 1118 this.encodedPathSegments.addAll(base.encodedPathSegments()); 1119 if (pos == limit || input.charAt(pos) == '#') { 1120 encodedQuery(base.encodedQuery()); 1121 } 1122 } 1123 1124 // Resolve the relative path. 1125 int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#"); 1126 resolvePath(input, pos, pathDelimiterOffset); 1127 pos = pathDelimiterOffset; 1128 1129 // Query. 1130 if (pos < limit && input.charAt(pos) == '?') { 1131 int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#"); 1132 this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize( 1133 input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true)); 1134 pos = queryDelimiterOffset; 1135 } 1136 1137 // Fragment. 
1138 if (pos < limit && input.charAt(pos) == '#') { 1139 this.encodedFragment = canonicalize( 1140 input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false); 1141 } 1142 1143 return ParseResult.SUCCESS; 1144 } 1145 resolvePath(String input, int pos, int limit)1146 private void resolvePath(String input, int pos, int limit) { 1147 // Read a delimiter. 1148 if (pos == limit) { 1149 // Empty path: keep the base path as-is. 1150 return; 1151 } 1152 char c = input.charAt(pos); 1153 if (c == '/' || c == '\\') { 1154 // Absolute path: reset to the default "/". 1155 encodedPathSegments.clear(); 1156 encodedPathSegments.add(""); 1157 pos++; 1158 } else { 1159 // ANDROID-BEGIN: http://b/29983827 1160 // // Relative path: clear everything after the last '/'. 1161 // encodedPathSegments.set(encodedPathSegments.size() - 1, ""); 1162 // Relative path: clear everything after the last '/' (if there is one). 1163 if (!encodedPathSegments.isEmpty()) { 1164 encodedPathSegments.set(encodedPathSegments.size() - 1, ""); 1165 } 1166 // ANDROID-END: http://b/29983827 1167 } 1168 1169 // Read path segments. 1170 for (int i = pos; i < limit; ) { 1171 int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\"); 1172 boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit; 1173 push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true); 1174 i = pathSegmentDelimiterOffset; 1175 if (segmentHasTrailingSlash) i++; 1176 } 1177 } 1178 1179 /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */ 1180 private void push(String input, int pos, int limit, boolean addTrailingSlash, 1181 boolean alreadyEncoded) { 1182 String segment = canonicalize( 1183 input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true); 1184 if (isDot(segment)) { 1185 return; // Skip '.' path segments. 
1186 } 1187 if (isDotDot(segment)) { 1188 pop(); 1189 return; 1190 } 1191 1192 // ANDROID-BEGIN: http://b/29983827 1193 // If the encodedPathSegments doesn't even include "/" then add the leading "/" before 1194 // pushing more segments or modifying existing segments. 1195 if (encodedPathSegments.isEmpty()) { 1196 encodedPathSegments.add(""); 1197 } 1198 // ANDROID-END: http://b/29983827 1199 1200 if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) { 1201 encodedPathSegments.set(encodedPathSegments.size() - 1, segment); 1202 } else { 1203 encodedPathSegments.add(segment); 1204 } 1205 if (addTrailingSlash) { 1206 encodedPathSegments.add(""); 1207 } 1208 } 1209 1210 private boolean isDot(String input) { 1211 return input.equals(".") || input.equalsIgnoreCase("%2e"); 1212 } 1213 1214 private boolean isDotDot(String input) { 1215 return input.equals("..") 1216 || input.equalsIgnoreCase("%2e.") 1217 || input.equalsIgnoreCase(".%2e") 1218 || input.equalsIgnoreCase("%2e%2e"); 1219 } 1220 1221 /** 1222 * Removes a path segment. When this method returns the last segment is always "", which means 1223 * the encoded path will have a trailing '/'. 1224 * 1225 * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from 1226 * ["a", "b", "c", ""] to ["a", "b", ""]. 1227 * 1228 * <p>Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"] 1229 * to ["a", "b", ""]. 1230 */ 1231 private void pop() { 1232 // ANDROID-BEGIN: http://b/29983827 1233 // Cannot pop() if there isn't even a "/". Leave the path as is. This method is only used 1234 // from push(). push() handles the empty case explicitly. 1235 if (encodedPathSegments.isEmpty()) { 1236 return; 1237 } 1238 // ANDROID-END: http://b/29983827 1239 1240 String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1); 1241 1242 // Make sure the path ends with a '/' by either adding an empty string or clearing a segment. 
1243 if (removed.isEmpty() && !encodedPathSegments.isEmpty()) { 1244 encodedPathSegments.set(encodedPathSegments.size() - 1, ""); 1245 } else { 1246 encodedPathSegments.add(""); 1247 } 1248 } 1249 1250 /** 1251 * Increments {@code pos} until {@code input[pos]} is not ASCII whitespace. Stops at {@code 1252 * limit}. 1253 */ 1254 private int skipLeadingAsciiWhitespace(String input, int pos, int limit) { 1255 for (int i = pos; i < limit; i++) { 1256 switch (input.charAt(i)) { 1257 case '\t': 1258 case '\n': 1259 case '\f': 1260 case '\r': 1261 case ' ': 1262 continue; 1263 default: 1264 return i; 1265 } 1266 } 1267 return limit; 1268 } 1269 1270 /** 1271 * Decrements {@code limit} until {@code input[limit - 1]} is not ASCII whitespace. Stops at 1272 * {@code pos}. 1273 */ 1274 private int skipTrailingAsciiWhitespace(String input, int pos, int limit) { 1275 for (int i = limit - 1; i >= pos; i--) { 1276 switch (input.charAt(i)) { 1277 case '\t': 1278 case '\n': 1279 case '\f': 1280 case '\r': 1281 case ' ': 1282 continue; 1283 default: 1284 return i + 1; 1285 } 1286 } 1287 return pos; 1288 } 1289 1290 /** 1291 * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if 1292 * {@code input} does not have a scheme that starts at {@code pos}. 1293 */ 1294 private static int schemeDelimiterOffset(String input, int pos, int limit) { 1295 if (limit - pos < 2) return -1; 1296 1297 char c0 = input.charAt(pos); 1298 if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char. 1299 1300 for (int i = pos + 1; i < limit; i++) { 1301 char c = input.charAt(i); 1302 1303 if ((c >= 'a' && c <= 'z') 1304 || (c >= 'A' && c <= 'Z') 1305 || (c >= '0' && c <= '9') 1306 || c == '+' 1307 || c == '-' 1308 || c == '.') { 1309 continue; // Scheme character. Keep going. 1310 } else if (c == ':') { 1311 return i; // Scheme prefix! 1312 } else { 1313 return -1; // Non-scheme character before the first ':'. 
1314 } 1315 } 1316 1317 return -1; // No ':'; doesn't start with a scheme. 1318 } 1319 1320 /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */ 1321 private static int slashCount(String input, int pos, int limit) { 1322 int slashCount = 0; 1323 while (pos < limit) { 1324 char c = input.charAt(pos); 1325 if (c == '\\' || c == '/') { 1326 slashCount++; 1327 pos++; 1328 } else { 1329 break; 1330 } 1331 } 1332 return slashCount; 1333 } 1334 1335 /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */ 1336 private static int portColonOffset(String input, int pos, int limit) { 1337 for (int i = pos; i < limit; i++) { 1338 switch (input.charAt(i)) { 1339 case '[': 1340 while (++i < limit) { 1341 if (input.charAt(i) == ']') break; 1342 } 1343 break; 1344 case ':': 1345 return i; 1346 } 1347 } 1348 return limit; // No colon. 1349 } 1350 1351 private static String canonicalizeHost(String input, int pos, int limit) { 1352 // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've 1353 // checked for IPv6 square braces. But Chrome does it first, and that's more lenient. 1354 String percentDecoded = percentDecode(input, pos, limit, false); 1355 1356 // If the input is encased in square braces "[...]", drop 'em. We have an IPv6 address. 1357 if (percentDecoded.startsWith("[") && percentDecoded.endsWith("]")) { 1358 InetAddress inetAddress = decodeIpv6(percentDecoded, 1, percentDecoded.length() - 1); 1359 if (inetAddress == null) return null; 1360 byte[] address = inetAddress.getAddress(); 1361 if (address.length == 16) return inet6AddressToAscii(address); 1362 throw new AssertionError(); 1363 } 1364 1365 return domainToAscii(percentDecoded); 1366 } 1367 1368 /** Decodes an IPv6 address like 1111:2222:3333:4444:5555:6666:7777:8888 or ::1. 
*/ 1369 private static InetAddress decodeIpv6(String input, int pos, int limit) { 1370 byte[] address = new byte[16]; 1371 int b = 0; 1372 int compress = -1; 1373 int groupOffset = -1; 1374 1375 for (int i = pos; i < limit; ) { 1376 if (b == address.length) return null; // Too many groups. 1377 1378 // Read a delimiter. 1379 if (i + 2 <= limit && input.regionMatches(i, "::", 0, 2)) { 1380 // Compression "::" delimiter, which is anywhere in the input, including its prefix. 1381 if (compress != -1) return null; // Multiple "::" delimiters. 1382 i += 2; 1383 b += 2; 1384 compress = b; 1385 if (i == limit) break; 1386 } else if (b != 0) { 1387 // Group separator ":" delimiter. 1388 if (input.regionMatches(i, ":", 0, 1)) { 1389 i++; 1390 } else if (input.regionMatches(i, ".", 0, 1)) { 1391 // If we see a '.', rewind to the beginning of the previous group and parse as IPv4. 1392 if (!decodeIpv4Suffix(input, groupOffset, limit, address, b - 2)) return null; 1393 b += 2; // We rewound two bytes and then added four. 1394 break; 1395 } else { 1396 return null; // Wrong delimiter. 1397 } 1398 } 1399 1400 // Read a group, one to four hex digits. 1401 int value = 0; 1402 groupOffset = i; 1403 for (; i < limit; i++) { 1404 char c = input.charAt(i); 1405 int hexDigit = decodeHexDigit(c); 1406 if (hexDigit == -1) break; 1407 value = (value << 4) + hexDigit; 1408 } 1409 int groupLength = i - groupOffset; 1410 if (groupLength == 0 || groupLength > 4) return null; // Group is the wrong size. 1411 1412 // We've successfully read a group. Assign its value to our byte array. 1413 address[b++] = (byte) ((value >>> 8) & 0xff); 1414 address[b++] = (byte) (value & 0xff); 1415 } 1416 1417 // All done. If compression happened, we need to move bytes to the right place in the 1418 // address. 
Here's a sample: 1419 // 1420 // input: "1111:2222:3333::7777:8888" 1421 // before: { 11, 11, 22, 22, 33, 33, 00, 00, 77, 77, 88, 88, 00, 00, 00, 00 } 1422 // compress: 6 1423 // b: 10 1424 // after: { 11, 11, 22, 22, 33, 33, 00, 00, 00, 00, 00, 00, 77, 77, 88, 88 } 1425 // 1426 if (b != address.length) { 1427 if (compress == -1) return null; // Address didn't have compression or enough groups. 1428 System.arraycopy(address, compress, address, address.length - (b - compress), b - compress); 1429 Arrays.fill(address, compress, compress + (address.length - b), (byte) 0); 1430 } 1431 1432 try { 1433 return InetAddress.getByAddress(address); 1434 } catch (UnknownHostException e) { 1435 throw new AssertionError(); 1436 } 1437 } 1438 1439 /** Decodes an IPv4 address suffix of an IPv6 address, like 1111::5555:6666:192.168.0.1. */ 1440 private static boolean decodeIpv4Suffix( 1441 String input, int pos, int limit, byte[] address, int addressOffset) { 1442 int b = addressOffset; 1443 1444 for (int i = pos; i < limit; ) { 1445 if (b == address.length) return false; // Too many groups. 1446 1447 // Read a delimiter. 1448 if (b != addressOffset) { 1449 if (input.charAt(i) != '.') return false; // Wrong delimiter. 1450 i++; 1451 } 1452 1453 // Read 1 or more decimal digits for a value in 0..255. 1454 int value = 0; 1455 int groupOffset = i; 1456 for (; i < limit; i++) { 1457 char c = input.charAt(i); 1458 if (c < '0' || c > '9') break; 1459 if (value == 0 && groupOffset != i) return false; // Reject unnecessary leading '0's. 1460 value = (value * 10) + c - '0'; 1461 if (value > 255) return false; // Value out of range. 1462 } 1463 int groupLength = i - groupOffset; 1464 if (groupLength == 0) return false; // No digits. 1465 1466 // We've successfully read a byte. 1467 address[b++] = (byte) value; 1468 } 1469 1470 if (b != addressOffset + 4) return false; // Too few groups. We wanted exactly four. 1471 return true; // Success. 
1472 } 1473 1474 /** 1475 * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts 1476 * {@code ☃.net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}. 1477 * {@code null} will be returned if the input cannot be ToASCII encoded or if the result 1478 * contains unsupported ASCII characters. 1479 */ 1480 private static String domainToAscii(String input) { 1481 try { 1482 String result = IDN.toASCII(input).toLowerCase(Locale.US); 1483 if (result.isEmpty()) return null; 1484 1485 // Confirm that the IDN ToASCII result doesn't contain any illegal characters. 1486 if (containsInvalidHostnameAsciiCodes(result)) { 1487 return null; 1488 } 1489 // TODO: implement all label limits. 1490 return result; 1491 } catch (IllegalArgumentException e) { 1492 return null; 1493 } 1494 } 1495 1496 private static boolean containsInvalidHostnameAsciiCodes(String hostnameAscii) { 1497 for (int i = 0; i < hostnameAscii.length(); i++) { 1498 char c = hostnameAscii.charAt(i); 1499 // The WHATWG Host parsing rules accepts some character codes which are invalid by 1500 // definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here 1501 // we rule out characters that would cause problems in host headers. 1502 if (c <= '\u001f' || c >= '\u007f') { 1503 return true; 1504 } 1505 // Check for the characters mentioned in the WHATWG Host parsing spec: 1506 // U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]" 1507 // (excluding the characters covered above). 1508 if (" #%/:?@[\\]".indexOf(c) != -1) { 1509 return true; 1510 } 1511 } 1512 return false; 1513 } 1514 1515 private static String inet6AddressToAscii(byte[] address) { 1516 // Go through the address looking for the longest run of 0s. Each group is 2-bytes. 
1517 int longestRunOffset = -1; 1518 int longestRunLength = 0; 1519 for (int i = 0; i < address.length; i += 2) { 1520 int currentRunOffset = i; 1521 while (i < 16 && address[i] == 0 && address[i + 1] == 0) { 1522 i += 2; 1523 } 1524 int currentRunLength = i - currentRunOffset; 1525 if (currentRunLength > longestRunLength) { 1526 longestRunOffset = currentRunOffset; 1527 longestRunLength = currentRunLength; 1528 } 1529 } 1530 1531 // Emit each 2-byte group in hex, separated by ':'. The longest run of zeroes is "::". 1532 Buffer result = new Buffer(); 1533 for (int i = 0; i < address.length; ) { 1534 if (i == longestRunOffset) { 1535 result.writeByte(':'); 1536 i += longestRunLength; 1537 if (i == 16) result.writeByte(':'); 1538 } else { 1539 if (i > 0) result.writeByte(':'); 1540 int group = (address[i] & 0xff) << 8 | address[i + 1] & 0xff; 1541 result.writeHexadecimalUnsignedLong(group); 1542 i += 2; 1543 } 1544 } 1545 return result.readUtf8(); 1546 } 1547 1548 private static int parsePort(String input, int pos, int limit) { 1549 try { 1550 // Canonicalize the port string to skip '\n' etc. 1551 String portString = canonicalize(input, pos, limit, "", false, false, false, true); 1552 int i = Integer.parseInt(portString); 1553 if (i > 0 && i <= 65535) return i; 1554 return -1; 1555 } catch (NumberFormatException e) { 1556 return -1; // Invalid port. 1557 } 1558 } 1559 } 1560 1561 /** 1562 * Returns the index of the first character in {@code input} that contains a character in {@code 1563 * delimiters}. Returns limit if there is no such character. 
1564 */ 1565 private static int delimiterOffset(String input, int pos, int limit, String delimiters) { 1566 for (int i = pos; i < limit; i++) { 1567 if (delimiters.indexOf(input.charAt(i)) != -1) return i; 1568 } 1569 return limit; 1570 } 1571 1572 static String percentDecode(String encoded, boolean plusIsSpace) { 1573 return percentDecode(encoded, 0, encoded.length(), plusIsSpace); 1574 } 1575 1576 private List<String> percentDecode(List<String> list, boolean plusIsSpace) { 1577 List<String> result = new ArrayList<>(list.size()); 1578 for (String s : list) { 1579 result.add(s != null ? percentDecode(s, plusIsSpace) : null); 1580 } 1581 return Collections.unmodifiableList(result); 1582 } 1583 1584 static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) { 1585 for (int i = pos; i < limit; i++) { 1586 char c = encoded.charAt(i); 1587 if (c == '%' || (c == '+' && plusIsSpace)) { 1588 // Slow path: the character at i requires decoding! 1589 Buffer out = new Buffer(); 1590 out.writeUtf8(encoded, pos, i); 1591 percentDecode(out, encoded, i, limit, plusIsSpace); 1592 return out.readUtf8(); 1593 } 1594 } 1595 1596 // Fast path: no characters in [pos..limit) required decoding. 
1597 return encoded.substring(pos, limit); 1598 } 1599 1600 static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) { 1601 int codePoint; 1602 for (int i = pos; i < limit; i += Character.charCount(codePoint)) { 1603 codePoint = encoded.codePointAt(i); 1604 if (codePoint == '%' && i + 2 < limit) { 1605 int d1 = decodeHexDigit(encoded.charAt(i + 1)); 1606 int d2 = decodeHexDigit(encoded.charAt(i + 2)); 1607 if (d1 != -1 && d2 != -1) { 1608 out.writeByte((d1 << 4) + d2); 1609 i += 2; 1610 continue; 1611 } 1612 } else if (codePoint == '+' && plusIsSpace) { 1613 out.writeByte(' '); 1614 continue; 1615 } 1616 out.writeUtf8CodePoint(codePoint); 1617 } 1618 } 1619 1620 static boolean percentEncoded(String encoded, int pos, int limit) { 1621 return pos + 2 < limit 1622 && encoded.charAt(pos) == '%' 1623 && decodeHexDigit(encoded.charAt(pos + 1)) != -1 1624 && decodeHexDigit(encoded.charAt(pos + 2)) != -1; 1625 } 1626 1627 static int decodeHexDigit(char c) { 1628 if (c >= '0' && c <= '9') return c - '0'; 1629 if (c >= 'a' && c <= 'f') return c - 'a' + 10; 1630 if (c >= 'A' && c <= 'F') return c - 'A' + 10; 1631 return -1; 1632 } 1633 1634 /** 1635 * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following 1636 * transformations: 1637 * <ul> 1638 * <li>Tabs, newlines, form feeds and carriage returns are skipped. 1639 * <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B". 1640 * <li>Characters in {@code encodeSet} are percent-encoded. 1641 * <li>Control characters and non-ASCII characters are percent-encoded. 1642 * <li>All other characters are copied without transformation. 1643 * </ul> 1644 * 1645 * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'. 1646 * @param strict true to encode '%' if it is not the prefix of a valid percent encoding. 
1647 * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded 1648 * @param asciiOnly true to encode all non-ASCII codepoints. 1649 */ 1650 static String canonicalize(String input, int pos, int limit, String encodeSet, 1651 boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { 1652 int codePoint; 1653 for (int i = pos; i < limit; i += Character.charCount(codePoint)) { 1654 codePoint = input.codePointAt(i); 1655 if (codePoint < 0x20 1656 || codePoint == 0x7f 1657 || codePoint >= 0x80 && asciiOnly 1658 || encodeSet.indexOf(codePoint) != -1 1659 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit)) 1660 || codePoint == '+' && plusIsSpace) { 1661 // Slow path: the character at i requires encoding! 1662 Buffer out = new Buffer(); 1663 out.writeUtf8(input, pos, i); 1664 canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace, 1665 asciiOnly); 1666 return out.readUtf8(); 1667 } 1668 } 1669 1670 // Fast path: no characters in [pos..limit) required encoding. 1671 return input.substring(pos, limit); 1672 } 1673 1674 static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet, 1675 boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { 1676 Buffer utf8Buffer = null; // Lazily allocated. 1677 int codePoint; 1678 for (int i = pos; i < limit; i += Character.charCount(codePoint)) { 1679 codePoint = input.codePointAt(i); 1680 if (alreadyEncoded 1681 && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) { 1682 // Skip this character. 1683 } else if (codePoint == '+' && plusIsSpace) { 1684 // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'. 1685 out.writeUtf8(alreadyEncoded ? 
"+" : "%2B"); 1686 } else if (codePoint < 0x20 1687 || codePoint == 0x7f 1688 || codePoint >= 0x80 && asciiOnly 1689 || encodeSet.indexOf(codePoint) != -1 1690 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) { 1691 // Percent encode this character. 1692 if (utf8Buffer == null) { 1693 utf8Buffer = new Buffer(); 1694 } 1695 utf8Buffer.writeUtf8CodePoint(codePoint); 1696 while (!utf8Buffer.exhausted()) { 1697 int b = utf8Buffer.readByte() & 0xff; 1698 out.writeByte('%'); 1699 out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]); 1700 out.writeByte(HEX_DIGITS[b & 0xf]); 1701 } 1702 } else { 1703 // This character doesn't need encoding. Just copy it over. 1704 out.writeUtf8CodePoint(codePoint); 1705 } 1706 } 1707 } 1708 1709 static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict, 1710 boolean plusIsSpace, boolean asciiOnly) { 1711 return canonicalize( 1712 input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly); 1713 } 1714 } 1715