1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.net; 19 20 import java.io.IOException; 21 import java.io.ObjectInputStream; 22 import java.io.ObjectOutputStream; 23 import java.io.Serializable; 24 import java.util.Locale; 25 import libcore.net.UriCodec; 26 import libcore.net.url.UrlUtils; 27 28 /** 29 * A Uniform Resource Identifier that identifies an abstract or physical 30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 31 * 2396</a>. 32 * 33 * <h3>Parts of a URI</h3> 34 * A URI is composed of many parts. This class can both parse URI strings into 35 * parts and compose URI strings from parts. 
For example, consider the parts of 36 * this URI: 37 * {@code http://username:password@host:8080/directory/file?query#fragment} 38 * <table> 39 * <tr><th>Component </th><th>Example value </th><th>Also known as</th></tr> 40 * <tr><td>{@link #getScheme() Scheme} </td><td>{@code http} </td><td>protocol</td></tr> 41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr> 42 * <tr><td>{@link #getAuthority() Authority} </td><td>{@code username:password@host:8080} </td><td></td></tr> 43 * <tr><td>{@link #getUserInfo() User Info} </td><td>{@code username:password} </td><td></td></tr> 44 * <tr><td>{@link #getHost() Host} </td><td>{@code host} </td><td></td></tr> 45 * <tr><td>{@link #getPort() Port} </td><td>{@code 8080} </td><td></td></tr> 46 * <tr><td>{@link #getPath() Path} </td><td>{@code /directory/file} </td><td></td></tr> 47 * <tr><td>{@link #getQuery() Query} </td><td>{@code query} </td><td></td></tr> 48 * <tr><td>{@link #getFragment() Fragment} </td><td>{@code fragment} </td><td>ref</td></tr> 49 * </table> 50 * 51 * <h3>Absolute vs. Relative URIs</h3> 52 * URIs are either {@link #isAbsolute() absolute or relative}. 53 * <ul> 54 * <li><strong>Absolute:</strong> {@code http://android.com/robots.txt} 55 * <li><strong>Relative:</strong> {@code robots.txt} 56 * </ul> 57 * 58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link 59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL. 60 * 61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you 62 * have the absolute URI that a relative URI is relative to, you can use {@link 63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use 64 * {@link #relativize} to compute the relative URI from one URI to another. 
 * <pre>   {@code
 *   URI absolute = new URI("http://android.com/");
 *   URI relative = new URI("robots.txt");
 *   URI resolved = new URI("http://android.com/robots.txt");
 *
 *   // print "http://android.com/robots.txt"
 *   System.out.println(absolute.resolve(relative));
 *
 *   // print "robots.txt"
 *   System.out.println(absolute.relativize(resolved));
 * }</pre>
 *
 * <h3>Opaque vs. Hierarchical URIs</h3>
 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
 * URIs are always hierarchical.
 * <ul>
 *   <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
 *   <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
 * </ul>
 *
 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
 * begin with the slash character: {@code /}. The contents of the
 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
 * has an authority, user info, host, port, path or query. An opaque URI may
 * have a fragment, however. A typical opaque URI is
 * {@code mailto:robots@example.com}.
 * <table>
 * <tr><th>Component           </th><th>Example value             </th></tr>
 * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
 * <tr><td>Fragment            </td><td>                          </td></tr>
 * </table>
 * <p>Hierarchical URIs may have values for any URI component. They always
 * have a non-null path, though that path may be the empty string.
 *
 * <h3>Encoding and Decoding URI Components</h3>
 * Each component of a URI permits a limited set of legal characters. Other
 * characters must first be <i>encoded</i> before they can be embedded in a URI.
 * To recover the original characters from a URI, they may be <i>decoded</i>.
 * <strong>Contrary to what you might expect,</strong> this class uses the
 * term <i>raw</i> to refer to encoded strings.
The non-<i>raw</i> accessors 106 * return decoded strings. For example, consider how this URI is decoded: 107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22} 108 * <table> 109 * <tr><th>Component </th><th>Legal Characters </th><th>Other Constraints </th><th>Raw Value </th><th>Value</th></tr> 110 * <tr><td>Scheme </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.} </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td> </td><td>{@code http}</td></tr> 111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr> 112 * <tr><td>Authority </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd@host:80} </td><td>{@code user:pa55w?rd@host:80}</td></tr> 113 * <tr><td>User Info </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd} </td><td>{@code user:pa55w?rd}</td></tr> 114 * <tr><td>Host </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]} </td><td>Domain name, IPv4 address or [IPv6 address] </td><td> </td><td>host</td></tr> 115 * <tr><td>Port </td><td>{@code 0-9} </td><td> </td><td> </td><td>{@code 80}</td></tr> 116 * <tr><td>Path </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@} </td><td>Non-ASCII characters okay </td><td>{@code /doc%7Csearch} </td><td>{@code /doc|search}</td></tr> 117 * <tr><td>Query </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code q=green%20robots} </td><td>{@code q=green robots}</td></tr> 118 * <tr><td>Fragment </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code 
_-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code over%206%22} </td><td>{@code over 6"}</td></tr>
 * </table>
 * A URI's host, port and scheme are not eligible for encoding and must not
 * contain illegal characters.
 *
 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
 * class. These constructors accept your original strings and encode them into
 * their raw form.
 *
 * <p>To decode a URI, invoke the single-string constructor, and then use the
 * appropriate accessor methods to get the decoded components.
 *
 * <p>The {@link URL} class can be used to retrieve resources by their URI.
 */
public final class URI implements Comparable<URI>, Serializable {

    private static final long serialVersionUID = -6052424284110960213l;

    /** Non-alphanumeric characters that every {@code PartEncoder} leaves unescaped. */
    static final String UNRESERVED = "_-!.~\'()*";
    /** Punctuation characters that every {@code PartEncoder} leaves unescaped. */
    static final String PUNCTUATION = ",;:$&+=";

    /** Encoder for user info; permits no characters beyond the shared sets. */
    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
    /** Encoder for paths; additionally permits {@code /} and {@code @}. */
    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
    /** Encoder for authorities; additionally permits {@code @}, {@code [} and {@code ]}. */
    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");

    /** for java.net.URL, which foolishly combines these two parts */
    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");

    /** for query, fragment, and scheme-specific part */
    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");

    /** Retains all ASCII chars including delimiters. */
    private static final UriCodec ASCII_ONLY = new UriCodec() {
        @Override protected boolean isRetained(char c) {
            // Code points 0..127 are kept; anything above is presumably escaped by UriCodec.
            return c <= 127;
        }
    };

    /**
     * Encodes the unescaped characters of {@code s} that are not permitted.
     * Permitted characters are:
     * <ul>
     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161 * <li>{@code extraOkayChars}, 162 * <li>non-ASCII, non-control, non-whitespace characters 163 * </ul> 164 */ 165 private static class PartEncoder extends UriCodec { 166 private final String extraLegalCharacters; 167 PartEncoder(String extraLegalCharacters)168 PartEncoder(String extraLegalCharacters) { 169 this.extraLegalCharacters = extraLegalCharacters; 170 } 171 isRetained(char c)172 @Override protected boolean isRetained(char c) { 173 return UNRESERVED.indexOf(c) != -1 174 || PUNCTUATION.indexOf(c) != -1 175 || extraLegalCharacters.indexOf(c) != -1 176 || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c)); 177 } 178 } 179 180 private String string; 181 private transient String scheme; 182 private transient String schemeSpecificPart; 183 private transient String authority; 184 private transient String userInfo; 185 private transient String host; 186 private transient int port = -1; 187 private transient String path; 188 private transient String query; 189 private transient String fragment; 190 private transient boolean opaque; 191 private transient boolean absolute; 192 private transient boolean serverAuthority = false; 193 194 private transient int hash = -1; 195 URI()196 private URI() {} 197 198 /** 199 * Creates a new URI instance by parsing {@code spec}. 200 * 201 * @param spec a URI whose illegal characters have all been encoded. 202 */ URI(String spec)203 public URI(String spec) throws URISyntaxException { 204 parseURI(spec, false); 205 } 206 207 /** 208 * Creates a new URI instance of the given unencoded component parts. 209 * 210 * @param scheme the URI scheme, or null for a non-absolute URI. 
211 */ URI(String scheme, String schemeSpecificPart, String fragment)212 public URI(String scheme, String schemeSpecificPart, String fragment) 213 throws URISyntaxException { 214 StringBuilder uri = new StringBuilder(); 215 if (scheme != null) { 216 uri.append(scheme); 217 uri.append(':'); 218 } 219 if (schemeSpecificPart != null) { 220 ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart); 221 } 222 if (fragment != null) { 223 uri.append('#'); 224 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 225 } 226 227 parseURI(uri.toString(), false); 228 } 229 230 /** 231 * Creates a new URI instance of the given unencoded component parts. 232 * 233 * @param scheme the URI scheme, or null for a non-absolute URI. 234 */ URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)235 public URI(String scheme, String userInfo, String host, int port, String path, String query, 236 String fragment) throws URISyntaxException { 237 if (scheme == null && userInfo == null && host == null && path == null 238 && query == null && fragment == null) { 239 this.path = ""; 240 return; 241 } 242 243 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 244 throw new URISyntaxException(path, "Relative path"); 245 } 246 247 StringBuilder uri = new StringBuilder(); 248 if (scheme != null) { 249 uri.append(scheme); 250 uri.append(':'); 251 } 252 253 if (userInfo != null || host != null || port != -1) { 254 uri.append("//"); 255 } 256 257 if (userInfo != null) { 258 USER_INFO_ENCODER.appendEncoded(uri, userInfo); 259 uri.append('@'); 260 } 261 262 if (host != null) { 263 // check for IPv6 addresses that hasn't been enclosed in square brackets 264 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) { 265 host = "[" + host + "]"; 266 } 267 uri.append(host); 268 } 269 270 if (port != -1) { 271 uri.append(':'); 272 uri.append(port); 273 } 274 275 if (path != null) { 276 
PATH_ENCODER.appendEncoded(uri, path); 277 } 278 279 if (query != null) { 280 uri.append('?'); 281 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 282 } 283 284 if (fragment != null) { 285 uri.append('#'); 286 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 287 } 288 289 parseURI(uri.toString(), true); 290 } 291 292 /** 293 * Creates a new URI instance of the given unencoded component parts. 294 * 295 * @param scheme the URI scheme, or null for a non-absolute URI. 296 */ URI(String scheme, String host, String path, String fragment)297 public URI(String scheme, String host, String path, String fragment) throws URISyntaxException { 298 this(scheme, null, host, -1, path, null, fragment); 299 } 300 301 /** 302 * Creates a new URI instance of the given unencoded component parts. 303 * 304 * @param scheme the URI scheme, or null for a non-absolute URI. 305 */ URI(String scheme, String authority, String path, String query, String fragment)306 public URI(String scheme, String authority, String path, String query, 307 String fragment) throws URISyntaxException { 308 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 309 throw new URISyntaxException(path, "Relative path"); 310 } 311 312 StringBuilder uri = new StringBuilder(); 313 if (scheme != null) { 314 uri.append(scheme); 315 uri.append(':'); 316 } 317 if (authority != null) { 318 uri.append("//"); 319 AUTHORITY_ENCODER.appendEncoded(uri, authority); 320 } 321 322 if (path != null) { 323 PATH_ENCODER.appendEncoded(uri, path); 324 } 325 if (query != null) { 326 uri.append('?'); 327 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 328 } 329 if (fragment != null) { 330 uri.append('#'); 331 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 332 } 333 334 parseURI(uri.toString(), false); 335 } 336 337 /** 338 * Breaks uri into its component parts. 
This first splits URI into scheme, 339 * scheme-specific part and fragment: 340 * [scheme:][scheme-specific part][#fragment] 341 * 342 * Then it breaks the scheme-specific part into authority, path and query: 343 * [//authority][path][?query] 344 * 345 * Finally it delegates to parseAuthority to break the authority into user 346 * info, host and port: 347 * [user-info@][host][:port] 348 */ parseURI(String uri, boolean forceServer)349 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 350 string = uri; 351 352 // "#fragment" 353 int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length()); 354 if (fragmentStart < uri.length()) { 355 fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment"); 356 } 357 358 // scheme: 359 int start; 360 int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart); 361 if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) { 362 absolute = true; 363 scheme = validateScheme(uri, colon); 364 start = colon + 1; 365 366 if (start == fragmentStart) { 367 throw new URISyntaxException(uri, "Scheme-specific part expected", start); 368 } 369 370 // URIs with schemes followed by a non-/ char are opaque and need no further parsing. 
371 if (!uri.regionMatches(start, "/", 0, 1)) { 372 opaque = true; 373 schemeSpecificPart = ALL_LEGAL_ENCODER.validate( 374 uri, start, fragmentStart, "scheme specific part"); 375 return; 376 } 377 } else { 378 absolute = false; 379 start = 0; 380 } 381 382 opaque = false; 383 schemeSpecificPart = uri.substring(start, fragmentStart); 384 385 // "//authority" 386 int fileStart; 387 if (uri.regionMatches(start, "//", 0, 2)) { 388 int authorityStart = start + 2; 389 fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart); 390 if (authorityStart == uri.length()) { 391 throw new URISyntaxException(uri, "Authority expected", uri.length()); 392 } 393 if (authorityStart < fileStart) { 394 authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority"); 395 } 396 } else { 397 fileStart = start; 398 } 399 400 // "path" 401 int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart); 402 path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path"); 403 404 // "?query" 405 if (queryStart < fragmentStart) { 406 query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query"); 407 } 408 409 parseAuthority(forceServer); 410 } 411 validateScheme(String uri, int end)412 private String validateScheme(String uri, int end) throws URISyntaxException { 413 if (end == 0) { 414 throw new URISyntaxException(uri, "Scheme expected", 0); 415 } 416 417 for (int i = 0; i < end; i++) { 418 if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) { 419 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 420 } 421 } 422 423 return uri.substring(0, end); 424 } 425 426 /** 427 * Breaks this URI's authority into user info, host and port parts. 428 * [user-info@][host][:port] 429 * If any part of this fails this method will give up and potentially leave 430 * these fields with their default values. 431 * 432 * @param forceServer true to always throw if the authority cannot be 433 * parsed. 
If false, this method may still throw for some kinds of 434 * errors; this unpredictable behavior is consistent with the RI. 435 */ parseAuthority(boolean forceServer)436 private void parseAuthority(boolean forceServer) throws URISyntaxException { 437 if (authority == null) { 438 return; 439 } 440 441 String tempUserInfo = null; 442 String temp = authority; 443 int index = temp.indexOf('@'); 444 int hostIndex = 0; 445 if (index != -1) { 446 // remove user info 447 tempUserInfo = temp.substring(0, index); 448 validateUserInfo(authority, tempUserInfo, 0); 449 temp = temp.substring(index + 1); // host[:port] is left 450 hostIndex = index + 1; 451 } 452 453 index = temp.lastIndexOf(':'); 454 int endIndex = temp.indexOf(']'); 455 456 String tempHost; 457 int tempPort = -1; 458 if (index != -1 && endIndex < index) { 459 // determine port and host 460 tempHost = temp.substring(0, index); 461 462 if (index < (temp.length() - 1)) { // port part is not empty 463 try { 464 char firstPortChar = temp.charAt(index + 1); 465 if (firstPortChar >= '0' && firstPortChar <= '9') { 466 // allow only digits, no signs 467 tempPort = Integer.parseInt(temp.substring(index + 1)); 468 } else { 469 if (forceServer) { 470 throw new URISyntaxException(authority, 471 "Invalid port number", hostIndex + index + 1); 472 } 473 return; 474 } 475 } catch (NumberFormatException e) { 476 if (forceServer) { 477 throw new URISyntaxException(authority, 478 "Invalid port number", hostIndex + index + 1); 479 } 480 return; 481 } 482 } 483 } else { 484 tempHost = temp; 485 } 486 487 if (tempHost.isEmpty()) { 488 if (forceServer) { 489 throw new URISyntaxException(authority, "Expected host", hostIndex); 490 } 491 return; 492 } 493 494 if (!isValidHost(forceServer, tempHost)) { 495 return; 496 } 497 498 // this is a server based uri, 499 // fill in the userInfo, host and port fields 500 userInfo = tempUserInfo; 501 host = tempHost; 502 port = tempPort; 503 serverAuthority = true; 504 } 505 
validateUserInfo(String uri, String userInfo, int index)506 private void validateUserInfo(String uri, String userInfo, int index) 507 throws URISyntaxException { 508 for (int i = 0; i < userInfo.length(); i++) { 509 char ch = userInfo.charAt(i); 510 if (ch == ']' || ch == '[') { 511 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 512 } 513 } 514 } 515 516 /** 517 * Returns true if {@code host} is a well-formed host name or IP address. 518 * 519 * @param forceServer true to always throw if the host cannot be parsed. If 520 * false, this method may still throw for some kinds of errors; this 521 * unpredictable behavior is consistent with the RI. 522 */ isValidHost(boolean forceServer, String host)523 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 524 if (host.startsWith("[")) { 525 // IPv6 address 526 if (!host.endsWith("]")) { 527 throw new URISyntaxException(host, 528 "Expected a closing square bracket for IPv6 address", 0); 529 } 530 if (InetAddress.isNumeric(host)) { 531 // If it's numeric, the presence of square brackets guarantees 532 // that it's a numeric IPv6 address. 533 return true; 534 } 535 throw new URISyntaxException(host, "Malformed IPv6 address"); 536 } 537 538 // '[' and ']' can only be the first char and last char 539 // of the host name 540 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 541 throw new URISyntaxException(host, "Illegal character in host name", 0); 542 } 543 544 int index = host.lastIndexOf('.'); 545 if (index < 0 || index == host.length() - 1 546 || !Character.isDigit(host.charAt(index + 1))) { 547 // domain name 548 if (isValidDomainName(host)) { 549 return true; 550 } 551 if (forceServer) { 552 throw new URISyntaxException(host, "Illegal character in host name", 0); 553 } 554 return false; 555 } 556 557 // IPv4 address? 
558 try { 559 InetAddress ia = InetAddress.parseNumericAddress(host); 560 if (ia instanceof Inet4Address) { 561 return true; 562 } 563 } catch (IllegalArgumentException ignored) { 564 } 565 566 if (forceServer) { 567 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 568 } 569 return false; 570 } 571 isValidDomainName(String host)572 private boolean isValidDomainName(String host) { 573 try { 574 UriCodec.validateSimple(host, "-."); 575 } catch (URISyntaxException e) { 576 return false; 577 } 578 579 String lastLabel = null; 580 for (String token : host.split("\\.")) { 581 lastLabel = token; 582 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 583 return false; 584 } 585 } 586 587 if (lastLabel == null) { 588 return false; 589 } 590 591 if (!lastLabel.equals(host)) { 592 char ch = lastLabel.charAt(0); 593 if (ch >= '0' && ch <= '9') { 594 return false; 595 } 596 } 597 return true; 598 } 599 600 /** 601 * Compares this URI with the given argument {@code uri}. This method will 602 * return a negative value if this URI instance is less than the given 603 * argument and a positive value if this URI instance is greater than the 604 * given argument. The return value {@code 0} indicates that the two 605 * instances represent the same URI. To define the order the single parts of 606 * the URI are compared with each other. String components will be ordered 607 * in the natural case-sensitive way. A hierarchical URI is less than an 608 * opaque URI and if one part is {@code null} the URI with the undefined 609 * part is less than the other one. 610 * 611 * @param uri 612 * the URI this instance has to compare with. 613 * @return the value representing the order of the two instances. 
614 */ compareTo(URI uri)615 public int compareTo(URI uri) { 616 int ret; 617 618 // compare schemes 619 if (scheme == null && uri.scheme != null) { 620 return -1; 621 } else if (scheme != null && uri.scheme == null) { 622 return 1; 623 } else if (scheme != null && uri.scheme != null) { 624 ret = scheme.compareToIgnoreCase(uri.scheme); 625 if (ret != 0) { 626 return ret; 627 } 628 } 629 630 // compare opacities 631 if (!opaque && uri.opaque) { 632 return -1; 633 } else if (opaque && !uri.opaque) { 634 return 1; 635 } else if (opaque && uri.opaque) { 636 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 637 if (ret != 0) { 638 return ret; 639 } 640 } else { 641 642 // otherwise both must be hierarchical 643 644 // compare authorities 645 if (authority != null && uri.authority == null) { 646 return 1; 647 } else if (authority == null && uri.authority != null) { 648 return -1; 649 } else if (authority != null && uri.authority != null) { 650 if (host != null && uri.host != null) { 651 // both are server based, so compare userInfo, host, port 652 if (userInfo != null && uri.userInfo == null) { 653 return 1; 654 } else if (userInfo == null && uri.userInfo != null) { 655 return -1; 656 } else if (userInfo != null && uri.userInfo != null) { 657 ret = userInfo.compareTo(uri.userInfo); 658 if (ret != 0) { 659 return ret; 660 } 661 } 662 663 // userInfo's are the same, compare hostname 664 ret = host.compareToIgnoreCase(uri.host); 665 if (ret != 0) { 666 return ret; 667 } 668 669 // compare port 670 if (port != uri.port) { 671 return port - uri.port; 672 } 673 } else { // one or both are registry based, compare the whole 674 // authority 675 ret = authority.compareTo(uri.authority); 676 if (ret != 0) { 677 return ret; 678 } 679 } 680 } 681 682 // authorities are the same 683 // compare paths 684 ret = path.compareTo(uri.path); 685 if (ret != 0) { 686 return ret; 687 } 688 689 // compare queries 690 691 if (query != null && uri.query == null) { 692 return 1; 693 } 
else if (query == null && uri.query != null) { 694 return -1; 695 } else if (query != null && uri.query != null) { 696 ret = query.compareTo(uri.query); 697 if (ret != 0) { 698 return ret; 699 } 700 } 701 } 702 703 // everything else is identical, so compare fragments 704 if (fragment != null && uri.fragment == null) { 705 return 1; 706 } else if (fragment == null && uri.fragment != null) { 707 return -1; 708 } else if (fragment != null && uri.fragment != null) { 709 ret = fragment.compareTo(uri.fragment); 710 if (ret != 0) { 711 return ret; 712 } 713 } 714 715 // identical 716 return 0; 717 } 718 719 /** 720 * Returns the URI formed by parsing {@code uri}. This method behaves 721 * identically to the string constructor but throws a different exception 722 * on failure. The constructor fails with a checked {@link 723 * URISyntaxException}; this method fails with an unchecked {@link 724 * IllegalArgumentException}. 725 */ create(String uri)726 public static URI create(String uri) { 727 try { 728 return new URI(uri); 729 } catch (URISyntaxException e) { 730 throw new IllegalArgumentException(e.getMessage()); 731 } 732 } 733 duplicate()734 private URI duplicate() { 735 URI clone = new URI(); 736 clone.absolute = absolute; 737 clone.authority = authority; 738 clone.fragment = fragment; 739 clone.host = host; 740 clone.opaque = opaque; 741 clone.path = path; 742 clone.port = port; 743 clone.query = query; 744 clone.scheme = scheme; 745 clone.schemeSpecificPart = schemeSpecificPart; 746 clone.userInfo = userInfo; 747 clone.serverAuthority = serverAuthority; 748 return clone; 749 } 750 751 /* 752 * Takes a string that may contain hex sequences like %F1 or %2b and 753 * converts the hex values following the '%' to lowercase 754 */ convertHexToLowerCase(String s)755 private String convertHexToLowerCase(String s) { 756 StringBuilder result = new StringBuilder(""); 757 if (s.indexOf('%') == -1) { 758 return s; 759 } 760 761 int index, prevIndex = 0; 762 while ((index = 
s.indexOf('%', prevIndex)) != -1) { 763 result.append(s.substring(prevIndex, index + 1)); 764 result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US)); 765 index += 3; 766 prevIndex = index; 767 } 768 return result.toString(); 769 } 770 771 /** 772 * Returns true if the given URI escaped strings {@code first} and {@code second} are 773 * equal. 774 * 775 * TODO: This method assumes that both strings are escaped using the same escape rules 776 * yet it still performs case insensitive comparison of the escaped sequences. 777 * Why is this necessary ? We can just replace it with first.equals(second) 778 * otherwise. 779 */ escapedEquals(String first, String second)780 private boolean escapedEquals(String first, String second) { 781 // This length test isn't a micro-optimization. We need it because we sometimes 782 // calculate the number of characters to match based on the length of the second 783 // string. If the second string is shorter than the first, we might attempt to match 784 // 0 chars, and regionMatches is specified to return true in that case. 785 if (first.length() != second.length()) { 786 return false; 787 } 788 789 int prevIndex = 0; 790 while (true) { 791 int index = first.indexOf('%', prevIndex); 792 int index1 = second.indexOf('%', prevIndex); 793 if (index != index1) { 794 return false; 795 } 796 797 // index == index1 from this point on. 798 799 if (index == -1) { 800 // No more escapes, match the remainder of the string 801 // normally. 
802 return first.regionMatches(prevIndex, second, prevIndex, 803 second.length() - prevIndex); 804 } 805 806 if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) { 807 return false; 808 } 809 810 if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) { 811 return false; 812 } 813 814 index += 3; 815 prevIndex = index; 816 } 817 } 818 equals(Object o)819 @Override public boolean equals(Object o) { 820 if (!(o instanceof URI)) { 821 return false; 822 } 823 URI uri = (URI) o; 824 825 if (uri.fragment == null && fragment != null || uri.fragment != null 826 && fragment == null) { 827 return false; 828 } else if (uri.fragment != null && fragment != null) { 829 if (!escapedEquals(uri.fragment, fragment)) { 830 return false; 831 } 832 } 833 834 if (uri.scheme == null && scheme != null || uri.scheme != null 835 && scheme == null) { 836 return false; 837 } else if (uri.scheme != null && scheme != null) { 838 if (!uri.scheme.equalsIgnoreCase(scheme)) { 839 return false; 840 } 841 } 842 843 if (uri.opaque && opaque) { 844 return escapedEquals(uri.schemeSpecificPart, 845 schemeSpecificPart); 846 } else if (!uri.opaque && !opaque) { 847 if (!escapedEquals(path, uri.path)) { 848 return false; 849 } 850 851 if (uri.query != null && query == null || uri.query == null 852 && query != null) { 853 return false; 854 } else if (uri.query != null && query != null) { 855 if (!escapedEquals(uri.query, query)) { 856 return false; 857 } 858 } 859 860 if (uri.authority != null && authority == null 861 || uri.authority == null && authority != null) { 862 return false; 863 } else if (uri.authority != null && authority != null) { 864 if (uri.host != null && host == null || uri.host == null 865 && host != null) { 866 return false; 867 } else if (uri.host == null && host == null) { 868 // both are registry based, so compare the whole authority 869 return escapedEquals(uri.authority, authority); 870 } else { // uri.host != null && host != null, so 
server-based 871 if (!host.equalsIgnoreCase(uri.host)) { 872 return false; 873 } 874 875 if (port != uri.port) { 876 return false; 877 } 878 879 if (uri.userInfo != null && userInfo == null 880 || uri.userInfo == null && userInfo != null) { 881 return false; 882 } else if (uri.userInfo != null && userInfo != null) { 883 return escapedEquals(userInfo, uri.userInfo); 884 } else { 885 return true; 886 } 887 } 888 } else { 889 // no authority 890 return true; 891 } 892 893 } else { 894 // one is opaque, the other hierarchical 895 return false; 896 } 897 } 898 899 /** 900 * Returns the scheme of this URI, or null if this URI has no scheme. This 901 * is also known as the protocol. 902 */ getScheme()903 public String getScheme() { 904 return scheme; 905 } 906 907 /** 908 * Returns the decoded scheme-specific part of this URI, or null if this URI 909 * has no scheme-specific part. 910 */ getSchemeSpecificPart()911 public String getSchemeSpecificPart() { 912 return decode(schemeSpecificPart); 913 } 914 915 /** 916 * Returns the encoded scheme-specific part of this URI, or null if this URI 917 * has no scheme-specific part. 918 */ getRawSchemeSpecificPart()919 public String getRawSchemeSpecificPart() { 920 return schemeSpecificPart; 921 } 922 923 /** 924 * Returns the decoded authority part of this URI, or null if this URI has 925 * no authority. 926 */ getAuthority()927 public String getAuthority() { 928 return decode(authority); 929 } 930 931 /** 932 * Returns the encoded authority of this URI, or null if this URI has no 933 * authority. 934 */ getRawAuthority()935 public String getRawAuthority() { 936 return authority; 937 } 938 939 /** 940 * Returns the decoded user info of this URI, or null if this URI has no 941 * user info. 942 */ getUserInfo()943 public String getUserInfo() { 944 return decode(userInfo); 945 } 946 947 /** 948 * Returns the encoded user info of this URI, or null if this URI has no 949 * user info. 
950 */ getRawUserInfo()951 public String getRawUserInfo() { 952 return userInfo; 953 } 954 955 /** 956 * Returns the host of this URI, or null if this URI has no host. 957 */ getHost()958 public String getHost() { 959 return host; 960 } 961 962 /** 963 * Returns the port number of this URI, or {@code -1} if this URI has no 964 * explicit port. 965 */ getPort()966 public int getPort() { 967 return port; 968 } 969 970 /** @hide */ getEffectivePort()971 public int getEffectivePort() { 972 return getEffectivePort(scheme, port); 973 } 974 975 /** 976 * Returns the port to use for {@code scheme} connections will use when 977 * {@link #getPort} returns {@code specifiedPort}. 978 * 979 * @hide 980 */ getEffectivePort(String scheme, int specifiedPort)981 public static int getEffectivePort(String scheme, int specifiedPort) { 982 if (specifiedPort != -1) { 983 return specifiedPort; 984 } 985 986 if ("http".equalsIgnoreCase(scheme)) { 987 return 80; 988 } else if ("https".equalsIgnoreCase(scheme)) { 989 return 443; 990 } else { 991 return -1; 992 } 993 } 994 995 /** 996 * Returns the decoded path of this URI, or null if this URI has no path. 997 */ getPath()998 public String getPath() { 999 return decode(path); 1000 } 1001 1002 /** 1003 * Returns the encoded path of this URI, or null if this URI has no path. 1004 */ getRawPath()1005 public String getRawPath() { 1006 return path; 1007 } 1008 1009 /** 1010 * Returns the decoded query of this URI, or null if this URI has no query. 1011 */ getQuery()1012 public String getQuery() { 1013 return decode(query); 1014 } 1015 1016 /** 1017 * Returns the encoded query of this URI, or null if this URI has no query. 1018 */ getRawQuery()1019 public String getRawQuery() { 1020 return query; 1021 } 1022 1023 /** 1024 * Returns the decoded fragment of this URI, or null if this URI has no 1025 * fragment. 
1026 */ getFragment()1027 public String getFragment() { 1028 return decode(fragment); 1029 } 1030 1031 /** 1032 * Gets the encoded fragment of this URI, or null if this URI has no 1033 * fragment. 1034 */ getRawFragment()1035 public String getRawFragment() { 1036 return fragment; 1037 } 1038 hashCode()1039 @Override public int hashCode() { 1040 if (hash == -1) { 1041 hash = getHashString().hashCode(); 1042 } 1043 return hash; 1044 } 1045 1046 /** 1047 * Returns true if this URI is absolute, which means that a scheme is 1048 * defined. 1049 */ isAbsolute()1050 public boolean isAbsolute() { 1051 // TODO: simplify to 'scheme != null' ? 1052 return absolute; 1053 } 1054 1055 /** 1056 * Returns true if this URI is opaque. Opaque URIs are absolute and have a 1057 * scheme-specific part that does not start with a slash character. All 1058 * parts except scheme, scheme-specific and fragment are undefined. 1059 */ isOpaque()1060 public boolean isOpaque() { 1061 return opaque; 1062 } 1063 1064 /** 1065 * Returns the normalized path. 1066 */ normalize(String path, boolean discardRelativePrefix)1067 private String normalize(String path, boolean discardRelativePrefix) { 1068 path = UrlUtils.canonicalizePath(path, discardRelativePrefix); 1069 1070 /* 1071 * If the path contains a colon before the first colon, prepend 1072 * "./" to differentiate the path from a scheme prefix. 1073 */ 1074 int colon = path.indexOf(':'); 1075 if (colon != -1) { 1076 int slash = path.indexOf('/'); 1077 if (slash == -1 || colon < slash) { 1078 path = "./" + path; 1079 } 1080 } 1081 1082 return path; 1083 } 1084 1085 /** 1086 * Normalizes the path part of this URI. 1087 * 1088 * @return an URI object which represents this instance with a normalized 1089 * path. 
1090 */ normalize()1091 public URI normalize() { 1092 if (opaque) { 1093 return this; 1094 } 1095 String normalizedPath = normalize(path, false); 1096 // if the path is already normalized, return this 1097 if (path.equals(normalizedPath)) { 1098 return this; 1099 } 1100 // get an exact copy of the URI re-calculate the scheme specific part 1101 // since the path of the normalized URI is different from this URI. 1102 URI result = duplicate(); 1103 result.path = normalizedPath; 1104 result.setSchemeSpecificPart(); 1105 return result; 1106 } 1107 1108 /** 1109 * Tries to parse the authority component of this URI to divide it into the 1110 * host, port, and user-info. If this URI is already determined as a 1111 * ServerAuthority this instance will be returned without changes. 1112 * 1113 * @return this instance with the components of the parsed server authority. 1114 * @throws URISyntaxException 1115 * if the authority part could not be parsed as a server-based 1116 * authority. 1117 */ parseServerAuthority()1118 public URI parseServerAuthority() throws URISyntaxException { 1119 if (!serverAuthority) { 1120 parseAuthority(true); 1121 } 1122 return this; 1123 } 1124 1125 /** 1126 * Makes the given URI {@code relative} to a relative URI against the URI 1127 * represented by this instance. 1128 * 1129 * @param relative 1130 * the URI which has to be relativized against this URI. 1131 * @return the relative URI. 1132 */ relativize(URI relative)1133 public URI relativize(URI relative) { 1134 if (relative.opaque || opaque) { 1135 return relative; 1136 } 1137 1138 if (scheme == null ? relative.scheme != null : !scheme 1139 .equals(relative.scheme)) { 1140 return relative; 1141 } 1142 1143 if (authority == null ? 
relative.authority != null : !authority 1144 .equals(relative.authority)) { 1145 return relative; 1146 } 1147 1148 // normalize both paths 1149 String thisPath = normalize(path, false); 1150 String relativePath = normalize(relative.path, false); 1151 1152 /* 1153 * if the paths aren't equal, then we need to determine if this URI's 1154 * path is a parent path (begins with) the relative URI's path 1155 */ 1156 if (!thisPath.equals(relativePath)) { 1157 // drop everything after the last slash in this path 1158 thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1); 1159 1160 /* 1161 * if the relative URI's path doesn't start with this URI's path, 1162 * then just return the relative URI; the URIs have nothing in 1163 * common 1164 */ 1165 if (!relativePath.startsWith(thisPath)) { 1166 return relative; 1167 } 1168 } 1169 1170 URI result = new URI(); 1171 result.fragment = relative.fragment; 1172 result.query = relative.query; 1173 // the result URI is the remainder of the relative URI's path 1174 result.path = relativePath.substring(thisPath.length()); 1175 result.setSchemeSpecificPart(); 1176 return result; 1177 } 1178 1179 /** 1180 * Resolves the given URI {@code relative} against the URI represented by 1181 * this instance. 1182 * 1183 * @param relative 1184 * the URI which has to be resolved against this URI. 1185 * @return the resolved URI. 1186 */ resolve(URI relative)1187 public URI resolve(URI relative) { 1188 if (relative.absolute || opaque) { 1189 return relative; 1190 } 1191 1192 if (relative.authority != null) { 1193 // If the relative URI has an authority, the result is the relative 1194 // with this URI's scheme. 
1195 URI result = relative.duplicate(); 1196 result.scheme = scheme; 1197 result.absolute = absolute; 1198 return result; 1199 } 1200 1201 if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) { 1202 // if the relative URI only consists of at most a fragment, 1203 URI result = duplicate(); 1204 result.fragment = relative.fragment; 1205 return result; 1206 } 1207 1208 URI result = duplicate(); 1209 result.fragment = relative.fragment; 1210 result.query = relative.query; 1211 String resolvedPath; 1212 if (relative.path.startsWith("/")) { 1213 // The relative URI has an absolute path; use it. 1214 resolvedPath = relative.path; 1215 } else if (relative.path.isEmpty()) { 1216 // The relative URI has no path; use the base path. 1217 resolvedPath = path; 1218 } else { 1219 // The relative URI has a relative path; combine the paths. 1220 int endIndex = path.lastIndexOf('/') + 1; 1221 resolvedPath = path.substring(0, endIndex) + relative.path; 1222 } 1223 result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true)); 1224 result.setSchemeSpecificPart(); 1225 return result; 1226 } 1227 1228 /** 1229 * Helper method used to re-calculate the scheme specific part of the 1230 * resolved or normalized URIs 1231 */ setSchemeSpecificPart()1232 private void setSchemeSpecificPart() { 1233 // ssp = [//authority][path][?query] 1234 StringBuilder ssp = new StringBuilder(); 1235 if (authority != null) { 1236 ssp.append("//" + authority); 1237 } 1238 if (path != null) { 1239 ssp.append(path); 1240 } 1241 if (query != null) { 1242 ssp.append("?" + query); 1243 } 1244 schemeSpecificPart = ssp.toString(); 1245 // reset string, so that it can be re-calculated correctly when asked. 1246 string = null; 1247 } 1248 1249 /** 1250 * Creates a new URI instance by parsing the given string {@code relative} 1251 * and resolves the created URI against the URI represented by this 1252 * instance. 
1253 * 1254 * @param relative 1255 * the given string to create the new URI instance which has to 1256 * be resolved later on. 1257 * @return the created and resolved URI. 1258 */ resolve(String relative)1259 public URI resolve(String relative) { 1260 return resolve(create(relative)); 1261 } 1262 decode(String s)1263 private String decode(String s) { 1264 return s != null ? UriCodec.decode(s) : null; 1265 } 1266 1267 /** 1268 * Returns the textual string representation of this URI instance using the 1269 * US-ASCII encoding. 1270 * 1271 * @return the US-ASCII string representation of this URI. 1272 */ toASCIIString()1273 public String toASCIIString() { 1274 StringBuilder result = new StringBuilder(); 1275 ASCII_ONLY.appendEncoded(result, toString()); 1276 return result.toString(); 1277 } 1278 1279 /** 1280 * Returns the encoded URI. 1281 */ toString()1282 @Override public String toString() { 1283 if (string != null) { 1284 return string; 1285 } 1286 1287 StringBuilder result = new StringBuilder(); 1288 if (scheme != null) { 1289 result.append(scheme); 1290 result.append(':'); 1291 } 1292 if (opaque) { 1293 result.append(schemeSpecificPart); 1294 } else { 1295 if (authority != null) { 1296 result.append("//"); 1297 result.append(authority); 1298 } 1299 1300 if (path != null) { 1301 result.append(path); 1302 } 1303 1304 if (query != null) { 1305 result.append('?'); 1306 result.append(query); 1307 } 1308 } 1309 1310 if (fragment != null) { 1311 result.append('#'); 1312 result.append(fragment); 1313 } 1314 1315 string = result.toString(); 1316 return string; 1317 } 1318 1319 /* 1320 * Form a string from the components of this URI, similarly to the 1321 * toString() method. But this method converts scheme and host to lowercase, 1322 * and converts escaped octets to lowercase. 
1323 */ getHashString()1324 private String getHashString() { 1325 StringBuilder result = new StringBuilder(); 1326 if (scheme != null) { 1327 result.append(scheme.toLowerCase(Locale.US)); 1328 result.append(':'); 1329 } 1330 if (opaque) { 1331 result.append(schemeSpecificPart); 1332 } else { 1333 if (authority != null) { 1334 result.append("//"); 1335 if (host == null) { 1336 result.append(authority); 1337 } else { 1338 if (userInfo != null) { 1339 result.append(userInfo + "@"); 1340 } 1341 result.append(host.toLowerCase(Locale.US)); 1342 if (port != -1) { 1343 result.append(":" + port); 1344 } 1345 } 1346 } 1347 1348 if (path != null) { 1349 result.append(path); 1350 } 1351 1352 if (query != null) { 1353 result.append('?'); 1354 result.append(query); 1355 } 1356 } 1357 1358 if (fragment != null) { 1359 result.append('#'); 1360 result.append(fragment); 1361 } 1362 1363 return convertHexToLowerCase(result.toString()); 1364 } 1365 1366 /** 1367 * Converts this URI instance to a URL. 1368 * 1369 * @return the created URL representing the same resource as this URI. 1370 * @throws MalformedURLException 1371 * if an error occurs while creating the URL or no protocol 1372 * handler could be found. 
1373 */ toURL()1374 public URL toURL() throws MalformedURLException { 1375 if (!absolute) { 1376 throw new IllegalArgumentException("URI is not absolute: " + toString()); 1377 } 1378 return new URL(toString()); 1379 } 1380 readObject(ObjectInputStream in)1381 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { 1382 in.defaultReadObject(); 1383 try { 1384 parseURI(string, false); 1385 } catch (URISyntaxException e) { 1386 throw new IOException(e.toString()); 1387 } 1388 } 1389 writeObject(ObjectOutputStream out)1390 private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException { 1391 // call toString() to ensure the value of string field is calculated 1392 toString(); 1393 out.defaultWriteObject(); 1394 } 1395 } 1396