1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.net; 19 20 import java.io.IOException; 21 import java.io.ObjectInputStream; 22 import java.io.ObjectOutputStream; 23 import java.io.Serializable; 24 import java.util.Locale; 25 import libcore.net.UriCodec; 26 import libcore.net.url.UrlUtils; 27 28 /** 29 * A Uniform Resource Identifier that identifies an abstract or physical 30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 31 * 2396</a>. 32 * 33 * <h3>Parts of a URI</h3> 34 * A URI is composed of many parts. This class can both parse URI strings into 35 * parts and compose URI strings from parts. 
For example, consider the parts of 36 * this URI: 37 * {@code http://username:password@host:8080/directory/file?query#fragment} 38 * <table> 39 * <tr><th>Component </th><th>Example value </th><th>Also known as</th></tr> 40 * <tr><td>{@link #getScheme() Scheme} </td><td>{@code http} </td><td>protocol</td></tr> 41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr> 42 * <tr><td>{@link #getAuthority() Authority} </td><td>{@code username:password@host:8080} </td><td></td></tr> 43 * <tr><td>{@link #getUserInfo() User Info} </td><td>{@code username:password} </td><td></td></tr> 44 * <tr><td>{@link #getHost() Host} </td><td>{@code host} </td><td></td></tr> 45 * <tr><td>{@link #getPort() Port} </td><td>{@code 8080} </td><td></td></tr> 46 * <tr><td>{@link #getPath() Path} </td><td>{@code /directory/file} </td><td></td></tr> 47 * <tr><td>{@link #getQuery() Query} </td><td>{@code query} </td><td></td></tr> 48 * <tr><td>{@link #getFragment() Fragment} </td><td>{@code fragment} </td><td>ref</td></tr> 49 * </table> 50 * 51 * <h3>Absolute vs. Relative URIs</h3> 52 * URIs are either {@link #isAbsolute() absolute or relative}. 53 * <ul> 54 * <li><strong>Absolute:</strong> {@code http://android.com/robots.txt} 55 * <li><strong>Relative:</strong> {@code robots.txt} 56 * </ul> 57 * 58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link 59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL. 60 * 61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you 62 * have the absolute URI that a relative URI is relative to, you can use {@link 63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use 64 * {@link #relativize} to compute the relative URI from one URI to another. 
 * <pre>   {@code
 *   URI absolute = new URI("http://android.com/");
 *   URI relative = new URI("robots.txt");
 *   URI resolved = new URI("http://android.com/robots.txt");
 *
 *   // print "http://android.com/robots.txt"
 *   System.out.println(absolute.resolve(relative));
 *
 *   // print "robots.txt"
 *   System.out.println(absolute.relativize(resolved));
 * }</pre>
 *
 * <h3>Opaque vs. Hierarchical URIs</h3>
 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
 * URIs are always hierarchical.
 * <ul>
 *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
 *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
 * </ul>
 *
 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
 * begin with the slash character: {@code /}. The contents of the
 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
 * has an authority, user info, host, port, path or query. An opaque URI may
 * have a fragment, however. A typical opaque URI is
 * {@code mailto:robots@example.com}.
 * <table>
 * <tr><th>Component           </th><th>Example value</th></tr>
 * <tr><td>Scheme              </td><td>{@code mailto}</td></tr>
 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
 * <tr><td>Fragment            </td><td></td></tr>
 * </table>
 * <p>Hierarchical URIs may have values for any URI component. They always
 * have a non-null path, though that path may be the empty string.
 *
 * <h3>Encoding and Decoding URI Components</h3>
 * Each component of a URI permits a limited set of legal characters. Other
 * characters must first be <i>encoded</i> before they can be embedded in a URI.
 * To recover the original characters from a URI, they may be <i>decoded</i>.
 * <strong>Contrary to what you might expect,</strong> this class uses the
 * term <i>raw</i> to refer to encoded strings.
The non-<i>raw</i> accessors 106 * return decoded strings. For example, consider how this URI is decoded: 107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22} 108 * <table> 109 * <tr><th>Component </th><th>Legal Characters </th><th>Other Constraints </th><th>Raw Value </th><th>Value</th></tr> 110 * <tr><td>Scheme </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.} </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td> </td><td>{@code http}</td></tr> 111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr> 112 * <tr><td>Authority </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd@host:80} </td><td>{@code user:pa55w?rd@host:80}</td></tr> 113 * <tr><td>User Info </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd} </td><td>{@code user:pa55w?rd}</td></tr> 114 * <tr><td>Host </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]} </td><td>Domain name, IPv4 address or [IPv6 address] </td><td> </td><td>host</td></tr> 115 * <tr><td>Port </td><td>{@code 0-9} </td><td> </td><td> </td><td>{@code 80}</td></tr> 116 * <tr><td>Path </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@} </td><td>Non-ASCII characters okay </td><td>{@code /doc%7Csearch} </td><td>{@code /doc|search}</td></tr> 117 * <tr><td>Query </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code q=green%20robots} </td><td>{@code q=green robots}</td></tr> 118 * <tr><td>Fragment </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code 
_-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code over%206%22} </td><td>{@code over 6"}</td></tr> 119 * </table> 120 * A URI's host, port and scheme are not eligible for encoding and must not 121 * contain illegal characters. 122 * 123 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this 124 * class. These constructors accept your original strings and encode them into 125 * their raw form. 126 * 127 * <p>To decode a URI, invoke the single-string constructor, and then use the 128 * appropriate accessor methods to get the decoded components. 129 * 130 * <p>The {@link URL} class can be used to retrieve resources by their URI. 131 */ 132 public final class URI implements Comparable<URI>, Serializable { 133 134 private static final long serialVersionUID = -6052424284110960213l; 135 136 static final String UNRESERVED = "_-!.~\'()*"; 137 static final String PUNCTUATION = ",;:$&+="; 138 139 static final UriCodec USER_INFO_ENCODER = new PartEncoder(""); 140 static final UriCodec PATH_ENCODER = new PartEncoder("/@"); 141 static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]"); 142 143 /** for java.net.URL, which foolishly combines these two parts */ 144 static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?"); 145 146 /** for query, fragment, and scheme-specific part */ 147 static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@"); 148 149 /** Retains all ASCII chars including delimiters. */ 150 private static final UriCodec ASCII_ONLY = new UriCodec() { 151 @Override protected boolean isRetained(char c) { 152 return c <= 127; 153 } 154 }; 155 156 /** 157 * Encodes the unescaped characters of {@code s} that are not permitted. 158 * Permitted characters are: 159 * <ul> 160 * <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. 
161 * <li>{@code extraOkayChars}, 162 * <li>non-ASCII, non-control, non-whitespace characters 163 * </ul> 164 */ 165 private static class PartEncoder extends UriCodec { 166 private final String extraLegalCharacters; 167 PartEncoder(String extraLegalCharacters)168 PartEncoder(String extraLegalCharacters) { 169 this.extraLegalCharacters = extraLegalCharacters; 170 } 171 isRetained(char c)172 @Override protected boolean isRetained(char c) { 173 return UNRESERVED.indexOf(c) != -1 174 || PUNCTUATION.indexOf(c) != -1 175 || extraLegalCharacters.indexOf(c) != -1 176 || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c)); 177 } 178 } 179 180 private String string; 181 private transient String scheme; 182 private transient String schemeSpecificPart; 183 private transient String authority; 184 private transient String userInfo; 185 private transient String host; 186 private transient int port = -1; 187 private transient String path; 188 private transient String query; 189 private transient String fragment; 190 private transient boolean opaque; 191 private transient boolean absolute; 192 private transient boolean serverAuthority = false; 193 194 private transient int hash = -1; 195 URI()196 private URI() {} 197 198 /** 199 * Creates a new URI instance by parsing {@code spec}. 200 * 201 * @param spec a URI whose illegal characters have all been encoded. 202 */ URI(String spec)203 public URI(String spec) throws URISyntaxException { 204 parseURI(spec, false); 205 } 206 207 /** 208 * Creates a new URI instance of the given unencoded component parts. 209 * 210 * @param scheme the URI scheme, or null for a non-absolute URI. 
211 */ URI(String scheme, String schemeSpecificPart, String fragment)212 public URI(String scheme, String schemeSpecificPart, String fragment) 213 throws URISyntaxException { 214 StringBuilder uri = new StringBuilder(); 215 if (scheme != null) { 216 uri.append(scheme); 217 uri.append(':'); 218 } 219 if (schemeSpecificPart != null) { 220 ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart); 221 } 222 if (fragment != null) { 223 uri.append('#'); 224 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 225 } 226 227 parseURI(uri.toString(), false); 228 } 229 230 /** 231 * Creates a new URI instance of the given unencoded component parts. 232 * 233 * @param scheme the URI scheme, or null for a non-absolute URI. 234 */ URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)235 public URI(String scheme, String userInfo, String host, int port, String path, String query, 236 String fragment) throws URISyntaxException { 237 if (scheme == null && userInfo == null && host == null && path == null 238 && query == null && fragment == null) { 239 this.path = ""; 240 return; 241 } 242 243 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 244 throw new URISyntaxException(path, "Relative path"); 245 } 246 247 StringBuilder uri = new StringBuilder(); 248 if (scheme != null) { 249 uri.append(scheme); 250 uri.append(':'); 251 } 252 253 if (userInfo != null || host != null || port != -1) { 254 uri.append("//"); 255 } 256 257 if (userInfo != null) { 258 USER_INFO_ENCODER.appendEncoded(uri, userInfo); 259 uri.append('@'); 260 } 261 262 if (host != null) { 263 // check for IPv6 addresses that hasn't been enclosed in square brackets 264 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) { 265 host = "[" + host + "]"; 266 } 267 uri.append(host); 268 } 269 270 if (port != -1) { 271 uri.append(':'); 272 uri.append(port); 273 } 274 275 if (path != null) { 276 
PATH_ENCODER.appendEncoded(uri, path); 277 } 278 279 if (query != null) { 280 uri.append('?'); 281 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 282 } 283 284 if (fragment != null) { 285 uri.append('#'); 286 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 287 } 288 289 parseURI(uri.toString(), true); 290 } 291 292 /** 293 * Creates a new URI instance of the given unencoded component parts. 294 * 295 * @param scheme the URI scheme, or null for a non-absolute URI. 296 */ URI(String scheme, String host, String path, String fragment)297 public URI(String scheme, String host, String path, String fragment) throws URISyntaxException { 298 this(scheme, null, host, -1, path, null, fragment); 299 } 300 301 /** 302 * Creates a new URI instance of the given unencoded component parts. 303 * 304 * @param scheme the URI scheme, or null for a non-absolute URI. 305 */ URI(String scheme, String authority, String path, String query, String fragment)306 public URI(String scheme, String authority, String path, String query, 307 String fragment) throws URISyntaxException { 308 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 309 throw new URISyntaxException(path, "Relative path"); 310 } 311 312 StringBuilder uri = new StringBuilder(); 313 if (scheme != null) { 314 uri.append(scheme); 315 uri.append(':'); 316 } 317 if (authority != null) { 318 uri.append("//"); 319 AUTHORITY_ENCODER.appendEncoded(uri, authority); 320 } 321 322 if (path != null) { 323 PATH_ENCODER.appendEncoded(uri, path); 324 } 325 if (query != null) { 326 uri.append('?'); 327 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 328 } 329 if (fragment != null) { 330 uri.append('#'); 331 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 332 } 333 334 parseURI(uri.toString(), false); 335 } 336 337 /** 338 * Breaks uri into its component parts. 
This first splits URI into scheme, 339 * scheme-specific part and fragment: 340 * [scheme:][scheme-specific part][#fragment] 341 * 342 * Then it breaks the scheme-specific part into authority, path and query: 343 * [//authority][path][?query] 344 * 345 * Finally it delegates to parseAuthority to break the authority into user 346 * info, host and port: 347 * [user-info@][host][:port] 348 */ parseURI(String uri, boolean forceServer)349 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 350 string = uri; 351 352 // "#fragment" 353 int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length()); 354 if (fragmentStart < uri.length()) { 355 fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment"); 356 } 357 358 // scheme: 359 int start; 360 int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart); 361 if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) { 362 absolute = true; 363 scheme = validateScheme(uri, colon); 364 start = colon + 1; 365 366 if (start == fragmentStart) { 367 throw new URISyntaxException(uri, "Scheme-specific part expected", start); 368 } 369 370 // URIs with schemes followed by a non-/ char are opaque and need no further parsing. 
371 if (!uri.regionMatches(start, "/", 0, 1)) { 372 opaque = true; 373 schemeSpecificPart = ALL_LEGAL_ENCODER.validate( 374 uri, start, fragmentStart, "scheme specific part"); 375 return; 376 } 377 } else { 378 absolute = false; 379 start = 0; 380 } 381 382 opaque = false; 383 schemeSpecificPart = uri.substring(start, fragmentStart); 384 385 // "//authority" 386 int fileStart; 387 if (uri.regionMatches(start, "//", 0, 2)) { 388 int authorityStart = start + 2; 389 fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart); 390 if (authorityStart == uri.length()) { 391 throw new URISyntaxException(uri, "Authority expected", uri.length()); 392 } 393 if (authorityStart < fileStart) { 394 authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority"); 395 } 396 } else { 397 fileStart = start; 398 } 399 400 // "path" 401 int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart); 402 path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path"); 403 404 // "?query" 405 if (queryStart < fragmentStart) { 406 query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query"); 407 } 408 409 parseAuthority(forceServer); 410 } 411 validateScheme(String uri, int end)412 private String validateScheme(String uri, int end) throws URISyntaxException { 413 if (end == 0) { 414 throw new URISyntaxException(uri, "Scheme expected", 0); 415 } 416 417 for (int i = 0; i < end; i++) { 418 if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) { 419 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 420 } 421 } 422 423 return uri.substring(0, end); 424 } 425 426 /** 427 * Breaks this URI's authority into user info, host and port parts. 428 * [user-info@][host][:port] 429 * If any part of this fails this method will give up and potentially leave 430 * these fields with their default values. 431 * 432 * @param forceServer true to always throw if the authority cannot be 433 * parsed. 
If false, this method may still throw for some kinds of 434 * errors; this unpredictable behavior is consistent with the RI. 435 */ parseAuthority(boolean forceServer)436 private void parseAuthority(boolean forceServer) throws URISyntaxException { 437 if (authority == null) { 438 return; 439 } 440 441 String tempUserInfo = null; 442 String temp = authority; 443 int index = temp.indexOf('@'); 444 int hostIndex = 0; 445 if (index != -1) { 446 // remove user info 447 tempUserInfo = temp.substring(0, index); 448 validateUserInfo(authority, tempUserInfo, 0); 449 temp = temp.substring(index + 1); // host[:port] is left 450 hostIndex = index + 1; 451 } 452 453 index = temp.lastIndexOf(':'); 454 int endIndex = temp.indexOf(']'); 455 456 String tempHost; 457 int tempPort = -1; 458 if (index != -1 && endIndex < index) { 459 // determine port and host 460 tempHost = temp.substring(0, index); 461 462 if (index < (temp.length() - 1)) { // port part is not empty 463 try { 464 tempPort = Integer.parseInt(temp.substring(index + 1)); 465 if (tempPort < 0) { 466 if (forceServer) { 467 throw new URISyntaxException(authority, 468 "Invalid port number", hostIndex + index + 1); 469 } 470 return; 471 } 472 } catch (NumberFormatException e) { 473 if (forceServer) { 474 throw new URISyntaxException(authority, 475 "Invalid port number", hostIndex + index + 1); 476 } 477 return; 478 } 479 } 480 } else { 481 tempHost = temp; 482 } 483 484 if (tempHost.isEmpty()) { 485 if (forceServer) { 486 throw new URISyntaxException(authority, "Expected host", hostIndex); 487 } 488 return; 489 } 490 491 if (!isValidHost(forceServer, tempHost)) { 492 return; 493 } 494 495 // this is a server based uri, 496 // fill in the userInfo, host and port fields 497 userInfo = tempUserInfo; 498 host = tempHost; 499 port = tempPort; 500 serverAuthority = true; 501 } 502 validateUserInfo(String uri, String userInfo, int index)503 private void validateUserInfo(String uri, String userInfo, int index) 504 throws 
URISyntaxException { 505 for (int i = 0; i < userInfo.length(); i++) { 506 char ch = userInfo.charAt(i); 507 if (ch == ']' || ch == '[') { 508 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 509 } 510 } 511 } 512 513 /** 514 * Returns true if {@code host} is a well-formed host name or IP address. 515 * 516 * @param forceServer true to always throw if the host cannot be parsed. If 517 * false, this method may still throw for some kinds of errors; this 518 * unpredictable behavior is consistent with the RI. 519 */ isValidHost(boolean forceServer, String host)520 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 521 if (host.startsWith("[")) { 522 // IPv6 address 523 if (!host.endsWith("]")) { 524 throw new URISyntaxException(host, 525 "Expected a closing square bracket for IPv6 address", 0); 526 } 527 if (InetAddress.isNumeric(host)) { 528 // If it's numeric, the presence of square brackets guarantees 529 // that it's a numeric IPv6 address. 530 return true; 531 } 532 throw new URISyntaxException(host, "Malformed IPv6 address"); 533 } 534 535 // '[' and ']' can only be the first char and last char 536 // of the host name 537 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 538 throw new URISyntaxException(host, "Illegal character in host name", 0); 539 } 540 541 int index = host.lastIndexOf('.'); 542 if (index < 0 || index == host.length() - 1 543 || !Character.isDigit(host.charAt(index + 1))) { 544 // domain name 545 if (isValidDomainName(host)) { 546 return true; 547 } 548 if (forceServer) { 549 throw new URISyntaxException(host, "Illegal character in host name", 0); 550 } 551 return false; 552 } 553 554 // IPv4 address? 
555 try { 556 InetAddress ia = InetAddress.parseNumericAddress(host); 557 if (ia instanceof Inet4Address) { 558 return true; 559 } 560 } catch (IllegalArgumentException ignored) { 561 } 562 563 if (forceServer) { 564 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 565 } 566 return false; 567 } 568 isValidDomainName(String host)569 private boolean isValidDomainName(String host) { 570 try { 571 UriCodec.validateSimple(host, "-."); 572 } catch (URISyntaxException e) { 573 return false; 574 } 575 576 String lastLabel = null; 577 for (String token : host.split("\\.")) { 578 lastLabel = token; 579 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 580 return false; 581 } 582 } 583 584 if (lastLabel == null) { 585 return false; 586 } 587 588 if (!lastLabel.equals(host)) { 589 char ch = lastLabel.charAt(0); 590 if (ch >= '0' && ch <= '9') { 591 return false; 592 } 593 } 594 return true; 595 } 596 597 /** 598 * Compares this URI with the given argument {@code uri}. This method will 599 * return a negative value if this URI instance is less than the given 600 * argument and a positive value if this URI instance is greater than the 601 * given argument. The return value {@code 0} indicates that the two 602 * instances represent the same URI. To define the order the single parts of 603 * the URI are compared with each other. String components will be ordered 604 * in the natural case-sensitive way. A hierarchical URI is less than an 605 * opaque URI and if one part is {@code null} the URI with the undefined 606 * part is less than the other one. 607 * 608 * @param uri 609 * the URI this instance has to compare with. 610 * @return the value representing the order of the two instances. 
611 */ compareTo(URI uri)612 public int compareTo(URI uri) { 613 int ret; 614 615 // compare schemes 616 if (scheme == null && uri.scheme != null) { 617 return -1; 618 } else if (scheme != null && uri.scheme == null) { 619 return 1; 620 } else if (scheme != null && uri.scheme != null) { 621 ret = scheme.compareToIgnoreCase(uri.scheme); 622 if (ret != 0) { 623 return ret; 624 } 625 } 626 627 // compare opacities 628 if (!opaque && uri.opaque) { 629 return -1; 630 } else if (opaque && !uri.opaque) { 631 return 1; 632 } else if (opaque && uri.opaque) { 633 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 634 if (ret != 0) { 635 return ret; 636 } 637 } else { 638 639 // otherwise both must be hierarchical 640 641 // compare authorities 642 if (authority != null && uri.authority == null) { 643 return 1; 644 } else if (authority == null && uri.authority != null) { 645 return -1; 646 } else if (authority != null && uri.authority != null) { 647 if (host != null && uri.host != null) { 648 // both are server based, so compare userInfo, host, port 649 if (userInfo != null && uri.userInfo == null) { 650 return 1; 651 } else if (userInfo == null && uri.userInfo != null) { 652 return -1; 653 } else if (userInfo != null && uri.userInfo != null) { 654 ret = userInfo.compareTo(uri.userInfo); 655 if (ret != 0) { 656 return ret; 657 } 658 } 659 660 // userInfo's are the same, compare hostname 661 ret = host.compareToIgnoreCase(uri.host); 662 if (ret != 0) { 663 return ret; 664 } 665 666 // compare port 667 if (port != uri.port) { 668 return port - uri.port; 669 } 670 } else { // one or both are registry based, compare the whole 671 // authority 672 ret = authority.compareTo(uri.authority); 673 if (ret != 0) { 674 return ret; 675 } 676 } 677 } 678 679 // authorities are the same 680 // compare paths 681 ret = path.compareTo(uri.path); 682 if (ret != 0) { 683 return ret; 684 } 685 686 // compare queries 687 688 if (query != null && uri.query == null) { 689 return 1; 690 } 
else if (query == null && uri.query != null) { 691 return -1; 692 } else if (query != null && uri.query != null) { 693 ret = query.compareTo(uri.query); 694 if (ret != 0) { 695 return ret; 696 } 697 } 698 } 699 700 // everything else is identical, so compare fragments 701 if (fragment != null && uri.fragment == null) { 702 return 1; 703 } else if (fragment == null && uri.fragment != null) { 704 return -1; 705 } else if (fragment != null && uri.fragment != null) { 706 ret = fragment.compareTo(uri.fragment); 707 if (ret != 0) { 708 return ret; 709 } 710 } 711 712 // identical 713 return 0; 714 } 715 716 /** 717 * Returns the URI formed by parsing {@code uri}. This method behaves 718 * identically to the string constructor but throws a different exception 719 * on failure. The constructor fails with a checked {@link 720 * URISyntaxException}; this method fails with an unchecked {@link 721 * IllegalArgumentException}. 722 */ create(String uri)723 public static URI create(String uri) { 724 try { 725 return new URI(uri); 726 } catch (URISyntaxException e) { 727 throw new IllegalArgumentException(e.getMessage()); 728 } 729 } 730 duplicate()731 private URI duplicate() { 732 URI clone = new URI(); 733 clone.absolute = absolute; 734 clone.authority = authority; 735 clone.fragment = fragment; 736 clone.host = host; 737 clone.opaque = opaque; 738 clone.path = path; 739 clone.port = port; 740 clone.query = query; 741 clone.scheme = scheme; 742 clone.schemeSpecificPart = schemeSpecificPart; 743 clone.userInfo = userInfo; 744 clone.serverAuthority = serverAuthority; 745 return clone; 746 } 747 748 /* 749 * Takes a string that may contain hex sequences like %F1 or %2b and 750 * converts the hex values following the '%' to lowercase 751 */ convertHexToLowerCase(String s)752 private String convertHexToLowerCase(String s) { 753 StringBuilder result = new StringBuilder(""); 754 if (s.indexOf('%') == -1) { 755 return s; 756 } 757 758 int index, prevIndex = 0; 759 while ((index = 
s.indexOf('%', prevIndex)) != -1) { 760 result.append(s.substring(prevIndex, index + 1)); 761 result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US)); 762 index += 3; 763 prevIndex = index; 764 } 765 return result.toString(); 766 } 767 768 /** 769 * Returns true if {@code first} and {@code second} are equal after 770 * unescaping hex sequences like %F1 and %2b. 771 */ escapedEquals(String first, String second)772 private boolean escapedEquals(String first, String second) { 773 if (first.indexOf('%') != second.indexOf('%')) { 774 return first.equals(second); 775 } 776 777 int index, prevIndex = 0; 778 while ((index = first.indexOf('%', prevIndex)) != -1 779 && second.indexOf('%', prevIndex) == index) { 780 boolean match = first.substring(prevIndex, index).equals( 781 second.substring(prevIndex, index)); 782 if (!match) { 783 return false; 784 } 785 786 match = first.substring(index + 1, index + 3).equalsIgnoreCase( 787 second.substring(index + 1, index + 3)); 788 if (!match) { 789 return false; 790 } 791 792 index += 3; 793 prevIndex = index; 794 } 795 return first.substring(prevIndex).equals(second.substring(prevIndex)); 796 } 797 equals(Object o)798 @Override public boolean equals(Object o) { 799 if (!(o instanceof URI)) { 800 return false; 801 } 802 URI uri = (URI) o; 803 804 if (uri.fragment == null && fragment != null || uri.fragment != null 805 && fragment == null) { 806 return false; 807 } else if (uri.fragment != null && fragment != null) { 808 if (!escapedEquals(uri.fragment, fragment)) { 809 return false; 810 } 811 } 812 813 if (uri.scheme == null && scheme != null || uri.scheme != null 814 && scheme == null) { 815 return false; 816 } else if (uri.scheme != null && scheme != null) { 817 if (!uri.scheme.equalsIgnoreCase(scheme)) { 818 return false; 819 } 820 } 821 822 if (uri.opaque && opaque) { 823 return escapedEquals(uri.schemeSpecificPart, 824 schemeSpecificPart); 825 } else if (!uri.opaque && !opaque) { 826 if (!escapedEquals(path, 
uri.path)) { 827 return false; 828 } 829 830 if (uri.query != null && query == null || uri.query == null 831 && query != null) { 832 return false; 833 } else if (uri.query != null && query != null) { 834 if (!escapedEquals(uri.query, query)) { 835 return false; 836 } 837 } 838 839 if (uri.authority != null && authority == null 840 || uri.authority == null && authority != null) { 841 return false; 842 } else if (uri.authority != null && authority != null) { 843 if (uri.host != null && host == null || uri.host == null 844 && host != null) { 845 return false; 846 } else if (uri.host == null && host == null) { 847 // both are registry based, so compare the whole authority 848 return escapedEquals(uri.authority, authority); 849 } else { // uri.host != null && host != null, so server-based 850 if (!host.equalsIgnoreCase(uri.host)) { 851 return false; 852 } 853 854 if (port != uri.port) { 855 return false; 856 } 857 858 if (uri.userInfo != null && userInfo == null 859 || uri.userInfo == null && userInfo != null) { 860 return false; 861 } else if (uri.userInfo != null && userInfo != null) { 862 return escapedEquals(userInfo, uri.userInfo); 863 } else { 864 return true; 865 } 866 } 867 } else { 868 // no authority 869 return true; 870 } 871 872 } else { 873 // one is opaque, the other hierarchical 874 return false; 875 } 876 } 877 878 /** 879 * Returns the scheme of this URI, or null if this URI has no scheme. This 880 * is also known as the protocol. 881 */ getScheme()882 public String getScheme() { 883 return scheme; 884 } 885 886 /** 887 * Returns the decoded scheme-specific part of this URI, or null if this URI 888 * has no scheme-specific part. 889 */ getSchemeSpecificPart()890 public String getSchemeSpecificPart() { 891 return decode(schemeSpecificPart); 892 } 893 894 /** 895 * Returns the encoded scheme-specific part of this URI, or null if this URI 896 * has no scheme-specific part. 
897 */ getRawSchemeSpecificPart()898 public String getRawSchemeSpecificPart() { 899 return schemeSpecificPart; 900 } 901 902 /** 903 * Returns the decoded authority part of this URI, or null if this URI has 904 * no authority. 905 */ getAuthority()906 public String getAuthority() { 907 return decode(authority); 908 } 909 910 /** 911 * Returns the encoded authority of this URI, or null if this URI has no 912 * authority. 913 */ getRawAuthority()914 public String getRawAuthority() { 915 return authority; 916 } 917 918 /** 919 * Returns the decoded user info of this URI, or null if this URI has no 920 * user info. 921 */ getUserInfo()922 public String getUserInfo() { 923 return decode(userInfo); 924 } 925 926 /** 927 * Returns the encoded user info of this URI, or null if this URI has no 928 * user info. 929 */ getRawUserInfo()930 public String getRawUserInfo() { 931 return userInfo; 932 } 933 934 /** 935 * Returns the host of this URI, or null if this URI has no host. 936 */ getHost()937 public String getHost() { 938 return host; 939 } 940 941 /** 942 * Returns the port number of this URI, or {@code -1} if this URI has no 943 * explicit port. 944 */ getPort()945 public int getPort() { 946 return port; 947 } 948 949 /** @hide */ getEffectivePort()950 public int getEffectivePort() { 951 return getEffectivePort(scheme, port); 952 } 953 954 /** 955 * Returns the port to use for {@code scheme} connections will use when 956 * {@link #getPort} returns {@code specifiedPort}. 957 * 958 * @hide 959 */ getEffectivePort(String scheme, int specifiedPort)960 public static int getEffectivePort(String scheme, int specifiedPort) { 961 if (specifiedPort != -1) { 962 return specifiedPort; 963 } 964 965 if ("http".equalsIgnoreCase(scheme)) { 966 return 80; 967 } else if ("https".equalsIgnoreCase(scheme)) { 968 return 443; 969 } else { 970 return -1; 971 } 972 } 973 974 /** 975 * Returns the decoded path of this URI, or null if this URI has no path. 
976 */ getPath()977 public String getPath() { 978 return decode(path); 979 } 980 981 /** 982 * Returns the encoded path of this URI, or null if this URI has no path. 983 */ getRawPath()984 public String getRawPath() { 985 return path; 986 } 987 988 /** 989 * Returns the decoded query of this URI, or null if this URI has no query. 990 */ getQuery()991 public String getQuery() { 992 return decode(query); 993 } 994 995 /** 996 * Returns the encoded query of this URI, or null if this URI has no query. 997 */ getRawQuery()998 public String getRawQuery() { 999 return query; 1000 } 1001 1002 /** 1003 * Returns the decoded fragment of this URI, or null if this URI has no 1004 * fragment. 1005 */ getFragment()1006 public String getFragment() { 1007 return decode(fragment); 1008 } 1009 1010 /** 1011 * Gets the encoded fragment of this URI, or null if this URI has no 1012 * fragment. 1013 */ getRawFragment()1014 public String getRawFragment() { 1015 return fragment; 1016 } 1017 hashCode()1018 @Override public int hashCode() { 1019 if (hash == -1) { 1020 hash = getHashString().hashCode(); 1021 } 1022 return hash; 1023 } 1024 1025 /** 1026 * Returns true if this URI is absolute, which means that a scheme is 1027 * defined. 1028 */ isAbsolute()1029 public boolean isAbsolute() { 1030 // TODO: simplify to 'scheme != null' ? 1031 return absolute; 1032 } 1033 1034 /** 1035 * Returns true if this URI is opaque. Opaque URIs are absolute and have a 1036 * scheme-specific part that does not start with a slash character. All 1037 * parts except scheme, scheme-specific and fragment are undefined. 1038 */ isOpaque()1039 public boolean isOpaque() { 1040 return opaque; 1041 } 1042 1043 /** 1044 * Returns the normalized path. 
1045 */ normalize(String path, boolean discardRelativePrefix)1046 private String normalize(String path, boolean discardRelativePrefix) { 1047 path = UrlUtils.canonicalizePath(path, discardRelativePrefix); 1048 1049 /* 1050 * If the path contains a colon before the first colon, prepend 1051 * "./" to differentiate the path from a scheme prefix. 1052 */ 1053 int colon = path.indexOf(':'); 1054 if (colon != -1) { 1055 int slash = path.indexOf('/'); 1056 if (slash == -1 || colon < slash) { 1057 path = "./" + path; 1058 } 1059 } 1060 1061 return path; 1062 } 1063 1064 /** 1065 * Normalizes the path part of this URI. 1066 * 1067 * @return an URI object which represents this instance with a normalized 1068 * path. 1069 */ normalize()1070 public URI normalize() { 1071 if (opaque) { 1072 return this; 1073 } 1074 String normalizedPath = normalize(path, false); 1075 // if the path is already normalized, return this 1076 if (path.equals(normalizedPath)) { 1077 return this; 1078 } 1079 // get an exact copy of the URI re-calculate the scheme specific part 1080 // since the path of the normalized URI is different from this URI. 1081 URI result = duplicate(); 1082 result.path = normalizedPath; 1083 result.setSchemeSpecificPart(); 1084 return result; 1085 } 1086 1087 /** 1088 * Tries to parse the authority component of this URI to divide it into the 1089 * host, port, and user-info. If this URI is already determined as a 1090 * ServerAuthority this instance will be returned without changes. 1091 * 1092 * @return this instance with the components of the parsed server authority. 1093 * @throws URISyntaxException 1094 * if the authority part could not be parsed as a server-based 1095 * authority. 
1096 */ parseServerAuthority()1097 public URI parseServerAuthority() throws URISyntaxException { 1098 if (!serverAuthority) { 1099 parseAuthority(true); 1100 } 1101 return this; 1102 } 1103 1104 /** 1105 * Makes the given URI {@code relative} to a relative URI against the URI 1106 * represented by this instance. 1107 * 1108 * @param relative 1109 * the URI which has to be relativized against this URI. 1110 * @return the relative URI. 1111 */ relativize(URI relative)1112 public URI relativize(URI relative) { 1113 if (relative.opaque || opaque) { 1114 return relative; 1115 } 1116 1117 if (scheme == null ? relative.scheme != null : !scheme 1118 .equals(relative.scheme)) { 1119 return relative; 1120 } 1121 1122 if (authority == null ? relative.authority != null : !authority 1123 .equals(relative.authority)) { 1124 return relative; 1125 } 1126 1127 // normalize both paths 1128 String thisPath = normalize(path, false); 1129 String relativePath = normalize(relative.path, false); 1130 1131 /* 1132 * if the paths aren't equal, then we need to determine if this URI's 1133 * path is a parent path (begins with) the relative URI's path 1134 */ 1135 if (!thisPath.equals(relativePath)) { 1136 // drop everything after the last slash in this path 1137 thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1); 1138 1139 /* 1140 * if the relative URI's path doesn't start with this URI's path, 1141 * then just return the relative URI; the URIs have nothing in 1142 * common 1143 */ 1144 if (!relativePath.startsWith(thisPath)) { 1145 return relative; 1146 } 1147 } 1148 1149 URI result = new URI(); 1150 result.fragment = relative.fragment; 1151 result.query = relative.query; 1152 // the result URI is the remainder of the relative URI's path 1153 result.path = relativePath.substring(thisPath.length()); 1154 result.setSchemeSpecificPart(); 1155 return result; 1156 } 1157 1158 /** 1159 * Resolves the given URI {@code relative} against the URI represented by 1160 * this instance. 
1161 * 1162 * @param relative 1163 * the URI which has to be resolved against this URI. 1164 * @return the resolved URI. 1165 */ resolve(URI relative)1166 public URI resolve(URI relative) { 1167 if (relative.absolute || opaque) { 1168 return relative; 1169 } 1170 1171 if (relative.authority != null) { 1172 // If the relative URI has an authority, the result is the relative 1173 // with this URI's scheme. 1174 URI result = relative.duplicate(); 1175 result.scheme = scheme; 1176 result.absolute = absolute; 1177 return result; 1178 } 1179 1180 if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) { 1181 // if the relative URI only consists of at most a fragment, 1182 URI result = duplicate(); 1183 result.fragment = relative.fragment; 1184 return result; 1185 } 1186 1187 URI result = duplicate(); 1188 result.fragment = relative.fragment; 1189 result.query = relative.query; 1190 String resolvedPath; 1191 if (relative.path.startsWith("/")) { 1192 // The relative URI has an absolute path; use it. 1193 resolvedPath = relative.path; 1194 } else if (relative.path.isEmpty()) { 1195 // The relative URI has no path; use the base path. 1196 resolvedPath = path; 1197 } else { 1198 // The relative URI has a relative path; combine the paths. 
1199 int endIndex = path.lastIndexOf('/') + 1; 1200 resolvedPath = path.substring(0, endIndex) + relative.path; 1201 } 1202 result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true)); 1203 result.setSchemeSpecificPart(); 1204 return result; 1205 } 1206 1207 /** 1208 * Helper method used to re-calculate the scheme specific part of the 1209 * resolved or normalized URIs 1210 */ setSchemeSpecificPart()1211 private void setSchemeSpecificPart() { 1212 // ssp = [//authority][path][?query] 1213 StringBuilder ssp = new StringBuilder(); 1214 if (authority != null) { 1215 ssp.append("//" + authority); 1216 } 1217 if (path != null) { 1218 ssp.append(path); 1219 } 1220 if (query != null) { 1221 ssp.append("?" + query); 1222 } 1223 schemeSpecificPart = ssp.toString(); 1224 // reset string, so that it can be re-calculated correctly when asked. 1225 string = null; 1226 } 1227 1228 /** 1229 * Creates a new URI instance by parsing the given string {@code relative} 1230 * and resolves the created URI against the URI represented by this 1231 * instance. 1232 * 1233 * @param relative 1234 * the given string to create the new URI instance which has to 1235 * be resolved later on. 1236 * @return the created and resolved URI. 1237 */ resolve(String relative)1238 public URI resolve(String relative) { 1239 return resolve(create(relative)); 1240 } 1241 decode(String s)1242 private String decode(String s) { 1243 return s != null ? UriCodec.decode(s) : null; 1244 } 1245 1246 /** 1247 * Returns the textual string representation of this URI instance using the 1248 * US-ASCII encoding. 1249 * 1250 * @return the US-ASCII string representation of this URI. 1251 */ toASCIIString()1252 public String toASCIIString() { 1253 StringBuilder result = new StringBuilder(); 1254 ASCII_ONLY.appendEncoded(result, toString()); 1255 return result.toString(); 1256 } 1257 1258 /** 1259 * Returns the encoded URI. 
1260 */ toString()1261 @Override public String toString() { 1262 if (string != null) { 1263 return string; 1264 } 1265 1266 StringBuilder result = new StringBuilder(); 1267 if (scheme != null) { 1268 result.append(scheme); 1269 result.append(':'); 1270 } 1271 if (opaque) { 1272 result.append(schemeSpecificPart); 1273 } else { 1274 if (authority != null) { 1275 result.append("//"); 1276 result.append(authority); 1277 } 1278 1279 if (path != null) { 1280 result.append(path); 1281 } 1282 1283 if (query != null) { 1284 result.append('?'); 1285 result.append(query); 1286 } 1287 } 1288 1289 if (fragment != null) { 1290 result.append('#'); 1291 result.append(fragment); 1292 } 1293 1294 string = result.toString(); 1295 return string; 1296 } 1297 1298 /* 1299 * Form a string from the components of this URI, similarly to the 1300 * toString() method. But this method converts scheme and host to lowercase, 1301 * and converts escaped octets to lowercase. 1302 */ getHashString()1303 private String getHashString() { 1304 StringBuilder result = new StringBuilder(); 1305 if (scheme != null) { 1306 result.append(scheme.toLowerCase(Locale.US)); 1307 result.append(':'); 1308 } 1309 if (opaque) { 1310 result.append(schemeSpecificPart); 1311 } else { 1312 if (authority != null) { 1313 result.append("//"); 1314 if (host == null) { 1315 result.append(authority); 1316 } else { 1317 if (userInfo != null) { 1318 result.append(userInfo + "@"); 1319 } 1320 result.append(host.toLowerCase(Locale.US)); 1321 if (port != -1) { 1322 result.append(":" + port); 1323 } 1324 } 1325 } 1326 1327 if (path != null) { 1328 result.append(path); 1329 } 1330 1331 if (query != null) { 1332 result.append('?'); 1333 result.append(query); 1334 } 1335 } 1336 1337 if (fragment != null) { 1338 result.append('#'); 1339 result.append(fragment); 1340 } 1341 1342 return convertHexToLowerCase(result.toString()); 1343 } 1344 1345 /** 1346 * Converts this URI instance to a URL. 
1347 * 1348 * @return the created URL representing the same resource as this URI. 1349 * @throws MalformedURLException 1350 * if an error occurs while creating the URL or no protocol 1351 * handler could be found. 1352 */ toURL()1353 public URL toURL() throws MalformedURLException { 1354 if (!absolute) { 1355 throw new IllegalArgumentException("URI is not absolute: " + toString()); 1356 } 1357 return new URL(toString()); 1358 } 1359 readObject(ObjectInputStream in)1360 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { 1361 in.defaultReadObject(); 1362 try { 1363 parseURI(string, false); 1364 } catch (URISyntaxException e) { 1365 throw new IOException(e.toString()); 1366 } 1367 } 1368 writeObject(ObjectOutputStream out)1369 private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException { 1370 // call toString() to ensure the value of string field is calculated 1371 toString(); 1372 out.defaultWriteObject(); 1373 } 1374 } 1375