• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 package java.net;
19 
20 import java.io.IOException;
21 import java.io.ObjectInputStream;
22 import java.io.ObjectOutputStream;
23 import java.io.Serializable;
24 import java.util.Locale;
25 import libcore.net.UriCodec;
26 import libcore.net.url.UrlUtils;
27 
28 /**
29  * A Uniform Resource Identifier that identifies an abstract or physical
30  * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31  * 2396</a>.
32  *
33  * <h3>Parts of a URI</h3>
34  * A URI is composed of many parts. This class can both parse URI strings into
35  * parts and compose URI strings from parts. For example, consider the parts of
36  * this URI:
37  * {@code http://username:password@host:8080/directory/file?query#fragment}
38  * <table>
39  * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40  * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41  * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42  * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43  * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44  * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45  * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46  * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47  * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48  * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49  * </table>
50  *
51  * <h3>Absolute vs. Relative URIs</h3>
52  * URIs are either {@link #isAbsolute() absolute or relative}.
53  * <ul>
54  *     <li><strong>Absolute:</strong> {@code http://android.com/robots.txt}
55  *     <li><strong>Relative:</strong> {@code robots.txt}
56  * </ul>
57  *
58  * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link
59  * URL}, you can use {@link #toURL} to convert an absolute URI to a URL.
60  *
61  * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you
62  * have the absolute URI that a relative URI is relative to, you can use {@link
63  * #resolve} to compute the referenced absolute URI. Symmetrically, you can use
64  * {@link #relativize} to compute the relative URI from one URI to another.
65  * <pre>   {@code
66  *   URI absolute = new URI("http://android.com/");
67  *   URI relative = new URI("robots.txt");
68  *   URI resolved = new URI("http://android.com/robots.txt");
69  *
70  *   // print "http://android.com/robots.txt"
71  *   System.out.println(absolute.resolve(relative));
72  *
73  *   // print "robots.txt"
74  *   System.out.println(absolute.relativize(resolved));
75  * }</pre>
76  *
77  * <h3>Opaque vs. Hierarchical URIs</h3>
78  * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
79  * URIs are always hierarchical.
80  * <ul>
81  *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
82  *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
83  * </ul>
84  *
85  * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
86  * begin with the slash character: {@code /}. The contents of the
87  * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
88  * has an authority, user info, host, port, path or query. An opaque URI may
89  * have a fragment, however. A typical opaque URI is
90  * {@code mailto:robots@example.com}.
91  * <table>
92  * <tr><th>Component           </th><th>Example value             </th></tr>
93  * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
94  * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
95  * <tr><td>Fragment            </td><td>                          </td></tr>
96  * </table>
97  * <p>Hierarchical URIs may have values for any URI component. They always
98  * have a non-null path, though that path may be the empty string.
99  *
100  * <h3>Encoding and Decoding URI Components</h3>
101  * Each component of a URI permits a limited set of legal characters. Other
102  * characters must first be <i>encoded</i> before they can be embedded in a URI.
103  * To recover the original characters from a URI, they may be <i>decoded</i>.
104  * <strong>Contrary to what you might expect,</strong> this class uses the
105  * term <i>raw</i> to refer to encoded strings. The non-<i>raw</i> accessors
106  * return decoded strings. For example, consider how this URI is decoded:
107  * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
108  * <table>
109  * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
110  * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
111  * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
112  * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
113  * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
114  * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
115  * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
116  * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
117  * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
118  * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
119  * </table>
120  * A URI's host, port and scheme are not eligible for encoding and must not
121  * contain illegal characters.
122  *
123  * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
124  * class. These constructors accept your original strings and encode them into
125  * their raw form.
126  *
127  * <p>To decode a URI, invoke the single-string constructor, and then use the
128  * appropriate accessor methods to get the decoded components.
129  *
130  * <p>The {@link URL} class can be used to retrieve resources by their URI.
131  */
132 public final class URI implements Comparable<URI>, Serializable {
133 
134     private static final long serialVersionUID = -6052424284110960213l;
135 
136     static final String UNRESERVED = "_-!.~\'()*";
137     static final String PUNCTUATION = ",;:$&+=";
138 
139     static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
140     static final UriCodec PATH_ENCODER = new PartEncoder("/@");
141     static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
142 
143     /** for java.net.URL, which foolishly combines these two parts */
144     static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
145 
146     /** for query, fragment, and scheme-specific part */
147     static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
148 
149     /** Retains all ASCII chars including delimiters. */
150     private static final UriCodec ASCII_ONLY = new UriCodec() {
151         @Override protected boolean isRetained(char c) {
152             return c <= 127;
153         }
154     };
155 
156     /**
157      * Encodes the unescaped characters of {@code s} that are not permitted.
158      * Permitted characters are:
159      * <ul>
160      *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161      *   <li>{@code extraOkayChars},
162      *   <li>non-ASCII, non-control, non-whitespace characters
163      * </ul>
164      */
165     private static class PartEncoder extends UriCodec {
166         private final String extraLegalCharacters;
167 
PartEncoder(String extraLegalCharacters)168         PartEncoder(String extraLegalCharacters) {
169             this.extraLegalCharacters = extraLegalCharacters;
170         }
171 
isRetained(char c)172         @Override protected boolean isRetained(char c) {
173             return UNRESERVED.indexOf(c) != -1
174                     || PUNCTUATION.indexOf(c) != -1
175                     || extraLegalCharacters.indexOf(c) != -1
176                     || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
177         }
178     }
179 
180     private String string;
181     private transient String scheme;
182     private transient String schemeSpecificPart;
183     private transient String authority;
184     private transient String userInfo;
185     private transient String host;
186     private transient int port = -1;
187     private transient String path;
188     private transient String query;
189     private transient String fragment;
190     private transient boolean opaque;
191     private transient boolean absolute;
192     private transient boolean serverAuthority = false;
193 
194     private transient int hash = -1;
195 
URI()196     private URI() {}
197 
198     /**
199      * Creates a new URI instance by parsing {@code spec}.
200      *
201      * @param spec a URI whose illegal characters have all been encoded.
202      */
URI(String spec)203     public URI(String spec) throws URISyntaxException {
204         parseURI(spec, false);
205     }
206 
/**
 * Builds a URI from unencoded parts, laid out as
 * {@code [scheme:][scheme-specific part][#fragment]}, and parses the result.
 * Null parts are omitted.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param schemeSpecificPart the unencoded scheme-specific part, or null.
 * @param fragment the unencoded fragment, or null.
 * @throws URISyntaxException if the assembled string cannot be parsed.
 */
public URI(String scheme, String schemeSpecificPart, String fragment)
        throws URISyntaxException {
    StringBuilder spec = new StringBuilder();

    if (scheme != null) {
        spec.append(scheme).append(':');
    }

    if (schemeSpecificPart != null) {
        ALL_LEGAL_ENCODER.appendEncoded(spec, schemeSpecificPart);
    }

    if (fragment != null) {
        spec.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(spec, fragment);
    }

    parseURI(spec.toString(), false);
}
229 
/**
 * Builds a server-based URI from unencoded parts, laid out as
 * {@code [scheme:][//[userInfo@][host][:port]][path][?query][#fragment]},
 * and parses the result with strict authority checking. Null parts (and a
 * port of -1) are omitted.
 *
 * <p>If every string argument is null the URI is left with an empty path
 * and nothing is parsed.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param userInfo the unencoded user info, or null.
 * @param host the host, or null; a host containing {@code ':'} but no
 *     square brackets is wrapped in {@code [...]}.
 * @param port the port, or -1 for none.
 * @param path the unencoded path, or null.
 * @param query the unencoded query, or null.
 * @param fragment the unencoded fragment, or null.
 * @throws URISyntaxException if a scheme is combined with a non-empty path
 *     that does not start with {@code '/'}, or if the assembled string
 *     cannot be parsed.
 */
public URI(String scheme, String userInfo, String host, int port, String path, String query,
        String fragment) throws URISyntaxException {
    boolean nothingGiven = scheme == null && userInfo == null && host == null
            && path == null && query == null && fragment == null;
    if (nothingGiven) {
        this.path = "";
        return;
    }

    boolean relativePath = path != null && !path.isEmpty() && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder spec = new StringBuilder();

    if (scheme != null) {
        spec.append(scheme).append(':');
    }

    // Any authority component at all requires the "//" introducer.
    boolean hasAuthorityPart = userInfo != null || host != null || port != -1;
    if (hasAuthorityPart) {
        spec.append("//");
    }

    if (userInfo != null) {
        USER_INFO_ENCODER.appendEncoded(spec, userInfo);
        spec.append('@');
    }

    if (host != null) {
        // An IPv6 literal that has not been enclosed in square brackets yet.
        boolean bareIpv6 = host.indexOf(':') != -1
                && host.indexOf('[') == -1
                && host.indexOf(']') == -1;
        if (bareIpv6) {
            spec.append('[').append(host).append(']');
        } else {
            spec.append(host);
        }
    }

    if (port != -1) {
        spec.append(':').append(port);
    }

    if (path != null) {
        PATH_ENCODER.appendEncoded(spec, path);
    }

    if (query != null) {
        spec.append('?');
        ALL_LEGAL_ENCODER.appendEncoded(spec, query);
    }

    if (fragment != null) {
        spec.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(spec, fragment);
    }

    parseURI(spec.toString(), true);
}
291 
/**
 * Builds a URI from unencoded parts with no user info, port or query.
 * Equivalent to calling the seven-argument constructor with those parts
 * absent.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param host the host, or null.
 * @param path the unencoded path, or null.
 * @param fragment the unencoded fragment, or null.
 * @throws URISyntaxException if the seven-argument constructor rejects the parts.
 */
public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
    this(scheme, /* userInfo */ null, host, /* port */ -1, path, /* query */ null, fragment);
}
300 
/**
 * Builds a URI from unencoded parts, laid out as
 * {@code [scheme:][//authority][path][?query][#fragment]}, and parses the
 * result with lenient authority checking. Null parts are omitted.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param authority the unencoded authority, or null.
 * @param path the unencoded path, or null.
 * @param query the unencoded query, or null.
 * @param fragment the unencoded fragment, or null.
 * @throws URISyntaxException if a scheme is combined with a non-empty path
 *     that does not start with {@code '/'}, or if the assembled string
 *     cannot be parsed.
 */
public URI(String scheme, String authority, String path, String query,
        String fragment) throws URISyntaxException {
    boolean relativePath = path != null && !path.isEmpty() && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder spec = new StringBuilder();

    if (scheme != null) {
        spec.append(scheme).append(':');
    }

    if (authority != null) {
        spec.append("//");
        AUTHORITY_ENCODER.appendEncoded(spec, authority);
    }

    if (path != null) {
        PATH_ENCODER.appendEncoded(spec, path);
    }

    if (query != null) {
        spec.append('?');
        ALL_LEGAL_ENCODER.appendEncoded(spec, query);
    }

    if (fragment != null) {
        spec.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(spec, fragment);
    }

    parseURI(spec.toString(), false);
}
336 
337     /**
338      * Breaks uri into its component parts. This first splits URI into scheme,
339      * scheme-specific part and fragment:
340      *   [scheme:][scheme-specific part][#fragment]
341      *
342      * Then it breaks the scheme-specific part into authority, path and query:
343      *   [//authority][path][?query]
344      *
345      * Finally it delegates to parseAuthority to break the authority into user
346      * info, host and port:
347      *   [user-info@][host][:port]
348      */
parseURI(String uri, boolean forceServer)349     private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
350         string = uri;
351 
352         // "#fragment"
353         int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
354         if (fragmentStart < uri.length()) {
355             fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
356         }
357 
358         // scheme:
359         int start;
360         int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
361         if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
362             absolute = true;
363             scheme = validateScheme(uri, colon);
364             start = colon + 1;
365 
366             if (start == fragmentStart) {
367                 throw new URISyntaxException(uri, "Scheme-specific part expected", start);
368             }
369 
370             // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
371             if (!uri.regionMatches(start, "/", 0, 1)) {
372                 opaque = true;
373                 schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
374                         uri, start, fragmentStart, "scheme specific part");
375                 return;
376             }
377         } else {
378             absolute = false;
379             start = 0;
380         }
381 
382         opaque = false;
383         schemeSpecificPart = uri.substring(start, fragmentStart);
384 
385         // "//authority"
386         int fileStart;
387         if (uri.regionMatches(start, "//", 0, 2)) {
388             int authorityStart = start + 2;
389             fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
390             if (authorityStart == uri.length()) {
391                 throw new URISyntaxException(uri, "Authority expected", uri.length());
392             }
393             if (authorityStart < fileStart) {
394                 authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
395             }
396         } else {
397             fileStart = start;
398         }
399 
400         // "path"
401         int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
402         path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");
403 
404         // "?query"
405         if (queryStart < fragmentStart) {
406             query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
407         }
408 
409         parseAuthority(forceServer);
410     }
411 
validateScheme(String uri, int end)412     private String validateScheme(String uri, int end) throws URISyntaxException {
413         if (end == 0) {
414             throw new URISyntaxException(uri, "Scheme expected", 0);
415         }
416 
417         for (int i = 0; i < end; i++) {
418             if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
419                 throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420             }
421         }
422 
423         return uri.substring(0, end);
424     }
425 
426     /**
427      * Breaks this URI's authority into user info, host and port parts.
428      *   [user-info@][host][:port]
429      * If any part of this fails this method will give up and potentially leave
430      * these fields with their default values.
431      *
432      * @param forceServer true to always throw if the authority cannot be
433      *     parsed. If false, this method may still throw for some kinds of
434      *     errors; this unpredictable behavior is consistent with the RI.
435      */
parseAuthority(boolean forceServer)436     private void parseAuthority(boolean forceServer) throws URISyntaxException {
437         if (authority == null) {
438             return;
439         }
440 
441         String tempUserInfo = null;
442         String temp = authority;
443         int index = temp.indexOf('@');
444         int hostIndex = 0;
445         if (index != -1) {
446             // remove user info
447             tempUserInfo = temp.substring(0, index);
448             validateUserInfo(authority, tempUserInfo, 0);
449             temp = temp.substring(index + 1); // host[:port] is left
450             hostIndex = index + 1;
451         }
452 
453         index = temp.lastIndexOf(':');
454         int endIndex = temp.indexOf(']');
455 
456         String tempHost;
457         int tempPort = -1;
458         if (index != -1 && endIndex < index) {
459             // determine port and host
460             tempHost = temp.substring(0, index);
461 
462             if (index < (temp.length() - 1)) { // port part is not empty
463                 try {
464                     tempPort = Integer.parseInt(temp.substring(index + 1));
465                     if (tempPort < 0) {
466                         if (forceServer) {
467                             throw new URISyntaxException(authority,
468                                     "Invalid port number", hostIndex + index + 1);
469                         }
470                         return;
471                     }
472                 } catch (NumberFormatException e) {
473                     if (forceServer) {
474                         throw new URISyntaxException(authority,
475                                 "Invalid port number", hostIndex + index + 1);
476                     }
477                     return;
478                 }
479             }
480         } else {
481             tempHost = temp;
482         }
483 
484         if (tempHost.isEmpty()) {
485             if (forceServer) {
486                 throw new URISyntaxException(authority, "Expected host", hostIndex);
487             }
488             return;
489         }
490 
491         if (!isValidHost(forceServer, tempHost)) {
492             return;
493         }
494 
495         // this is a server based uri,
496         // fill in the userInfo, host and port fields
497         userInfo = tempUserInfo;
498         host = tempHost;
499         port = tempPort;
500         serverAuthority = true;
501     }
502 
validateUserInfo(String uri, String userInfo, int index)503     private void validateUserInfo(String uri, String userInfo, int index)
504             throws URISyntaxException {
505         for (int i = 0; i < userInfo.length(); i++) {
506             char ch = userInfo.charAt(i);
507             if (ch == ']' || ch == '[') {
508                 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
509             }
510         }
511     }
512 
513     /**
514      * Returns true if {@code host} is a well-formed host name or IP address.
515      *
516      * @param forceServer true to always throw if the host cannot be parsed. If
517      *     false, this method may still throw for some kinds of errors; this
518      *     unpredictable behavior is consistent with the RI.
519      */
isValidHost(boolean forceServer, String host)520     private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
521         if (host.startsWith("[")) {
522             // IPv6 address
523             if (!host.endsWith("]")) {
524                 throw new URISyntaxException(host,
525                         "Expected a closing square bracket for IPv6 address", 0);
526             }
527             if (InetAddress.isNumeric(host)) {
528                 // If it's numeric, the presence of square brackets guarantees
529                 // that it's a numeric IPv6 address.
530                 return true;
531             }
532             throw new URISyntaxException(host, "Malformed IPv6 address");
533         }
534 
535         // '[' and ']' can only be the first char and last char
536         // of the host name
537         if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
538             throw new URISyntaxException(host, "Illegal character in host name", 0);
539         }
540 
541         int index = host.lastIndexOf('.');
542         if (index < 0 || index == host.length() - 1
543                 || !Character.isDigit(host.charAt(index + 1))) {
544             // domain name
545             if (isValidDomainName(host)) {
546                 return true;
547             }
548             if (forceServer) {
549                 throw new URISyntaxException(host, "Illegal character in host name", 0);
550             }
551             return false;
552         }
553 
554         // IPv4 address?
555         try {
556             InetAddress ia = InetAddress.parseNumericAddress(host);
557             if (ia instanceof Inet4Address) {
558                 return true;
559             }
560         } catch (IllegalArgumentException ignored) {
561         }
562 
563         if (forceServer) {
564             throw new URISyntaxException(host, "Malformed IPv4 address", 0);
565         }
566         return false;
567     }
568 
isValidDomainName(String host)569     private boolean isValidDomainName(String host) {
570         try {
571             UriCodec.validateSimple(host, "-.");
572         } catch (URISyntaxException e) {
573             return false;
574         }
575 
576         String lastLabel = null;
577         for (String token : host.split("\\.")) {
578             lastLabel = token;
579             if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
580                 return false;
581             }
582         }
583 
584         if (lastLabel == null) {
585             return false;
586         }
587 
588         if (!lastLabel.equals(host)) {
589             char ch = lastLabel.charAt(0);
590             if (ch >= '0' && ch <= '9') {
591                 return false;
592             }
593         }
594         return true;
595     }
596 
597     /**
598      * Compares this URI with the given argument {@code uri}. This method will
599      * return a negative value if this URI instance is less than the given
600      * argument and a positive value if this URI instance is greater than the
601      * given argument. The return value {@code 0} indicates that the two
602      * instances represent the same URI. To define the order the single parts of
603      * the URI are compared with each other. String components will be ordered
604      * in the natural case-sensitive way. A hierarchical URI is less than an
605      * opaque URI and if one part is {@code null} the URI with the undefined
606      * part is less than the other one.
607      *
608      * @param uri
609      *            the URI this instance has to compare with.
610      * @return the value representing the order of the two instances.
611      */
compareTo(URI uri)612     public int compareTo(URI uri) {
613         int ret;
614 
615         // compare schemes
616         if (scheme == null && uri.scheme != null) {
617             return -1;
618         } else if (scheme != null && uri.scheme == null) {
619             return 1;
620         } else if (scheme != null && uri.scheme != null) {
621             ret = scheme.compareToIgnoreCase(uri.scheme);
622             if (ret != 0) {
623                 return ret;
624             }
625         }
626 
627         // compare opacities
628         if (!opaque && uri.opaque) {
629             return -1;
630         } else if (opaque && !uri.opaque) {
631             return 1;
632         } else if (opaque && uri.opaque) {
633             ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
634             if (ret != 0) {
635                 return ret;
636             }
637         } else {
638 
639             // otherwise both must be hierarchical
640 
641             // compare authorities
642             if (authority != null && uri.authority == null) {
643                 return 1;
644             } else if (authority == null && uri.authority != null) {
645                 return -1;
646             } else if (authority != null && uri.authority != null) {
647                 if (host != null && uri.host != null) {
648                     // both are server based, so compare userInfo, host, port
649                     if (userInfo != null && uri.userInfo == null) {
650                         return 1;
651                     } else if (userInfo == null && uri.userInfo != null) {
652                         return -1;
653                     } else if (userInfo != null && uri.userInfo != null) {
654                         ret = userInfo.compareTo(uri.userInfo);
655                         if (ret != 0) {
656                             return ret;
657                         }
658                     }
659 
660                     // userInfo's are the same, compare hostname
661                     ret = host.compareToIgnoreCase(uri.host);
662                     if (ret != 0) {
663                         return ret;
664                     }
665 
666                     // compare port
667                     if (port != uri.port) {
668                         return port - uri.port;
669                     }
670                 } else { // one or both are registry based, compare the whole
671                     // authority
672                     ret = authority.compareTo(uri.authority);
673                     if (ret != 0) {
674                         return ret;
675                     }
676                 }
677             }
678 
679             // authorities are the same
680             // compare paths
681             ret = path.compareTo(uri.path);
682             if (ret != 0) {
683                 return ret;
684             }
685 
686             // compare queries
687 
688             if (query != null && uri.query == null) {
689                 return 1;
690             } else if (query == null && uri.query != null) {
691                 return -1;
692             } else if (query != null && uri.query != null) {
693                 ret = query.compareTo(uri.query);
694                 if (ret != 0) {
695                     return ret;
696                 }
697             }
698         }
699 
700         // everything else is identical, so compare fragments
701         if (fragment != null && uri.fragment == null) {
702             return 1;
703         } else if (fragment == null && uri.fragment != null) {
704             return -1;
705         } else if (fragment != null && uri.fragment != null) {
706             ret = fragment.compareTo(uri.fragment);
707             if (ret != 0) {
708                 return ret;
709             }
710         }
711 
712         // identical
713         return 0;
714     }
715 
716     /**
717      * Returns the URI formed by parsing {@code uri}. This method behaves
718      * identically to the string constructor but throws a different exception
719      * on failure. The constructor fails with a checked {@link
720      * URISyntaxException}; this method fails with an unchecked {@link
721      * IllegalArgumentException}.
722      */
create(String uri)723     public static URI create(String uri) {
724         try {
725             return new URI(uri);
726         } catch (URISyntaxException e) {
727             throw new IllegalArgumentException(e.getMessage());
728         }
729     }
730 
duplicate()731     private URI duplicate() {
732         URI clone = new URI();
733         clone.absolute = absolute;
734         clone.authority = authority;
735         clone.fragment = fragment;
736         clone.host = host;
737         clone.opaque = opaque;
738         clone.path = path;
739         clone.port = port;
740         clone.query = query;
741         clone.scheme = scheme;
742         clone.schemeSpecificPart = schemeSpecificPart;
743         clone.userInfo = userInfo;
744         clone.serverAuthority = serverAuthority;
745         return clone;
746     }
747 
748     /*
749      * Takes a string that may contain hex sequences like %F1 or %2b and
750      * converts the hex values following the '%' to lowercase
751      */
convertHexToLowerCase(String s)752     private String convertHexToLowerCase(String s) {
753         StringBuilder result = new StringBuilder("");
754         if (s.indexOf('%') == -1) {
755             return s;
756         }
757 
758         int index, prevIndex = 0;
759         while ((index = s.indexOf('%', prevIndex)) != -1) {
760             result.append(s.substring(prevIndex, index + 1));
761             result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
762             index += 3;
763             prevIndex = index;
764         }
765         return result.toString();
766     }
767 
768     /**
769      * Returns true if {@code first} and {@code second} are equal after
770      * unescaping hex sequences like %F1 and %2b.
771      */
escapedEquals(String first, String second)772     private boolean escapedEquals(String first, String second) {
773         if (first.indexOf('%') != second.indexOf('%')) {
774             return first.equals(second);
775         }
776 
777         int index, prevIndex = 0;
778         while ((index = first.indexOf('%', prevIndex)) != -1
779                 && second.indexOf('%', prevIndex) == index) {
780             boolean match = first.substring(prevIndex, index).equals(
781                     second.substring(prevIndex, index));
782             if (!match) {
783                 return false;
784             }
785 
786             match = first.substring(index + 1, index + 3).equalsIgnoreCase(
787                     second.substring(index + 1, index + 3));
788             if (!match) {
789                 return false;
790             }
791 
792             index += 3;
793             prevIndex = index;
794         }
795         return first.substring(prevIndex).equals(second.substring(prevIndex));
796     }
797 
equals(Object o)798     @Override public boolean equals(Object o) {
799         if (!(o instanceof URI)) {
800             return false;
801         }
802         URI uri = (URI) o;
803 
804         if (uri.fragment == null && fragment != null || uri.fragment != null
805                 && fragment == null) {
806             return false;
807         } else if (uri.fragment != null && fragment != null) {
808             if (!escapedEquals(uri.fragment, fragment)) {
809                 return false;
810             }
811         }
812 
813         if (uri.scheme == null && scheme != null || uri.scheme != null
814                 && scheme == null) {
815             return false;
816         } else if (uri.scheme != null && scheme != null) {
817             if (!uri.scheme.equalsIgnoreCase(scheme)) {
818                 return false;
819             }
820         }
821 
822         if (uri.opaque && opaque) {
823             return escapedEquals(uri.schemeSpecificPart,
824                     schemeSpecificPart);
825         } else if (!uri.opaque && !opaque) {
826             if (!escapedEquals(path, uri.path)) {
827                 return false;
828             }
829 
830             if (uri.query != null && query == null || uri.query == null
831                     && query != null) {
832                 return false;
833             } else if (uri.query != null && query != null) {
834                 if (!escapedEquals(uri.query, query)) {
835                     return false;
836                 }
837             }
838 
839             if (uri.authority != null && authority == null
840                     || uri.authority == null && authority != null) {
841                 return false;
842             } else if (uri.authority != null && authority != null) {
843                 if (uri.host != null && host == null || uri.host == null
844                         && host != null) {
845                     return false;
846                 } else if (uri.host == null && host == null) {
847                     // both are registry based, so compare the whole authority
848                     return escapedEquals(uri.authority, authority);
849                 } else { // uri.host != null && host != null, so server-based
850                     if (!host.equalsIgnoreCase(uri.host)) {
851                         return false;
852                     }
853 
854                     if (port != uri.port) {
855                         return false;
856                     }
857 
858                     if (uri.userInfo != null && userInfo == null
859                             || uri.userInfo == null && userInfo != null) {
860                         return false;
861                     } else if (uri.userInfo != null && userInfo != null) {
862                         return escapedEquals(userInfo, uri.userInfo);
863                     } else {
864                         return true;
865                     }
866                 }
867             } else {
868                 // no authority
869                 return true;
870             }
871 
872         } else {
873             // one is opaque, the other hierarchical
874             return false;
875         }
876     }
877 
878     /**
879      * Returns the scheme of this URI, or null if this URI has no scheme. This
880      * is also known as the protocol.
881      */
getScheme()882     public String getScheme() {
883         return scheme;
884     }
885 
886     /**
887      * Returns the decoded scheme-specific part of this URI, or null if this URI
888      * has no scheme-specific part.
889      */
getSchemeSpecificPart()890     public String getSchemeSpecificPart() {
891         return decode(schemeSpecificPart);
892     }
893 
894     /**
895      * Returns the encoded scheme-specific part of this URI, or null if this URI
896      * has no scheme-specific part.
897      */
getRawSchemeSpecificPart()898     public String getRawSchemeSpecificPart() {
899         return schemeSpecificPart;
900     }
901 
902     /**
903      * Returns the decoded authority part of this URI, or null if this URI has
904      * no authority.
905      */
getAuthority()906     public String getAuthority() {
907         return decode(authority);
908     }
909 
910     /**
911      * Returns the encoded authority of this URI, or null if this URI has no
912      * authority.
913      */
getRawAuthority()914     public String getRawAuthority() {
915         return authority;
916     }
917 
918     /**
919      * Returns the decoded user info of this URI, or null if this URI has no
920      * user info.
921      */
getUserInfo()922     public String getUserInfo() {
923         return decode(userInfo);
924     }
925 
926     /**
927      * Returns the encoded user info of this URI, or null if this URI has no
928      * user info.
929      */
getRawUserInfo()930     public String getRawUserInfo() {
931         return userInfo;
932     }
933 
934     /**
935      * Returns the host of this URI, or null if this URI has no host.
936      */
getHost()937     public String getHost() {
938         return host;
939     }
940 
941     /**
942      * Returns the port number of this URI, or {@code -1} if this URI has no
943      * explicit port.
944      */
getPort()945     public int getPort() {
946         return port;
947     }
948 
949     /** @hide */
getEffectivePort()950     public int getEffectivePort() {
951         return getEffectivePort(scheme, port);
952     }
953 
954     /**
955      * Returns the port to use for {@code scheme} connections will use when
956      * {@link #getPort} returns {@code specifiedPort}.
957      *
958      * @hide
959      */
getEffectivePort(String scheme, int specifiedPort)960     public static int getEffectivePort(String scheme, int specifiedPort) {
961         if (specifiedPort != -1) {
962             return specifiedPort;
963         }
964 
965         if ("http".equalsIgnoreCase(scheme)) {
966             return 80;
967         } else if ("https".equalsIgnoreCase(scheme)) {
968             return 443;
969         } else {
970             return -1;
971         }
972     }
973 
974     /**
975      * Returns the decoded path of this URI, or null if this URI has no path.
976      */
getPath()977     public String getPath() {
978         return decode(path);
979     }
980 
981     /**
982      * Returns the encoded path of this URI, or null if this URI has no path.
983      */
getRawPath()984     public String getRawPath() {
985         return path;
986     }
987 
988     /**
989      * Returns the decoded query of this URI, or null if this URI has no query.
990      */
getQuery()991     public String getQuery() {
992         return decode(query);
993     }
994 
995     /**
996      * Returns the encoded query of this URI, or null if this URI has no query.
997      */
getRawQuery()998     public String getRawQuery() {
999         return query;
1000     }
1001 
1002     /**
1003      * Returns the decoded fragment of this URI, or null if this URI has no
1004      * fragment.
1005      */
getFragment()1006     public String getFragment() {
1007         return decode(fragment);
1008     }
1009 
1010     /**
1011      * Gets the encoded fragment of this URI, or null if this URI has no
1012      * fragment.
1013      */
getRawFragment()1014     public String getRawFragment() {
1015         return fragment;
1016     }
1017 
hashCode()1018     @Override public int hashCode() {
1019         if (hash == -1) {
1020             hash = getHashString().hashCode();
1021         }
1022         return hash;
1023     }
1024 
1025     /**
1026      * Returns true if this URI is absolute, which means that a scheme is
1027      * defined.
1028      */
isAbsolute()1029     public boolean isAbsolute() {
1030         // TODO: simplify to 'scheme != null' ?
1031         return absolute;
1032     }
1033 
1034     /**
1035      * Returns true if this URI is opaque. Opaque URIs are absolute and have a
1036      * scheme-specific part that does not start with a slash character. All
1037      * parts except scheme, scheme-specific and fragment are undefined.
1038      */
isOpaque()1039     public boolean isOpaque() {
1040         return opaque;
1041     }
1042 
1043     /**
1044      * Returns the normalized path.
1045      */
normalize(String path, boolean discardRelativePrefix)1046     private String normalize(String path, boolean discardRelativePrefix) {
1047         path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1048 
1049         /*
1050          * If the path contains a colon before the first colon, prepend
1051          * "./" to differentiate the path from a scheme prefix.
1052          */
1053         int colon = path.indexOf(':');
1054         if (colon != -1) {
1055             int slash = path.indexOf('/');
1056             if (slash == -1 || colon < slash) {
1057                 path = "./" + path;
1058             }
1059         }
1060 
1061         return path;
1062     }
1063 
1064     /**
1065      * Normalizes the path part of this URI.
1066      *
1067      * @return an URI object which represents this instance with a normalized
1068      *         path.
1069      */
normalize()1070     public URI normalize() {
1071         if (opaque) {
1072             return this;
1073         }
1074         String normalizedPath = normalize(path, false);
1075         // if the path is already normalized, return this
1076         if (path.equals(normalizedPath)) {
1077             return this;
1078         }
1079         // get an exact copy of the URI re-calculate the scheme specific part
1080         // since the path of the normalized URI is different from this URI.
1081         URI result = duplicate();
1082         result.path = normalizedPath;
1083         result.setSchemeSpecificPart();
1084         return result;
1085     }
1086 
1087     /**
1088      * Tries to parse the authority component of this URI to divide it into the
1089      * host, port, and user-info. If this URI is already determined as a
1090      * ServerAuthority this instance will be returned without changes.
1091      *
1092      * @return this instance with the components of the parsed server authority.
1093      * @throws URISyntaxException
1094      *             if the authority part could not be parsed as a server-based
1095      *             authority.
1096      */
parseServerAuthority()1097     public URI parseServerAuthority() throws URISyntaxException {
1098         if (!serverAuthority) {
1099             parseAuthority(true);
1100         }
1101         return this;
1102     }
1103 
1104     /**
1105      * Makes the given URI {@code relative} to a relative URI against the URI
1106      * represented by this instance.
1107      *
1108      * @param relative
1109      *            the URI which has to be relativized against this URI.
1110      * @return the relative URI.
1111      */
relativize(URI relative)1112     public URI relativize(URI relative) {
1113         if (relative.opaque || opaque) {
1114             return relative;
1115         }
1116 
1117         if (scheme == null ? relative.scheme != null : !scheme
1118                 .equals(relative.scheme)) {
1119             return relative;
1120         }
1121 
1122         if (authority == null ? relative.authority != null : !authority
1123                 .equals(relative.authority)) {
1124             return relative;
1125         }
1126 
1127         // normalize both paths
1128         String thisPath = normalize(path, false);
1129         String relativePath = normalize(relative.path, false);
1130 
1131         /*
1132          * if the paths aren't equal, then we need to determine if this URI's
1133          * path is a parent path (begins with) the relative URI's path
1134          */
1135         if (!thisPath.equals(relativePath)) {
1136             // drop everything after the last slash in this path
1137             thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1138 
1139             /*
1140              * if the relative URI's path doesn't start with this URI's path,
1141              * then just return the relative URI; the URIs have nothing in
1142              * common
1143              */
1144             if (!relativePath.startsWith(thisPath)) {
1145                 return relative;
1146             }
1147         }
1148 
1149         URI result = new URI();
1150         result.fragment = relative.fragment;
1151         result.query = relative.query;
1152         // the result URI is the remainder of the relative URI's path
1153         result.path = relativePath.substring(thisPath.length());
1154         result.setSchemeSpecificPart();
1155         return result;
1156     }
1157 
1158     /**
1159      * Resolves the given URI {@code relative} against the URI represented by
1160      * this instance.
1161      *
1162      * @param relative
1163      *            the URI which has to be resolved against this URI.
1164      * @return the resolved URI.
1165      */
resolve(URI relative)1166     public URI resolve(URI relative) {
1167         if (relative.absolute || opaque) {
1168             return relative;
1169         }
1170 
1171         if (relative.authority != null) {
1172             // If the relative URI has an authority, the result is the relative
1173             // with this URI's scheme.
1174             URI result = relative.duplicate();
1175             result.scheme = scheme;
1176             result.absolute = absolute;
1177             return result;
1178         }
1179 
1180         if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1181             // if the relative URI only consists of at most a fragment,
1182             URI result = duplicate();
1183             result.fragment = relative.fragment;
1184             return result;
1185         }
1186 
1187         URI result = duplicate();
1188         result.fragment = relative.fragment;
1189         result.query = relative.query;
1190         String resolvedPath;
1191         if (relative.path.startsWith("/")) {
1192             // The relative URI has an absolute path; use it.
1193             resolvedPath = relative.path;
1194         } else if (relative.path.isEmpty()) {
1195             // The relative URI has no path; use the base path.
1196             resolvedPath = path;
1197         } else {
1198             // The relative URI has a relative path; combine the paths.
1199             int endIndex = path.lastIndexOf('/') + 1;
1200             resolvedPath = path.substring(0, endIndex) + relative.path;
1201         }
1202         result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1203         result.setSchemeSpecificPart();
1204         return result;
1205     }
1206 
1207     /**
1208      * Helper method used to re-calculate the scheme specific part of the
1209      * resolved or normalized URIs
1210      */
setSchemeSpecificPart()1211     private void setSchemeSpecificPart() {
1212         // ssp = [//authority][path][?query]
1213         StringBuilder ssp = new StringBuilder();
1214         if (authority != null) {
1215             ssp.append("//" + authority);
1216         }
1217         if (path != null) {
1218             ssp.append(path);
1219         }
1220         if (query != null) {
1221             ssp.append("?" + query);
1222         }
1223         schemeSpecificPart = ssp.toString();
1224         // reset string, so that it can be re-calculated correctly when asked.
1225         string = null;
1226     }
1227 
1228     /**
1229      * Creates a new URI instance by parsing the given string {@code relative}
1230      * and resolves the created URI against the URI represented by this
1231      * instance.
1232      *
1233      * @param relative
1234      *            the given string to create the new URI instance which has to
1235      *            be resolved later on.
1236      * @return the created and resolved URI.
1237      */
resolve(String relative)1238     public URI resolve(String relative) {
1239         return resolve(create(relative));
1240     }
1241 
decode(String s)1242     private String decode(String s) {
1243         return s != null ? UriCodec.decode(s) : null;
1244     }
1245 
1246     /**
1247      * Returns the textual string representation of this URI instance using the
1248      * US-ASCII encoding.
1249      *
1250      * @return the US-ASCII string representation of this URI.
1251      */
toASCIIString()1252     public String toASCIIString() {
1253         StringBuilder result = new StringBuilder();
1254         ASCII_ONLY.appendEncoded(result, toString());
1255         return result.toString();
1256     }
1257 
1258     /**
1259      * Returns the encoded URI.
1260      */
toString()1261     @Override public String toString() {
1262         if (string != null) {
1263             return string;
1264         }
1265 
1266         StringBuilder result = new StringBuilder();
1267         if (scheme != null) {
1268             result.append(scheme);
1269             result.append(':');
1270         }
1271         if (opaque) {
1272             result.append(schemeSpecificPart);
1273         } else {
1274             if (authority != null) {
1275                 result.append("//");
1276                 result.append(authority);
1277             }
1278 
1279             if (path != null) {
1280                 result.append(path);
1281             }
1282 
1283             if (query != null) {
1284                 result.append('?');
1285                 result.append(query);
1286             }
1287         }
1288 
1289         if (fragment != null) {
1290             result.append('#');
1291             result.append(fragment);
1292         }
1293 
1294         string = result.toString();
1295         return string;
1296     }
1297 
1298     /*
1299      * Form a string from the components of this URI, similarly to the
1300      * toString() method. But this method converts scheme and host to lowercase,
1301      * and converts escaped octets to lowercase.
1302      */
getHashString()1303     private String getHashString() {
1304         StringBuilder result = new StringBuilder();
1305         if (scheme != null) {
1306             result.append(scheme.toLowerCase(Locale.US));
1307             result.append(':');
1308         }
1309         if (opaque) {
1310             result.append(schemeSpecificPart);
1311         } else {
1312             if (authority != null) {
1313                 result.append("//");
1314                 if (host == null) {
1315                     result.append(authority);
1316                 } else {
1317                     if (userInfo != null) {
1318                         result.append(userInfo + "@");
1319                     }
1320                     result.append(host.toLowerCase(Locale.US));
1321                     if (port != -1) {
1322                         result.append(":" + port);
1323                     }
1324                 }
1325             }
1326 
1327             if (path != null) {
1328                 result.append(path);
1329             }
1330 
1331             if (query != null) {
1332                 result.append('?');
1333                 result.append(query);
1334             }
1335         }
1336 
1337         if (fragment != null) {
1338             result.append('#');
1339             result.append(fragment);
1340         }
1341 
1342         return convertHexToLowerCase(result.toString());
1343     }
1344 
1345     /**
1346      * Converts this URI instance to a URL.
1347      *
1348      * @return the created URL representing the same resource as this URI.
1349      * @throws MalformedURLException
1350      *             if an error occurs while creating the URL or no protocol
1351      *             handler could be found.
1352      */
toURL()1353     public URL toURL() throws MalformedURLException {
1354         if (!absolute) {
1355             throw new IllegalArgumentException("URI is not absolute: " + toString());
1356         }
1357         return new URL(toString());
1358     }
1359 
readObject(ObjectInputStream in)1360     private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1361         in.defaultReadObject();
1362         try {
1363             parseURI(string, false);
1364         } catch (URISyntaxException e) {
1365             throw new IOException(e.toString());
1366         }
1367     }
1368 
writeObject(ObjectOutputStream out)1369     private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1370         // call toString() to ensure the value of string field is calculated
1371         toString();
1372         out.defaultWriteObject();
1373     }
1374 }
1375