• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 package java.net;
19 
20 import java.io.IOException;
21 import java.io.ObjectInputStream;
22 import java.io.ObjectOutputStream;
23 import java.io.Serializable;
24 import java.util.Locale;
25 import libcore.net.UriCodec;
26 import libcore.net.url.UrlUtils;
27 
28 /**
29  * A Uniform Resource Identifier that identifies an abstract or physical
30  * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31  * 2396</a>.
32  *
33  * <h3>Parts of a URI</h3>
34  * A URI is composed of many parts. This class can both parse URI strings into
35  * parts and compose URI strings from parts. For example, consider the parts of
36  * this URI:
37  * {@code http://username:password@host:8080/directory/file?query#fragment}
38  * <table>
39  * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40  * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41  * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42  * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43  * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44  * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45  * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46  * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47  * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48  * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49  * </table>
50  *
51  * <h3>Absolute vs. Relative URIs</h3>
52  * URIs are either {@link #isAbsolute() absolute or relative}.
53  * <ul>
54  *     <li><strong>Absolute:</strong> {@code http://android.com/robots.txt}
55  *     <li><strong>Relative:</strong> {@code robots.txt}
56  * </ul>
57  *
58  * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link
59  * URL}, you can use {@link #toURL} to convert an absolute URI to a URL.
60  *
61  * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you
62  * have the absolute URI that a relative URI is relative to, you can use {@link
63  * #resolve} to compute the referenced absolute URI. Symmetrically, you can use
64  * {@link #relativize} to compute the relative URI from one URI to another.
65  * <pre>   {@code
66  *   URI absolute = new URI("http://android.com/");
67  *   URI relative = new URI("robots.txt");
68  *   URI resolved = new URI("http://android.com/robots.txt");
69  *
70  *   // print "http://android.com/robots.txt"
71  *   System.out.println(absolute.resolve(relative));
72  *
73  *   // print "robots.txt"
74  *   System.out.println(absolute.relativize(resolved));
75  * }</pre>
76  *
77  * <h3>Opaque vs. Hierarchical URIs</h3>
78  * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
79  * URIs are always hierarchical.
80  * <ul>
81  *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
82  *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
83  * </ul>
84  *
85  * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
86  * begin with the slash character: {@code /}. The contents of the
87  * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
88  * has an authority, user info, host, port, path or query. An opaque URI may
89  * have a fragment, however. A typical opaque URI is
90  * {@code mailto:robots@example.com}.
91  * <table>
92  * <tr><th>Component           </th><th>Example value             </th></tr>
93  * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
94  * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
95  * <tr><td>Fragment            </td><td>                          </td></tr>
96  * </table>
97  * <p>Hierarchical URIs may have values for any URI component. They always
98  * have a non-null path, though that path may be the empty string.
99  *
100  * <h3>Encoding and Decoding URI Components</h3>
101  * Each component of a URI permits a limited set of legal characters. Other
102  * characters must first be <i>encoded</i> before they can be embedded in a URI.
103  * To recover the original characters from a URI, they may be <i>decoded</i>.
104  * <strong>Contrary to what you might expect,</strong> this class uses the
105  * term <i>raw</i> to refer to encoded strings. The non-<i>raw</i> accessors
106  * return decoded strings. For example, consider how this URI is decoded:
107  * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
108  * <table>
109  * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
110  * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
111  * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
112  * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
113  * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
114  * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
115  * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
116  * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
117  * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
118  * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
119  * </table>
120  * A URI's host, port and scheme are not eligible for encoding and must not
121  * contain illegal characters.
122  *
123  * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
124  * class. These constructors accept your original strings and encode them into
125  * their raw form.
126  *
127  * <p>To decode a URI, invoke the single-string constructor, and then use the
128  * appropriate accessor methods to get the decoded components.
129  *
130  * <p>The {@link URL} class can be used to retrieve resources by their URI.
131  */
132 public final class URI implements Comparable<URI>, Serializable {
133 
134     private static final long serialVersionUID = -6052424284110960213l;
135 
136     static final String UNRESERVED = "_-!.~\'()*";
137     static final String PUNCTUATION = ",;:$&+=";
138 
139     static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
140     static final UriCodec PATH_ENCODER = new PartEncoder("/@");
141     static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
142 
143     /** for java.net.URL, which foolishly combines these two parts */
144     static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
145 
146     /** for query, fragment, and scheme-specific part */
147     static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
148 
149     /** Retains all ASCII chars including delimiters. */
150     private static final UriCodec ASCII_ONLY = new UriCodec() {
151         @Override protected boolean isRetained(char c) {
152             return c <= 127;
153         }
154     };
155 
156     /**
157      * Encodes the unescaped characters of {@code s} that are not permitted.
158      * Permitted characters are:
159      * <ul>
160      *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161      *   <li>{@code extraOkayChars},
162      *   <li>non-ASCII, non-control, non-whitespace characters
163      * </ul>
164      */
165     private static class PartEncoder extends UriCodec {
166         private final String extraLegalCharacters;
167 
PartEncoder(String extraLegalCharacters)168         PartEncoder(String extraLegalCharacters) {
169             this.extraLegalCharacters = extraLegalCharacters;
170         }
171 
isRetained(char c)172         @Override protected boolean isRetained(char c) {
173             return UNRESERVED.indexOf(c) != -1
174                     || PUNCTUATION.indexOf(c) != -1
175                     || extraLegalCharacters.indexOf(c) != -1
176                     || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
177         }
178     }
179 
180     private String string;
181     private transient String scheme;
182     private transient String schemeSpecificPart;
183     private transient String authority;
184     private transient String userInfo;
185     private transient String host;
186     private transient int port = -1;
187     private transient String path;
188     private transient String query;
189     private transient String fragment;
190     private transient boolean opaque;
191     private transient boolean absolute;
192     private transient boolean serverAuthority = false;
193 
194     private transient int hash = -1;
195 
URI()196     private URI() {}
197 
198     /**
199      * Creates a new URI instance by parsing {@code spec}.
200      *
201      * @param spec a URI whose illegal characters have all been encoded.
202      */
URI(String spec)203     public URI(String spec) throws URISyntaxException {
204         parseURI(spec, false);
205     }
206 
/**
 * Creates a new URI instance from unencoded parts. The parts are joined as
 * {@code [scheme:][scheme-specific part][#fragment]}; the scheme is appended
 * verbatim while the scheme-specific part and fragment are encoded. The
 * resulting string is then parsed.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param schemeSpecificPart the unencoded scheme-specific part, or null to omit it.
 * @param fragment the unencoded fragment, or null to omit it.
 * @throws URISyntaxException if the assembled string cannot be parsed.
 */
public URI(String scheme, String schemeSpecificPart, String fragment)
        throws URISyntaxException {
    StringBuilder text = new StringBuilder();

    if (scheme != null) {
        text.append(scheme).append(':');
    }

    if (schemeSpecificPart != null) {
        ALL_LEGAL_ENCODER.appendEncoded(text, schemeSpecificPart);
    }

    if (fragment != null) {
        text.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(text, fragment);
    }

    parseURI(text.toString(), false);
}
229 
/**
 * Creates a new URI instance from unencoded server-based parts. The parts are
 * joined as {@code [scheme:][//[user-info@][host][:port]][path][?query][#fragment]},
 * encoding user info, path, query and fragment, and the result is parsed with
 * server-based authority parsing forced.
 *
 * <p>If every string argument is null the URI gets an empty path and nothing
 * is parsed; {@code port} is not consulted for that check.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param userInfo the unencoded user info, or null to omit it.
 * @param host the host, or null to omit it. A host that contains {@code :}
 *     but no square brackets is wrapped in {@code [} and {@code ]}.
 * @param port the port, or -1 to omit it.
 * @param path the unencoded path, or null to omit it.
 * @param query the unencoded query, or null to omit it.
 * @param fragment the unencoded fragment, or null to omit it.
 * @throws URISyntaxException if a scheme is given together with a non-empty
 *     path that does not start with {@code /}, or if the assembled string
 *     cannot be parsed.
 */
public URI(String scheme, String userInfo, String host, int port, String path, String query,
        String fragment) throws URISyntaxException {
    boolean nothingGiven = scheme == null && userInfo == null && host == null
            && path == null && query == null && fragment == null;
    if (nothingGiven) {
        this.path = "";
        return;
    }

    boolean relativePath = path != null && !path.isEmpty() && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }

    // Any authority component at all requires the "//" introducer.
    boolean hasAuthorityPart = userInfo != null || host != null || port != -1;
    if (hasAuthorityPart) {
        text.append("//");
    }

    if (userInfo != null) {
        USER_INFO_ENCODER.appendEncoded(text, userInfo);
        text.append('@');
    }

    if (host != null) {
        // A colon with no square brackets anywhere is taken to be an IPv6
        // address that has not been enclosed in brackets yet.
        boolean hasBracket = host.indexOf(']') != -1 || host.indexOf('[') != -1;
        if (host.indexOf(':') != -1 && !hasBracket) {
            text.append('[').append(host).append(']');
        } else {
            text.append(host);
        }
    }

    if (port != -1) {
        text.append(':').append(port);
    }

    if (path != null) {
        PATH_ENCODER.appendEncoded(text, path);
    }

    if (query != null) {
        text.append('?');
        ALL_LEGAL_ENCODER.appendEncoded(text, query);
    }

    if (fragment != null) {
        text.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(text, fragment);
    }

    parseURI(text.toString(), true);
}
291 
/**
 * Creates a new URI instance from unencoded parts, without user info, port
 * or query. Delegates to the seven-argument constructor.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param host the host, or null to omit it.
 * @param path the unencoded path, or null to omit it.
 * @param fragment the unencoded fragment, or null to omit it.
 * @throws URISyntaxException if the seven-argument constructor rejects the parts.
 */
public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
    this(scheme,
            null /* userInfo */,
            host,
            -1 /* port */,
            path,
            null /* query */,
            fragment);
}
300 
/**
 * Creates a new URI instance from unencoded parts with a pre-assembled
 * authority. The parts are joined as
 * {@code [scheme:][//authority][path][?query][#fragment]}, encoding authority,
 * path, query and fragment, and the result is parsed without forcing
 * server-based authority parsing.
 *
 * @param scheme the URI scheme, or null for a non-absolute URI.
 * @param authority the unencoded authority, or null to omit it.
 * @param path the unencoded path, or null to omit it.
 * @param query the unencoded query, or null to omit it.
 * @param fragment the unencoded fragment, or null to omit it.
 * @throws URISyntaxException if a scheme is given together with a non-empty
 *     path that does not start with {@code /}, or if the assembled string
 *     cannot be parsed.
 */
public URI(String scheme, String authority, String path, String query,
        String fragment) throws URISyntaxException {
    boolean relativePath = path != null && !path.isEmpty() && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder text = new StringBuilder();

    if (scheme != null) {
        text.append(scheme).append(':');
    }

    if (authority != null) {
        text.append("//");
        AUTHORITY_ENCODER.appendEncoded(text, authority);
    }

    if (path != null) {
        PATH_ENCODER.appendEncoded(text, path);
    }

    if (query != null) {
        text.append('?');
        ALL_LEGAL_ENCODER.appendEncoded(text, query);
    }

    if (fragment != null) {
        text.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(text, fragment);
    }

    parseURI(text.toString(), false);
}
336 
337     /**
338      * Breaks uri into its component parts. This first splits URI into scheme,
339      * scheme-specific part and fragment:
340      *   [scheme:][scheme-specific part][#fragment]
341      *
342      * Then it breaks the scheme-specific part into authority, path and query:
343      *   [//authority][path][?query]
344      *
345      * Finally it delegates to parseAuthority to break the authority into user
346      * info, host and port:
347      *   [user-info@][host][:port]
348      */
parseURI(String uri, boolean forceServer)349     private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
350         string = uri;
351 
352         // "#fragment"
353         int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
354         if (fragmentStart < uri.length()) {
355             fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
356         }
357 
358         // scheme:
359         int start;
360         int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
361         if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
362             absolute = true;
363             scheme = validateScheme(uri, colon);
364             start = colon + 1;
365 
366             if (start == fragmentStart) {
367                 throw new URISyntaxException(uri, "Scheme-specific part expected", start);
368             }
369 
370             // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
371             if (!uri.regionMatches(start, "/", 0, 1)) {
372                 opaque = true;
373                 schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
374                         uri, start, fragmentStart, "scheme specific part");
375                 return;
376             }
377         } else {
378             absolute = false;
379             start = 0;
380         }
381 
382         opaque = false;
383         schemeSpecificPart = uri.substring(start, fragmentStart);
384 
385         // "//authority"
386         int fileStart;
387         if (uri.regionMatches(start, "//", 0, 2)) {
388             int authorityStart = start + 2;
389             fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
390             if (authorityStart == uri.length()) {
391                 throw new URISyntaxException(uri, "Authority expected", uri.length());
392             }
393             if (authorityStart < fileStart) {
394                 authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
395             }
396         } else {
397             fileStart = start;
398         }
399 
400         // "path"
401         int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
402         path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");
403 
404         // "?query"
405         if (queryStart < fragmentStart) {
406             query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
407         }
408 
409         parseAuthority(forceServer);
410     }
411 
validateScheme(String uri, int end)412     private String validateScheme(String uri, int end) throws URISyntaxException {
413         if (end == 0) {
414             throw new URISyntaxException(uri, "Scheme expected", 0);
415         }
416 
417         for (int i = 0; i < end; i++) {
418             if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
419                 throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420             }
421         }
422 
423         return uri.substring(0, end);
424     }
425 
426     /**
427      * Breaks this URI's authority into user info, host and port parts.
428      *   [user-info@][host][:port]
429      * If any part of this fails this method will give up and potentially leave
430      * these fields with their default values.
431      *
432      * @param forceServer true to always throw if the authority cannot be
433      *     parsed. If false, this method may still throw for some kinds of
434      *     errors; this unpredictable behavior is consistent with the RI.
435      */
parseAuthority(boolean forceServer)436     private void parseAuthority(boolean forceServer) throws URISyntaxException {
437         if (authority == null) {
438             return;
439         }
440 
441         String tempUserInfo = null;
442         String temp = authority;
443         int index = temp.indexOf('@');
444         int hostIndex = 0;
445         if (index != -1) {
446             // remove user info
447             tempUserInfo = temp.substring(0, index);
448             validateUserInfo(authority, tempUserInfo, 0);
449             temp = temp.substring(index + 1); // host[:port] is left
450             hostIndex = index + 1;
451         }
452 
453         index = temp.lastIndexOf(':');
454         int endIndex = temp.indexOf(']');
455 
456         String tempHost;
457         int tempPort = -1;
458         if (index != -1 && endIndex < index) {
459             // determine port and host
460             tempHost = temp.substring(0, index);
461 
462             if (index < (temp.length() - 1)) { // port part is not empty
463                 try {
464                     char firstPortChar = temp.charAt(index + 1);
465                     if (firstPortChar >= '0' && firstPortChar <= '9') {
466                         // allow only digits, no signs
467                         tempPort = Integer.parseInt(temp.substring(index + 1));
468                     } else {
469                         if (forceServer) {
470                             throw new URISyntaxException(authority,
471                                 "Invalid port number", hostIndex + index + 1);
472                         }
473                         return;
474                     }
475                 } catch (NumberFormatException e) {
476                     if (forceServer) {
477                         throw new URISyntaxException(authority,
478                                 "Invalid port number", hostIndex + index + 1);
479                     }
480                     return;
481                 }
482             }
483         } else {
484             tempHost = temp;
485         }
486 
487         if (tempHost.isEmpty()) {
488             if (forceServer) {
489                 throw new URISyntaxException(authority, "Expected host", hostIndex);
490             }
491             return;
492         }
493 
494         if (!isValidHost(forceServer, tempHost)) {
495             return;
496         }
497 
498         // this is a server based uri,
499         // fill in the userInfo, host and port fields
500         userInfo = tempUserInfo;
501         host = tempHost;
502         port = tempPort;
503         serverAuthority = true;
504     }
505 
validateUserInfo(String uri, String userInfo, int index)506     private void validateUserInfo(String uri, String userInfo, int index)
507             throws URISyntaxException {
508         for (int i = 0; i < userInfo.length(); i++) {
509             char ch = userInfo.charAt(i);
510             if (ch == ']' || ch == '[') {
511                 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
512             }
513         }
514     }
515 
516     /**
517      * Returns true if {@code host} is a well-formed host name or IP address.
518      *
519      * @param forceServer true to always throw if the host cannot be parsed. If
520      *     false, this method may still throw for some kinds of errors; this
521      *     unpredictable behavior is consistent with the RI.
522      */
isValidHost(boolean forceServer, String host)523     private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
524         if (host.startsWith("[")) {
525             // IPv6 address
526             if (!host.endsWith("]")) {
527                 throw new URISyntaxException(host,
528                         "Expected a closing square bracket for IPv6 address", 0);
529             }
530             if (InetAddress.isNumeric(host)) {
531                 // If it's numeric, the presence of square brackets guarantees
532                 // that it's a numeric IPv6 address.
533                 return true;
534             }
535             throw new URISyntaxException(host, "Malformed IPv6 address");
536         }
537 
538         // '[' and ']' can only be the first char and last char
539         // of the host name
540         if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
541             throw new URISyntaxException(host, "Illegal character in host name", 0);
542         }
543 
544         int index = host.lastIndexOf('.');
545         if (index < 0 || index == host.length() - 1
546                 || !Character.isDigit(host.charAt(index + 1))) {
547             // domain name
548             if (isValidDomainName(host)) {
549                 return true;
550             }
551             if (forceServer) {
552                 throw new URISyntaxException(host, "Illegal character in host name", 0);
553             }
554             return false;
555         }
556 
557         // IPv4 address?
558         try {
559             InetAddress ia = InetAddress.parseNumericAddress(host);
560             if (ia instanceof Inet4Address) {
561                 return true;
562             }
563         } catch (IllegalArgumentException ignored) {
564         }
565 
566         if (forceServer) {
567             throw new URISyntaxException(host, "Malformed IPv4 address", 0);
568         }
569         return false;
570     }
571 
isValidDomainName(String host)572     private boolean isValidDomainName(String host) {
573         try {
574             UriCodec.validateSimple(host, "-.");
575         } catch (URISyntaxException e) {
576             return false;
577         }
578 
579         String lastLabel = null;
580         for (String token : host.split("\\.")) {
581             lastLabel = token;
582             if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
583                 return false;
584             }
585         }
586 
587         if (lastLabel == null) {
588             return false;
589         }
590 
591         if (!lastLabel.equals(host)) {
592             char ch = lastLabel.charAt(0);
593             if (ch >= '0' && ch <= '9') {
594                 return false;
595             }
596         }
597         return true;
598     }
599 
600     /**
601      * Compares this URI with the given argument {@code uri}. This method will
602      * return a negative value if this URI instance is less than the given
603      * argument and a positive value if this URI instance is greater than the
604      * given argument. The return value {@code 0} indicates that the two
605      * instances represent the same URI. To define the order the single parts of
606      * the URI are compared with each other. String components will be ordered
607      * in the natural case-sensitive way. A hierarchical URI is less than an
608      * opaque URI and if one part is {@code null} the URI with the undefined
609      * part is less than the other one.
610      *
611      * @param uri
612      *            the URI this instance has to compare with.
613      * @return the value representing the order of the two instances.
614      */
compareTo(URI uri)615     public int compareTo(URI uri) {
616         int ret;
617 
618         // compare schemes
619         if (scheme == null && uri.scheme != null) {
620             return -1;
621         } else if (scheme != null && uri.scheme == null) {
622             return 1;
623         } else if (scheme != null && uri.scheme != null) {
624             ret = scheme.compareToIgnoreCase(uri.scheme);
625             if (ret != 0) {
626                 return ret;
627             }
628         }
629 
630         // compare opacities
631         if (!opaque && uri.opaque) {
632             return -1;
633         } else if (opaque && !uri.opaque) {
634             return 1;
635         } else if (opaque && uri.opaque) {
636             ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
637             if (ret != 0) {
638                 return ret;
639             }
640         } else {
641 
642             // otherwise both must be hierarchical
643 
644             // compare authorities
645             if (authority != null && uri.authority == null) {
646                 return 1;
647             } else if (authority == null && uri.authority != null) {
648                 return -1;
649             } else if (authority != null && uri.authority != null) {
650                 if (host != null && uri.host != null) {
651                     // both are server based, so compare userInfo, host, port
652                     if (userInfo != null && uri.userInfo == null) {
653                         return 1;
654                     } else if (userInfo == null && uri.userInfo != null) {
655                         return -1;
656                     } else if (userInfo != null && uri.userInfo != null) {
657                         ret = userInfo.compareTo(uri.userInfo);
658                         if (ret != 0) {
659                             return ret;
660                         }
661                     }
662 
663                     // userInfo's are the same, compare hostname
664                     ret = host.compareToIgnoreCase(uri.host);
665                     if (ret != 0) {
666                         return ret;
667                     }
668 
669                     // compare port
670                     if (port != uri.port) {
671                         return port - uri.port;
672                     }
673                 } else { // one or both are registry based, compare the whole
674                     // authority
675                     ret = authority.compareTo(uri.authority);
676                     if (ret != 0) {
677                         return ret;
678                     }
679                 }
680             }
681 
682             // authorities are the same
683             // compare paths
684             ret = path.compareTo(uri.path);
685             if (ret != 0) {
686                 return ret;
687             }
688 
689             // compare queries
690 
691             if (query != null && uri.query == null) {
692                 return 1;
693             } else if (query == null && uri.query != null) {
694                 return -1;
695             } else if (query != null && uri.query != null) {
696                 ret = query.compareTo(uri.query);
697                 if (ret != 0) {
698                     return ret;
699                 }
700             }
701         }
702 
703         // everything else is identical, so compare fragments
704         if (fragment != null && uri.fragment == null) {
705             return 1;
706         } else if (fragment == null && uri.fragment != null) {
707             return -1;
708         } else if (fragment != null && uri.fragment != null) {
709             ret = fragment.compareTo(uri.fragment);
710             if (ret != 0) {
711                 return ret;
712             }
713         }
714 
715         // identical
716         return 0;
717     }
718 
719     /**
720      * Returns the URI formed by parsing {@code uri}. This method behaves
721      * identically to the string constructor but throws a different exception
722      * on failure. The constructor fails with a checked {@link
723      * URISyntaxException}; this method fails with an unchecked {@link
724      * IllegalArgumentException}.
725      */
create(String uri)726     public static URI create(String uri) {
727         try {
728             return new URI(uri);
729         } catch (URISyntaxException e) {
730             throw new IllegalArgumentException(e.getMessage());
731         }
732     }
733 
duplicate()734     private URI duplicate() {
735         URI clone = new URI();
736         clone.absolute = absolute;
737         clone.authority = authority;
738         clone.fragment = fragment;
739         clone.host = host;
740         clone.opaque = opaque;
741         clone.path = path;
742         clone.port = port;
743         clone.query = query;
744         clone.scheme = scheme;
745         clone.schemeSpecificPart = schemeSpecificPart;
746         clone.userInfo = userInfo;
747         clone.serverAuthority = serverAuthority;
748         return clone;
749     }
750 
751     /*
752      * Takes a string that may contain hex sequences like %F1 or %2b and
753      * converts the hex values following the '%' to lowercase
754      */
convertHexToLowerCase(String s)755     private String convertHexToLowerCase(String s) {
756         StringBuilder result = new StringBuilder("");
757         if (s.indexOf('%') == -1) {
758             return s;
759         }
760 
761         int index, prevIndex = 0;
762         while ((index = s.indexOf('%', prevIndex)) != -1) {
763             result.append(s.substring(prevIndex, index + 1));
764             result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
765             index += 3;
766             prevIndex = index;
767         }
768         return result.toString();
769     }
770 
771     /**
772      * Returns true if the given URI escaped strings {@code first} and {@code second} are
773      * equal.
774      *
775      * TODO: This method assumes that both strings are escaped using the same escape rules
776      * yet it still performs case insensitive comparison of the escaped sequences.
777      * Why is this necessary ? We can just replace it with first.equals(second)
778      * otherwise.
779      */
escapedEquals(String first, String second)780     private boolean escapedEquals(String first, String second) {
781         // This length test isn't a micro-optimization. We need it because we sometimes
782         // calculate the number of characters to match based on the length of the second
783         // string. If the second string is shorter than the first, we might attempt to match
784         // 0 chars, and regionMatches is specified to return true in that case.
785         if (first.length() != second.length()) {
786             return false;
787         }
788 
789         int prevIndex = 0;
790         while (true) {
791             int index = first.indexOf('%', prevIndex);
792             int index1 = second.indexOf('%', prevIndex);
793             if (index != index1) {
794                 return false;
795             }
796 
797             // index == index1 from this point on.
798 
799             if (index == -1) {
800                 // No more escapes, match the remainder of the string
801                 // normally.
802                return first.regionMatches(prevIndex, second, prevIndex,
803                        second.length() - prevIndex);
804             }
805 
806             if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) {
807                 return false;
808             }
809 
810             if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) {
811                 return false;
812             }
813 
814             index += 3;
815             prevIndex = index;
816         }
817     }
818 
equals(Object o)819     @Override public boolean equals(Object o) {
820         if (!(o instanceof URI)) {
821             return false;
822         }
823         URI uri = (URI) o;
824 
825         if (uri.fragment == null && fragment != null || uri.fragment != null
826                 && fragment == null) {
827             return false;
828         } else if (uri.fragment != null && fragment != null) {
829             if (!escapedEquals(uri.fragment, fragment)) {
830                 return false;
831             }
832         }
833 
834         if (uri.scheme == null && scheme != null || uri.scheme != null
835                 && scheme == null) {
836             return false;
837         } else if (uri.scheme != null && scheme != null) {
838             if (!uri.scheme.equalsIgnoreCase(scheme)) {
839                 return false;
840             }
841         }
842 
843         if (uri.opaque && opaque) {
844             return escapedEquals(uri.schemeSpecificPart,
845                     schemeSpecificPart);
846         } else if (!uri.opaque && !opaque) {
847             if (!escapedEquals(path, uri.path)) {
848                 return false;
849             }
850 
851             if (uri.query != null && query == null || uri.query == null
852                     && query != null) {
853                 return false;
854             } else if (uri.query != null && query != null) {
855                 if (!escapedEquals(uri.query, query)) {
856                     return false;
857                 }
858             }
859 
860             if (uri.authority != null && authority == null
861                     || uri.authority == null && authority != null) {
862                 return false;
863             } else if (uri.authority != null && authority != null) {
864                 if (uri.host != null && host == null || uri.host == null
865                         && host != null) {
866                     return false;
867                 } else if (uri.host == null && host == null) {
868                     // both are registry based, so compare the whole authority
869                     return escapedEquals(uri.authority, authority);
870                 } else { // uri.host != null && host != null, so server-based
871                     if (!host.equalsIgnoreCase(uri.host)) {
872                         return false;
873                     }
874 
875                     if (port != uri.port) {
876                         return false;
877                     }
878 
879                     if (uri.userInfo != null && userInfo == null
880                             || uri.userInfo == null && userInfo != null) {
881                         return false;
882                     } else if (uri.userInfo != null && userInfo != null) {
883                         return escapedEquals(userInfo, uri.userInfo);
884                     } else {
885                         return true;
886                     }
887                 }
888             } else {
889                 // no authority
890                 return true;
891             }
892 
893         } else {
894             // one is opaque, the other hierarchical
895             return false;
896         }
897     }
898 
899     /**
900      * Returns the scheme of this URI, or null if this URI has no scheme. This
901      * is also known as the protocol.
902      */
getScheme()903     public String getScheme() {
904         return scheme;
905     }
906 
907     /**
908      * Returns the decoded scheme-specific part of this URI, or null if this URI
909      * has no scheme-specific part.
910      */
getSchemeSpecificPart()911     public String getSchemeSpecificPart() {
912         return decode(schemeSpecificPart);
913     }
914 
915     /**
916      * Returns the encoded scheme-specific part of this URI, or null if this URI
917      * has no scheme-specific part.
918      */
getRawSchemeSpecificPart()919     public String getRawSchemeSpecificPart() {
920         return schemeSpecificPart;
921     }
922 
923     /**
924      * Returns the decoded authority part of this URI, or null if this URI has
925      * no authority.
926      */
getAuthority()927     public String getAuthority() {
928         return decode(authority);
929     }
930 
931     /**
932      * Returns the encoded authority of this URI, or null if this URI has no
933      * authority.
934      */
getRawAuthority()935     public String getRawAuthority() {
936         return authority;
937     }
938 
939     /**
940      * Returns the decoded user info of this URI, or null if this URI has no
941      * user info.
942      */
getUserInfo()943     public String getUserInfo() {
944         return decode(userInfo);
945     }
946 
947     /**
948      * Returns the encoded user info of this URI, or null if this URI has no
949      * user info.
950      */
getRawUserInfo()951     public String getRawUserInfo() {
952         return userInfo;
953     }
954 
955     /**
956      * Returns the host of this URI, or null if this URI has no host.
957      */
getHost()958     public String getHost() {
959         return host;
960     }
961 
962     /**
963      * Returns the port number of this URI, or {@code -1} if this URI has no
964      * explicit port.
965      */
getPort()966     public int getPort() {
967         return port;
968     }
969 
970     /** @hide */
getEffectivePort()971     public int getEffectivePort() {
972         return getEffectivePort(scheme, port);
973     }
974 
975     /**
976      * Returns the port to use for {@code scheme} connections will use when
977      * {@link #getPort} returns {@code specifiedPort}.
978      *
979      * @hide
980      */
getEffectivePort(String scheme, int specifiedPort)981     public static int getEffectivePort(String scheme, int specifiedPort) {
982         if (specifiedPort != -1) {
983             return specifiedPort;
984         }
985 
986         if ("http".equalsIgnoreCase(scheme)) {
987             return 80;
988         } else if ("https".equalsIgnoreCase(scheme)) {
989             return 443;
990         } else {
991             return -1;
992         }
993     }
994 
995     /**
996      * Returns the decoded path of this URI, or null if this URI has no path.
997      */
getPath()998     public String getPath() {
999         return decode(path);
1000     }
1001 
1002     /**
1003      * Returns the encoded path of this URI, or null if this URI has no path.
1004      */
getRawPath()1005     public String getRawPath() {
1006         return path;
1007     }
1008 
1009     /**
1010      * Returns the decoded query of this URI, or null if this URI has no query.
1011      */
getQuery()1012     public String getQuery() {
1013         return decode(query);
1014     }
1015 
1016     /**
1017      * Returns the encoded query of this URI, or null if this URI has no query.
1018      */
getRawQuery()1019     public String getRawQuery() {
1020         return query;
1021     }
1022 
1023     /**
1024      * Returns the decoded fragment of this URI, or null if this URI has no
1025      * fragment.
1026      */
getFragment()1027     public String getFragment() {
1028         return decode(fragment);
1029     }
1030 
1031     /**
1032      * Gets the encoded fragment of this URI, or null if this URI has no
1033      * fragment.
1034      */
getRawFragment()1035     public String getRawFragment() {
1036         return fragment;
1037     }
1038 
hashCode()1039     @Override public int hashCode() {
1040         if (hash == -1) {
1041             hash = getHashString().hashCode();
1042         }
1043         return hash;
1044     }
1045 
1046     /**
1047      * Returns true if this URI is absolute, which means that a scheme is
1048      * defined.
1049      */
isAbsolute()1050     public boolean isAbsolute() {
1051         // TODO: simplify to 'scheme != null' ?
1052         return absolute;
1053     }
1054 
1055     /**
1056      * Returns true if this URI is opaque. Opaque URIs are absolute and have a
1057      * scheme-specific part that does not start with a slash character. All
1058      * parts except scheme, scheme-specific and fragment are undefined.
1059      */
isOpaque()1060     public boolean isOpaque() {
1061         return opaque;
1062     }
1063 
1064     /**
1065      * Returns the normalized path.
1066      */
normalize(String path, boolean discardRelativePrefix)1067     private String normalize(String path, boolean discardRelativePrefix) {
1068         path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1069 
1070         /*
1071          * If the path contains a colon before the first colon, prepend
1072          * "./" to differentiate the path from a scheme prefix.
1073          */
1074         int colon = path.indexOf(':');
1075         if (colon != -1) {
1076             int slash = path.indexOf('/');
1077             if (slash == -1 || colon < slash) {
1078                 path = "./" + path;
1079             }
1080         }
1081 
1082         return path;
1083     }
1084 
1085     /**
1086      * Normalizes the path part of this URI.
1087      *
1088      * @return an URI object which represents this instance with a normalized
1089      *         path.
1090      */
normalize()1091     public URI normalize() {
1092         if (opaque) {
1093             return this;
1094         }
1095         String normalizedPath = normalize(path, false);
1096         // if the path is already normalized, return this
1097         if (path.equals(normalizedPath)) {
1098             return this;
1099         }
1100         // get an exact copy of the URI re-calculate the scheme specific part
1101         // since the path of the normalized URI is different from this URI.
1102         URI result = duplicate();
1103         result.path = normalizedPath;
1104         result.setSchemeSpecificPart();
1105         return result;
1106     }
1107 
1108     /**
1109      * Tries to parse the authority component of this URI to divide it into the
1110      * host, port, and user-info. If this URI is already determined as a
1111      * ServerAuthority this instance will be returned without changes.
1112      *
1113      * @return this instance with the components of the parsed server authority.
1114      * @throws URISyntaxException
1115      *             if the authority part could not be parsed as a server-based
1116      *             authority.
1117      */
parseServerAuthority()1118     public URI parseServerAuthority() throws URISyntaxException {
1119         if (!serverAuthority) {
1120             parseAuthority(true);
1121         }
1122         return this;
1123     }
1124 
1125     /**
1126      * Makes the given URI {@code relative} to a relative URI against the URI
1127      * represented by this instance.
1128      *
1129      * @param relative
1130      *            the URI which has to be relativized against this URI.
1131      * @return the relative URI.
1132      */
relativize(URI relative)1133     public URI relativize(URI relative) {
1134         if (relative.opaque || opaque) {
1135             return relative;
1136         }
1137 
1138         if (scheme == null ? relative.scheme != null : !scheme
1139                 .equals(relative.scheme)) {
1140             return relative;
1141         }
1142 
1143         if (authority == null ? relative.authority != null : !authority
1144                 .equals(relative.authority)) {
1145             return relative;
1146         }
1147 
1148         // normalize both paths
1149         String thisPath = normalize(path, false);
1150         String relativePath = normalize(relative.path, false);
1151 
1152         /*
1153          * if the paths aren't equal, then we need to determine if this URI's
1154          * path is a parent path (begins with) the relative URI's path
1155          */
1156         if (!thisPath.equals(relativePath)) {
1157             // drop everything after the last slash in this path
1158             thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1159 
1160             /*
1161              * if the relative URI's path doesn't start with this URI's path,
1162              * then just return the relative URI; the URIs have nothing in
1163              * common
1164              */
1165             if (!relativePath.startsWith(thisPath)) {
1166                 return relative;
1167             }
1168         }
1169 
1170         URI result = new URI();
1171         result.fragment = relative.fragment;
1172         result.query = relative.query;
1173         // the result URI is the remainder of the relative URI's path
1174         result.path = relativePath.substring(thisPath.length());
1175         result.setSchemeSpecificPart();
1176         return result;
1177     }
1178 
1179     /**
1180      * Resolves the given URI {@code relative} against the URI represented by
1181      * this instance.
1182      *
1183      * @param relative
1184      *            the URI which has to be resolved against this URI.
1185      * @return the resolved URI.
1186      */
resolve(URI relative)1187     public URI resolve(URI relative) {
1188         if (relative.absolute || opaque) {
1189             return relative;
1190         }
1191 
1192         if (relative.authority != null) {
1193             // If the relative URI has an authority, the result is the relative
1194             // with this URI's scheme.
1195             URI result = relative.duplicate();
1196             result.scheme = scheme;
1197             result.absolute = absolute;
1198             return result;
1199         }
1200 
1201         if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1202             // if the relative URI only consists of at most a fragment,
1203             URI result = duplicate();
1204             result.fragment = relative.fragment;
1205             return result;
1206         }
1207 
1208         URI result = duplicate();
1209         result.fragment = relative.fragment;
1210         result.query = relative.query;
1211         String resolvedPath;
1212         if (relative.path.startsWith("/")) {
1213             // The relative URI has an absolute path; use it.
1214             resolvedPath = relative.path;
1215         } else if (relative.path.isEmpty()) {
1216             // The relative URI has no path; use the base path.
1217             resolvedPath = path;
1218         } else {
1219             // The relative URI has a relative path; combine the paths.
1220             int endIndex = path.lastIndexOf('/') + 1;
1221             resolvedPath = path.substring(0, endIndex) + relative.path;
1222         }
1223         result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1224         result.setSchemeSpecificPart();
1225         return result;
1226     }
1227 
1228     /**
1229      * Helper method used to re-calculate the scheme specific part of the
1230      * resolved or normalized URIs
1231      */
setSchemeSpecificPart()1232     private void setSchemeSpecificPart() {
1233         // ssp = [//authority][path][?query]
1234         StringBuilder ssp = new StringBuilder();
1235         if (authority != null) {
1236             ssp.append("//" + authority);
1237         }
1238         if (path != null) {
1239             ssp.append(path);
1240         }
1241         if (query != null) {
1242             ssp.append("?" + query);
1243         }
1244         schemeSpecificPart = ssp.toString();
1245         // reset string, so that it can be re-calculated correctly when asked.
1246         string = null;
1247     }
1248 
1249     /**
1250      * Creates a new URI instance by parsing the given string {@code relative}
1251      * and resolves the created URI against the URI represented by this
1252      * instance.
1253      *
1254      * @param relative
1255      *            the given string to create the new URI instance which has to
1256      *            be resolved later on.
1257      * @return the created and resolved URI.
1258      */
resolve(String relative)1259     public URI resolve(String relative) {
1260         return resolve(create(relative));
1261     }
1262 
decode(String s)1263     private String decode(String s) {
1264         return s != null ? UriCodec.decode(s) : null;
1265     }
1266 
1267     /**
1268      * Returns the textual string representation of this URI instance using the
1269      * US-ASCII encoding.
1270      *
1271      * @return the US-ASCII string representation of this URI.
1272      */
toASCIIString()1273     public String toASCIIString() {
1274         StringBuilder result = new StringBuilder();
1275         ASCII_ONLY.appendEncoded(result, toString());
1276         return result.toString();
1277     }
1278 
1279     /**
1280      * Returns the encoded URI.
1281      */
toString()1282     @Override public String toString() {
1283         if (string != null) {
1284             return string;
1285         }
1286 
1287         StringBuilder result = new StringBuilder();
1288         if (scheme != null) {
1289             result.append(scheme);
1290             result.append(':');
1291         }
1292         if (opaque) {
1293             result.append(schemeSpecificPart);
1294         } else {
1295             if (authority != null) {
1296                 result.append("//");
1297                 result.append(authority);
1298             }
1299 
1300             if (path != null) {
1301                 result.append(path);
1302             }
1303 
1304             if (query != null) {
1305                 result.append('?');
1306                 result.append(query);
1307             }
1308         }
1309 
1310         if (fragment != null) {
1311             result.append('#');
1312             result.append(fragment);
1313         }
1314 
1315         string = result.toString();
1316         return string;
1317     }
1318 
1319     /*
1320      * Form a string from the components of this URI, similarly to the
1321      * toString() method. But this method converts scheme and host to lowercase,
1322      * and converts escaped octets to lowercase.
1323      */
getHashString()1324     private String getHashString() {
1325         StringBuilder result = new StringBuilder();
1326         if (scheme != null) {
1327             result.append(scheme.toLowerCase(Locale.US));
1328             result.append(':');
1329         }
1330         if (opaque) {
1331             result.append(schemeSpecificPart);
1332         } else {
1333             if (authority != null) {
1334                 result.append("//");
1335                 if (host == null) {
1336                     result.append(authority);
1337                 } else {
1338                     if (userInfo != null) {
1339                         result.append(userInfo + "@");
1340                     }
1341                     result.append(host.toLowerCase(Locale.US));
1342                     if (port != -1) {
1343                         result.append(":" + port);
1344                     }
1345                 }
1346             }
1347 
1348             if (path != null) {
1349                 result.append(path);
1350             }
1351 
1352             if (query != null) {
1353                 result.append('?');
1354                 result.append(query);
1355             }
1356         }
1357 
1358         if (fragment != null) {
1359             result.append('#');
1360             result.append(fragment);
1361         }
1362 
1363         return convertHexToLowerCase(result.toString());
1364     }
1365 
1366     /**
1367      * Converts this URI instance to a URL.
1368      *
1369      * @return the created URL representing the same resource as this URI.
1370      * @throws MalformedURLException
1371      *             if an error occurs while creating the URL or no protocol
1372      *             handler could be found.
1373      */
toURL()1374     public URL toURL() throws MalformedURLException {
1375         if (!absolute) {
1376             throw new IllegalArgumentException("URI is not absolute: " + toString());
1377         }
1378         return new URL(toString());
1379     }
1380 
readObject(ObjectInputStream in)1381     private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1382         in.defaultReadObject();
1383         try {
1384             parseURI(string, false);
1385         } catch (URISyntaxException e) {
1386             throw new IOException(e.toString());
1387         }
1388     }
1389 
writeObject(ObjectOutputStream out)1390     private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1391         // call toString() to ensure the value of string field is calculated
1392         toString();
1393         out.defaultWriteObject();
1394     }
1395 }
1396