/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.net;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.escape.Escaper;

23 /**
24  * {@code Escaper} instances suitable for strings to be included in particular
25  * sections of URLs.
26  *
27  * <p>If the resulting URLs are inserted into an HTML or XML document, they will
28  * require additional escaping with {@link com.google.common.html.HtmlEscapers}
29  * or {@link com.google.common.xml.XmlEscapers}.
30  *
31  *
32  * @author David Beaumont
33  * @author Chris Povirk
34  * @since 15.0
35  */
36 @Beta
37 @GwtCompatible
38 public final class UrlEscapers {
UrlEscapers()39   private UrlEscapers() {}
40 
41   // For each xxxEscaper() method, please add links to external reference pages
42   // that are considered authoritative for the behavior of that escaper.
43 
44   static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
45 
46   static final String URL_PATH_OTHER_SAFE_CHARS_LACKING_PLUS =
47       "-._~" +        // Unreserved characters.
48       "!$'()*,;&=" +  // The subdelim characters (excluding '+').
49       "@:";           // The gendelim characters permitted in paths.
50 
51   /**
52    * Returns an {@link Escaper} instance that escapes strings so they can be
53    * safely included in <a href="http://goo.gl/OQEc8">URL form parameter names
54    * and values</a>. Escaping is performed with the UTF-8 character encoding.
55    * The caller is responsible for <a href="http://goo.gl/i20ms">replacing any
56    * unpaired carriage return or line feed characters with a CR+LF pair</a> on
57    * any non-file inputs before escaping them with this escaper.
58    *
59    * <p>When escaping a String, the following rules apply:
60    * <ul>
61    * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
62    *     through "9" remain the same.
63    * <li>The special characters ".", "-", "*", and "_" remain the same.
64    * <li>The space character " " is converted into a plus sign "+".
65    * <li>All other characters are converted into one or more bytes using UTF-8
66    *     encoding and each byte is then represented by the 3-character string
67    *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
68    *     representation of the byte value.
69    * </ul>
70    *
71    * <p>This escaper is suitable for escaping parameter names and values even
72    * when <a href="http://goo.gl/utn6M">using the non-standard semicolon</a>,
73    * rather than the ampersand, as a parameter delimiter. Nevertheless, we
74    * recommend using the ampersand unless you must interoperate with systems
75    * that require semicolons.
76    *
77    * <p><b>Note:</b> Unlike other escapers, URL escapers produce uppercase
78    * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
79    * RFC 3986</a>:<br>
80    * <i>"URI producers and normalizers should use uppercase hexadecimal digits
81    * for all percent-encodings."</i>
82    *
83    */
urlFormParameterEscaper()84   public static Escaper urlFormParameterEscaper() {
85     return URL_FORM_PARAMETER_ESCAPER;
86   }
87 
88   private static final Escaper URL_FORM_PARAMETER_ESCAPER =
89       new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, true);
90 
91   /**
92    * Returns an {@link Escaper} instance that escapes strings so they can be
93    * safely included in <a href="http://goo.gl/swjbR">URL path segments</a>. The
94    * returned escaper escapes all non-ASCII characters, even though <a
95    * href="http://goo.gl/xIJWe">many of these are accepted in modern URLs</a>.
96    * (<a href="http://goo.gl/WMGvZ">If the escaper were to leave these
97    * characters unescaped, they would be escaped by the consumer at parse time,
98    * anyway.</a>) Additionally, the escaper escapes the slash character ("/").
99    * While slashes are acceptable in URL paths, they are considered by the
100    * specification to be separators between "path segments." This implies that,
101    * if you wish for your path to contain slashes, you must escape each segment
102    * separately and then join them.
103    *
104    * <p>When escaping a String, the following rules apply:
105    * <ul>
106    * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
107    *     through "9" remain the same.
108    * <li>The unreserved characters ".", "-", "~", and "_" remain the same.
109    * <li>The general delimiters "@" and ":" remain the same.
110    * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", "+", ",", ";",
111    *     and "=" remain the same.
112    * <li>The space character " " is converted into %20.
113    * <li>All other characters are converted into one or more bytes using UTF-8
114    *     encoding and each byte is then represented by the 3-character string
115    *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
116    *     representation of the byte value.
117    * </ul>
118    *
119    * <p><b>Note:</b> Unlike other escapers, URL escapers produce uppercase
120    * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
121    * RFC 3986</a>:<br>
122    * <i>"URI producers and normalizers should use uppercase hexadecimal digits
123    * for all percent-encodings."</i>
124    */
urlPathSegmentEscaper()125   public static Escaper urlPathSegmentEscaper() {
126     return URL_PATH_SEGMENT_ESCAPER;
127   }
128 
129   private static final Escaper URL_PATH_SEGMENT_ESCAPER =
130       new PercentEscaper(URL_PATH_OTHER_SAFE_CHARS_LACKING_PLUS + "+", false);
131 
132   /**
133    * Returns an {@link Escaper} instance that escapes strings so they can be
134    * safely included in a <a href="http://goo.gl/xXEq4p">URL fragment</a>. The
135    * returned escaper escapes all non-ASCII characters, even though <a
136    * href="http://goo.gl/xIJWe">many of these are accepted in modern URLs</a>.
137    * (<a href="http://goo.gl/WMGvZ">If the escaper were to leave these
138    * characters unescaped, they would be escaped by the consumer at parse time,
139    * anyway.</a>)
140    *
141    * <p>When escaping a String, the following rules apply:
142    * <ul>
143    * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
144    *     through "9" remain the same.
145    * <li>The unreserved characters ".", "-", "~", and "_" remain the same.
146    * <li>The general delimiters "@" and ":" remain the same.
147    * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", "+", ",", ";",
148    *     and "=" remain the same.
149    * <li>The space character " " is converted into %20.
150    * <li>Fragments allow unescaped "/" and "?", so they remain the same.
151    * <li>All other characters are converted into one or more bytes using UTF-8
152    *     encoding and each byte is then represented by the 3-character string
153    *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
154    *     representation of the byte value.
155    * </ul>
156    *
157    * <p><b>Note:</b> Unlike other escapers, URL escapers produce uppercase
158    * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
159    * RFC 3986</a>:<br>
160    * <i>"URI producers and normalizers should use uppercase hexadecimal digits
161    * for all percent-encodings."</i>
162    */
urlFragmentEscaper()163   public static Escaper urlFragmentEscaper() {
164     return URL_FRAGMENT_ESCAPER;
165   }
166 
167   private static final Escaper URL_FRAGMENT_ESCAPER =
168       new PercentEscaper(URL_PATH_OTHER_SAFE_CHARS_LACKING_PLUS + "+/?", false);
169 }
170