• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2008 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
14 
15 package com.google.common.escape;
16 
17 import com.google.common.annotations.GwtCompatible;
18 import com.google.common.base.Function;
19 import com.google.errorprone.annotations.DoNotMock;
20 
21 /**
22  * An object that converts literal text into a format safe for inclusion in a particular context
23  * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
24  * text is performed automatically by the relevant parser.
25  *
26  * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
27  * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
28  * resulting XML document is parsed, the parser API will return this text as the original literal
29  * string {@code "Foo<Bar>"}.
30  *
31  * <p>An {@code Escaper} instance is required to be stateless, and safe when used concurrently by
32  * multiple threads.
33  *
34  * <p>Because, in general, escaping operates on the code points of a string and not on its
35  * individual {@code char} values, it is not safe to assume that {@code escape(s)} is equivalent to
36  * {@code escape(s.substring(0, n)) + escape(s.substring(n))} for arbitrary {@code n}. This is
37  * because of the possibility of splitting a surrogate pair. The only case in which it is safe to
38  * escape strings and concatenate the results is if you can rule out this possibility, either by
39  * splitting an existing long string into short strings adaptively around {@linkplain
40  * Character#isHighSurrogate surrogate} {@linkplain Character#isLowSurrogate pairs}, or by starting
41  * with short strings already known to be free of unpaired surrogates.
42  *
43  * <p>The two primary implementations of this interface are {@link CharEscaper} and {@link
44  * UnicodeEscaper}. They are heavily optimized for performance and greatly simplify the task of
45  * implementing new escapers. It is strongly recommended that when implementing a new escaper you
46  * extend one of these classes. If you find that you are unable to achieve the desired behavior
47  * using either of these classes, please contact the Java libraries team for advice.
48  *
49  * <p>Popular escapers are defined as constants in classes like {@link
50  * com.google.common.html.HtmlEscapers} and {@link com.google.common.xml.XmlEscapers}. To create
51  * your own escapers, use {@link CharEscaperBuilder}, or extend {@code CharEscaper} or {@code
52  * UnicodeEscaper}.
53  *
54  * @author David Beaumont
55  * @since 15.0
56  */
57 @DoNotMock("Use Escapers.nullEscaper() or another methods from the *Escapers classes")
58 @GwtCompatible
59 public abstract class Escaper {
60   // TODO(dbeaumont): evaluate custom implementations, considering package private constructor.
61   /** Constructor for use by subclasses. */
Escaper()62   protected Escaper() {}
63 
64   /**
65    * Returns the escaped form of a given literal string.
66    *
67    * <p>Note that this method may treat input characters differently depending on the specific
68    * escaper implementation.
69    *
70    * <ul>
71    *   <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a>
72    *       correctly, including surrogate character pairs. If the input is badly formed the escaper
73    *       should throw {@link IllegalArgumentException}.
74    *   <li>{@link CharEscaper} handles Java characters independently and does not verify the input
75    *       for well formed characters. A {@code CharEscaper} should not be used in situations where
76    *       input is not guaranteed to be restricted to the Basic Multilingual Plane (BMP).
77    * </ul>
78    *
79    * @param string the literal string to be escaped
80    * @return the escaped form of {@code string}
81    * @throws NullPointerException if {@code string} is null
82    * @throws IllegalArgumentException if {@code string} contains badly formed UTF-16 or cannot be
83    *     escaped for any other reason
84    */
escape(String string)85   public abstract String escape(String string);
86 
87   private final Function<String, String> asFunction =
88       new Function<String, String>() {
89         @Override
90         public String apply(String from) {
91           return escape(from);
92         }
93       };
94 
95   /** Returns a {@link Function} that invokes {@link #escape(String)} on this escaper. */
asFunction()96   public final Function<String, String> asFunction() {
97     return asFunction;
98   }
99 }
100