1 /* 2 * Copyright (C) 2008 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.common.escape; 16 17 import com.google.common.annotations.GwtCompatible; 18 import com.google.common.base.Function; 19 import com.google.errorprone.annotations.DoNotMock; 20 21 /** 22 * An object that converts literal text into a format safe for inclusion in a particular context 23 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the 24 * text is performed automatically by the relevant parser. 25 * 26 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code 27 * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the 28 * resulting XML document is parsed, the parser API will return this text as the original literal 29 * string {@code "Foo<Bar>"}. 30 * 31 * <p>An {@code Escaper} instance is required to be stateless, and safe when used concurrently by 32 * multiple threads. 33 * 34 * <p>Because, in general, escaping operates on the code points of a string and not on its 35 * individual {@code char} values, it is not safe to assume that {@code escape(s)} is equivalent to 36 * {@code escape(s.substring(0, n)) + escape(s.substring(n))} for arbitrary {@code n}. This is 37 * because of the possibility of splitting a surrogate pair. The only case in which it is safe to 38 * escape strings and concatenate the results is if you can rule out this possibility, either by 39 * splitting an existing long string into short strings adaptively around {@linkplain 40 * Character#isHighSurrogate surrogate} {@linkplain Character#isLowSurrogate pairs}, or by starting 41 * with short strings already known to be free of unpaired surrogates. 42 * 43 * <p>The two primary implementations of this interface are {@link CharEscaper} and {@link 44 * UnicodeEscaper}. They are heavily optimized for performance and greatly simplify the task of 45 * implementing new escapers. It is strongly recommended that when implementing a new escaper you 46 * extend one of these classes. If you find that you are unable to achieve the desired behavior 47 * using either of these classes, please contact the Java libraries team for advice. 48 * 49 * <p>Popular escapers are defined as constants in classes like {@link 50 * com.google.common.html.HtmlEscapers} and {@link com.google.common.xml.XmlEscapers}. To create 51 * your own escapers, use {@link CharEscaperBuilder}, or extend {@code CharEscaper} or {@code 52 * UnicodeEscaper}. 53 * 54 * @author David Beaumont 55 * @since 15.0 56 */ 57 @DoNotMock("Use Escapers.nullEscaper() or another methods from the *Escapers classes") 58 @GwtCompatible 59 public abstract class Escaper { 60 // TODO(dbeaumont): evaluate custom implementations, considering package private constructor. 61 /** Constructor for use by subclasses. */ Escaper()62 protected Escaper() {} 63 64 /** 65 * Returns the escaped form of a given literal string. 66 * 67 * <p>Note that this method may treat input characters differently depending on the specific 68 * escaper implementation. 69 * 70 * <ul> 71 * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a> 72 * correctly, including surrogate character pairs. If the input is badly formed the escaper 73 * should throw {@link IllegalArgumentException}. 74 * <li>{@link CharEscaper} handles Java characters independently and does not verify the input 75 * for well formed characters. A {@code CharEscaper} should not be used in situations where 76 * input is not guaranteed to be restricted to the Basic Multilingual Plane (BMP). 77 * </ul> 78 * 79 * @param string the literal string to be escaped 80 * @return the escaped form of {@code string} 81 * @throws NullPointerException if {@code string} is null 82 * @throws IllegalArgumentException if {@code string} contains badly formed UTF-16 or cannot be 83 * escaped for any other reason 84 */ escape(String string)85 public abstract String escape(String string); 86 87 private final Function<String, String> asFunction = 88 new Function<String, String>() { 89 @Override 90 public String apply(String from) { 91 return escape(from); 92 } 93 }; 94 95 /** Returns a {@link Function} that invokes {@link #escape(String)} on this escaper. */ asFunction()96 public final Function<String, String> asFunction() { 97 return asFunction; 98 } 99 } 100