• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2006 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.escape;
16 
17 import static com.google.common.base.Preconditions.checkNotNull;
18 
19 import com.google.common.annotations.Beta;
20 import com.google.common.annotations.GwtCompatible;
21 
22 /**
23  * An object that converts literal text into a format safe for inclusion in a particular context
24  * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
25  * text is performed automatically by the relevant parser.
26  *
27  * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
28  * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
29  * resulting XML document is parsed, the parser API will return this text as the original literal
30  * string {@code "Foo<Bar>"}.
31  *
32  * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by
33  * multiple threads.
34  *
35  * <p>Popular escapers are defined as constants in classes like {@link
36  * com.google.common.html.HtmlEscapers} and {@link com.google.common.xml.XmlEscapers}. To create
37  * your own escapers extend this class and implement the {@link #escape(char)} method.
38  *
39  * @author Sven Mawson
40  * @since 15.0
41  */
42 @Beta
43 @GwtCompatible
44 public abstract class CharEscaper extends Escaper {
45   /** Constructor for use by subclasses. */
CharEscaper()46   protected CharEscaper() {}
47 
48   /**
49    * Returns the escaped form of a given literal string.
50    *
51    * @param string the literal string to be escaped
52    * @return the escaped form of {@code string}
53    * @throws NullPointerException if {@code string} is null
54    */
55   @Override
escape(String string)56   public String escape(String string) {
57     checkNotNull(string); // GWT specific check (do not optimize)
58     // Inlineable fast-path loop which hands off to escapeSlow() only if needed
59     int length = string.length();
60     for (int index = 0; index < length; index++) {
61       if (escape(string.charAt(index)) != null) {
62         return escapeSlow(string, index);
63       }
64     }
65     return string;
66   }
67 
68   /**
69    * Returns the escaped form of the given character, or {@code null} if this character does not
70    * need to be escaped. If an empty array is returned, this effectively strips the input character
71    * from the resulting text.
72    *
73    * <p>If the character does not need to be escaped, this method should return {@code null}, rather
74    * than a one-character array containing the character itself. This enables the escaping algorithm
75    * to perform more efficiently.
76    *
77    * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should
78    * not throw any exceptions.
79    *
80    * @param c the character to escape if necessary
81    * @return the replacement characters, or {@code null} if no escaping was needed
82    */
escape(char c)83   protected abstract char[] escape(char c);
84 
85   /**
86    * Returns the escaped form of a given literal string, starting at the given index. This method is
87    * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
88    * protected to allow subclasses to override the fastpath escaping function to inline their
89    * escaping test. See {@link CharEscaperBuilder} for an example usage.
90    *
91    * @param s the literal string to be escaped
92    * @param index the index to start escaping from
93    * @return the escaped form of {@code string}
94    * @throws NullPointerException if {@code string} is null
95    */
escapeSlow(String s, int index)96   protected final String escapeSlow(String s, int index) {
97     int slen = s.length();
98 
99     // Get a destination buffer and setup some loop variables.
100     char[] dest = Platform.charBufferFromThreadLocal();
101     int destSize = dest.length;
102     int destIndex = 0;
103     int lastEscape = 0;
104 
105     // Loop through the rest of the string, replacing when needed into the
106     // destination buffer, which gets grown as needed as well.
107     for (; index < slen; index++) {
108 
109       // Get a replacement for the current character.
110       char[] r = escape(s.charAt(index));
111 
112       // If no replacement is needed, just continue.
113       if (r == null) {
114         continue;
115       }
116 
117       int rlen = r.length;
118       int charsSkipped = index - lastEscape;
119 
120       // This is the size needed to add the replacement, not the full size
121       // needed by the string. We only regrow when we absolutely must, and
122       // when we do grow, grow enough to avoid excessive growing. Grow.
123       int sizeNeeded = destIndex + charsSkipped + rlen;
124       if (destSize < sizeNeeded) {
125         destSize = sizeNeeded + DEST_PAD_MULTIPLIER * (slen - index);
126         dest = growBuffer(dest, destIndex, destSize);
127       }
128 
129       // If we have skipped any characters, we need to copy them now.
130       if (charsSkipped > 0) {
131         s.getChars(lastEscape, index, dest, destIndex);
132         destIndex += charsSkipped;
133       }
134 
135       // Copy the replacement string into the dest buffer as needed.
136       if (rlen > 0) {
137         System.arraycopy(r, 0, dest, destIndex, rlen);
138         destIndex += rlen;
139       }
140       lastEscape = index + 1;
141     }
142 
143     // Copy leftover characters if there are any.
144     int charsLeft = slen - lastEscape;
145     if (charsLeft > 0) {
146       int sizeNeeded = destIndex + charsLeft;
147       if (destSize < sizeNeeded) {
148 
149         // Regrow and copy, expensive! No padding as this is the final copy.
150         dest = growBuffer(dest, destIndex, sizeNeeded);
151       }
152       s.getChars(lastEscape, slen, dest, destIndex);
153       destIndex = sizeNeeded;
154     }
155     return new String(dest, 0, destIndex);
156   }
157 
158   /**
159    * Helper method to grow the character buffer as needed, this only happens once in a while so it's
160    * ok if it's in a method call. If the index passed in is 0 then no copying will be done.
161    */
growBuffer(char[] dest, int index, int size)162   private static char[] growBuffer(char[] dest, int index, int size) {
163     if (size < 0) { // overflow - should be OutOfMemoryError but GWT/j2cl don't support it
164       throw new AssertionError("Cannot increase internal buffer any further");
165     }
166     char[] copy = new char[size];
167     if (index > 0) {
168       System.arraycopy(dest, 0, copy, 0, index);
169     }
170     return copy;
171   }
172 
173   /** The multiplier for padding to use when growing the escape buffer. */
174   private static final int DEST_PAD_MULTIPLIER = 2;
175 }
176