1 /* 2 * Copyright (C) 2006 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.common.escape; 16 17 import static com.google.common.base.Preconditions.checkNotNull; 18 19 import com.google.common.annotations.Beta; 20 import com.google.common.annotations.GwtCompatible; 21 22 /** 23 * An object that converts literal text into a format safe for inclusion in a particular context 24 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the 25 * text is performed automatically by the relevant parser. 26 * 27 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code 28 * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the 29 * resulting XML document is parsed, the parser API will return this text as the original literal 30 * string {@code "Foo<Bar>"}. 31 * 32 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by 33 * multiple threads. 34 * 35 * <p>Popular escapers are defined as constants in classes like {@link 36 * com.google.common.html.HtmlEscapers} and {@link com.google.common.xml.XmlEscapers}. To create 37 * your own escapers extend this class and implement the {@link #escape(char)} method. 38 * 39 * @author Sven Mawson 40 * @since 15.0 41 */ 42 @Beta 43 @GwtCompatible 44 public abstract class CharEscaper extends Escaper { 45 /** Constructor for use by subclasses. */ CharEscaper()46 protected CharEscaper() {} 47 48 /** 49 * Returns the escaped form of a given literal string. 50 * 51 * @param string the literal string to be escaped 52 * @return the escaped form of {@code string} 53 * @throws NullPointerException if {@code string} is null 54 */ 55 @Override escape(String string)56 public String escape(String string) { 57 checkNotNull(string); // GWT specific check (do not optimize) 58 // Inlineable fast-path loop which hands off to escapeSlow() only if needed 59 int length = string.length(); 60 for (int index = 0; index < length; index++) { 61 if (escape(string.charAt(index)) != null) { 62 return escapeSlow(string, index); 63 } 64 } 65 return string; 66 } 67 68 /** 69 * Returns the escaped form of the given character, or {@code null} if this character does not 70 * need to be escaped. If an empty array is returned, this effectively strips the input character 71 * from the resulting text. 72 * 73 * <p>If the character does not need to be escaped, this method should return {@code null}, rather 74 * than a one-character array containing the character itself. This enables the escaping algorithm 75 * to perform more efficiently. 76 * 77 * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should 78 * not throw any exceptions. 79 * 80 * @param c the character to escape if necessary 81 * @return the replacement characters, or {@code null} if no escaping was needed 82 */ escape(char c)83 protected abstract char[] escape(char c); 84 85 /** 86 * Returns the escaped form of a given literal string, starting at the given index. This method is 87 * called by the {@link #escape(String)} method when it discovers that escaping is required. It is 88 * protected to allow subclasses to override the fastpath escaping function to inline their 89 * escaping test. See {@link CharEscaperBuilder} for an example usage. 90 * 91 * @param s the literal string to be escaped 92 * @param index the index to start escaping from 93 * @return the escaped form of {@code string} 94 * @throws NullPointerException if {@code string} is null 95 */ escapeSlow(String s, int index)96 protected final String escapeSlow(String s, int index) { 97 int slen = s.length(); 98 99 // Get a destination buffer and setup some loop variables. 100 char[] dest = Platform.charBufferFromThreadLocal(); 101 int destSize = dest.length; 102 int destIndex = 0; 103 int lastEscape = 0; 104 105 // Loop through the rest of the string, replacing when needed into the 106 // destination buffer, which gets grown as needed as well. 107 for (; index < slen; index++) { 108 109 // Get a replacement for the current character. 110 char[] r = escape(s.charAt(index)); 111 112 // If no replacement is needed, just continue. 113 if (r == null) { 114 continue; 115 } 116 117 int rlen = r.length; 118 int charsSkipped = index - lastEscape; 119 120 // This is the size needed to add the replacement, not the full size 121 // needed by the string. We only regrow when we absolutely must, and 122 // when we do grow, grow enough to avoid excessive growing. Grow. 123 int sizeNeeded = destIndex + charsSkipped + rlen; 124 if (destSize < sizeNeeded) { 125 destSize = sizeNeeded + DEST_PAD_MULTIPLIER * (slen - index); 126 dest = growBuffer(dest, destIndex, destSize); 127 } 128 129 // If we have skipped any characters, we need to copy them now. 130 if (charsSkipped > 0) { 131 s.getChars(lastEscape, index, dest, destIndex); 132 destIndex += charsSkipped; 133 } 134 135 // Copy the replacement string into the dest buffer as needed. 136 if (rlen > 0) { 137 System.arraycopy(r, 0, dest, destIndex, rlen); 138 destIndex += rlen; 139 } 140 lastEscape = index + 1; 141 } 142 143 // Copy leftover characters if there are any. 144 int charsLeft = slen - lastEscape; 145 if (charsLeft > 0) { 146 int sizeNeeded = destIndex + charsLeft; 147 if (destSize < sizeNeeded) { 148 149 // Regrow and copy, expensive! No padding as this is the final copy. 150 dest = growBuffer(dest, destIndex, sizeNeeded); 151 } 152 s.getChars(lastEscape, slen, dest, destIndex); 153 destIndex = sizeNeeded; 154 } 155 return new String(dest, 0, destIndex); 156 } 157 158 /** 159 * Helper method to grow the character buffer as needed, this only happens once in a while so it's 160 * ok if it's in a method call. If the index passed in is 0 then no copying will be done. 161 */ growBuffer(char[] dest, int index, int size)162 private static char[] growBuffer(char[] dest, int index, int size) { 163 if (size < 0) { // overflow - should be OutOfMemoryError but GWT/j2cl don't support it 164 throw new AssertionError("Cannot increase internal buffer any further"); 165 } 166 char[] copy = new char[size]; 167 if (index > 0) { 168 System.arraycopy(dest, 0, copy, 0, index); 169 } 170 return copy; 171 } 172 173 /** The multiplier for padding to use when growing the escape buffer. */ 174 private static final int DEST_PAD_MULTIPLIER = 2; 175 } 176