1 // Copyright (c) 2012 Jeff Ichnowski 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions 6 // are met: 7 // 8 // * Redistributions of source code must retain the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer. 11 // 12 // * Redistributions in binary form must reproduce the above 13 // copyright notice, this list of conditions and the following 14 // disclaimer in the documentation and/or other materials 15 // provided with the distribution. 16 // 17 // * Neither the name of the OWASP nor the names of its 18 // contributors may be used to endorse or promote products 19 // derived from this software without specific prior written 20 // permission. 21 // 22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 27 // INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 33 // OF THE POSSIBILITY OF SUCH DAMAGE. 34 35 package org.owasp.encoder; 36 37 import java.nio.CharBuffer; 38 import java.nio.charset.CoderResult; 39 40 /** 41 * <p>This is the low-level encoding API. For each flavor of encoding 42 * there is an instance of this class that performs the actual 43 * encoding. Overriding and implementing Encoders outside of the 44 * OWASP Encoder's project is not currently supported.</p> 45 * 46 * <p>Unless otherwise documented, instances of these classes are 47 * thread-safe. Encoders implementations do not generally carry 48 * state, and if they do the state will be flush with a call to {@link 49 * #encode(java.nio.CharBuffer, java.nio.CharBuffer, boolean)} with 50 * {@code endOfInput} set to {@code true}.</p> 51 * 52 * <p>To use an Encoder instance directly, repeatedly call {@link 53 * #encode(java.nio.CharBuffer, java.nio.CharBuffer, boolean)} with 54 * the {@code endOfInput} parameter set to {@code false} while there 55 * is (the possibility of) more input to encode. Once there is no 56 * more input to encode, call {@link #encode(java.nio.CharBuffer, 57 * java.nio.CharBuffer, boolean)} with {@code endOfInput} set to 58 * {@code true} until the method returns {@link 59 * java.nio.charset.CoderResult#UNDERFLOW}.</p> 60 * 61 * <p>In general, this class is not expected to be needed directly. 62 * Use the {@link Encode} fluent interface for encoding Strings or 63 * {@link EncodedWriter} for large blocks of contextual encoding.</p> 64 * 65 * @author Jeff Ichnowski 66 * @see Encode 67 * @see EncodedWriter 68 */ 69 public abstract class Encoder { 70 /** 71 * Hexadecimal conversion array. Package private to prevent corruption. 72 */ 73 static final char[] HEX = "0123456789abcdef".toCharArray(); 74 75 /** 76 * Bit-shift used for encoding values in hexadecimal. 77 */ 78 static final int HEX_SHIFT = 4; 79 80 /** 81 * Bit-mask used for encoding values in hexadecimal. 82 */ 83 static final int HEX_MASK = 0xf; 84 85 /** 86 * Package-private constructor to prevent having to support 87 * external implementations of this class. This may be opened up 88 * in future releases. 89 */ Encoder()90 Encoder() {} 91 92 /** 93 * <p>This is the kernel of encoding. Currently only CharBuffers 94 * backed by arrays (i.e. {@link java.nio.CharBuffer#hasArray()} 95 * returns {@code true}) are supported. <strong>Using a 96 * direct-mapped CharBuffer will result in an 97 * UnsupportedOperationException</strong>, though this behavior 98 * may change in future releases.</p> 99 * 100 * <p>This method should be called repeatedly while {@code 101 * endOfInput} set to {@code false} while there is more input. 102 * Once there is no more input, this method should be called 103 * {@code endOfInput} set to {@code false} until {@link 104 * java.nio.charset.CoderResult#UNDERFLOW} is returned.</p> 105 * 106 * <p>After any call to this method, except when {@code 107 * endOfInput} is {@code true} and the method returns {@code 108 * UNDERFLOW}, there may be characters left to encode in the 109 * {@code input} buffer (i.e. {@code input.hasRemaining() == 110 * true}). This will happen when the encoder needs to see more 111 * input before determining what to do--for example when encoding 112 * for CDATA, if the input ends with {@code "foo]]"}, the encoder 113 * will need to see the next character to determine if it is a ">" 114 * or not.</p> 115 * 116 * <p>Example usage:</p> 117 * <pre> 118 * CharBuffer input = CharBuffer.allocate(1024); 119 * CharBuffer output = CharBuffer.allocate(1024); 120 * CoderResult cr; 121 * // assuming doRead fills in the input buffer or 122 * // returns -1 at end of input 123 * while(doRead(input) != -1) { 124 * input.flip(); 125 * for (;;) { 126 * cr = encoder.encode(input, output, false); 127 * if (cr.isUnderflow()) { 128 * break; 129 * } 130 * if (cr.isOverflow()) { 131 * // assuming doWrite flushes the encoded 132 * // characters somewhere. 133 * output.flip(); 134 * doWrite(output); 135 * output.compact(); 136 * } 137 * } 138 * input.compact(); 139 * } 140 * 141 * // at end of input 142 * input.flip(); 143 * do { 144 * cr = encoder.encode(input, output, true); 145 * output.flip(); 146 * doWrite(output); 147 * output.compact(); 148 * } while (cr.isOverflow()); 149 * </pre> 150 * 151 * @param input the input buffer to encode 152 * @param output the output buffer to receive the encoded results 153 * @param endOfInput set to {@code true} if there is no more input, and any 154 * remaining characters at the end of input will either be encoded or 155 * replaced as invalid. 156 * @return Either {@link java.nio.charset.CoderResult#UNDERFLOW} 157 * or {@link java.nio.charset.CoderResult#OVERFLOW}. No other 158 * CoderResult value will be returned. Characters or sequences 159 * that might conceivably return and invalid or unmappable 160 * character result (as part of the nio Charset API) are 161 * automatically replaced to avoid security implications. 162 */ encode(CharBuffer input, CharBuffer output, boolean endOfInput)163 public CoderResult encode(CharBuffer input, CharBuffer output, boolean endOfInput) { 164 if (input.hasRemaining()) { 165 if (input.hasArray() && output.hasArray()) { 166 return encodeArrays(input, output, endOfInput); 167 } else { 168 return encodeBuffers(input, output, endOfInput); 169 } 170 } else { 171 return CoderResult.UNDERFLOW; 172 } 173 } 174 175 /** 176 * The core encoding loop used when both the input and output buffers 177 * are array backed. The loop is expected to fetch the arrays and 178 * interact with the arrays directly for performance. 179 * 180 * @param input the input buffer. 181 * @param output the output buffer. 182 * @param endOfInput when true, this is the last input to encode 183 * @return UNDERFLOW or OVERFLOW 184 */ encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput)185 CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) { 186 throw new UnsupportedOperationException(); 187 } 188 189 /** 190 * The core encoding loop used when either or both input and output 191 * buffers are NOT array-backed. E.g. they are direct buffers or 192 * perhaps the input buffer is a read-only wrapper. In any case, 193 * this method is not currently implemented by any of the encoder 194 * implementations since it is not expected to be common use-case. 195 * The stub is included here for completeness and to demarcate 196 * where the non-array-backed use-case would be included. 197 * 198 * @param input the input buffer. 199 * @param output the output buffer. 200 * @param endOfInput when true, this is the last input to encode 201 * @return never returns. 202 * @throws UnsupportedOperationException -- always 203 */ encodeBuffers(CharBuffer input, CharBuffer output, boolean endOfInput)204 CoderResult encodeBuffers(CharBuffer input, CharBuffer output, boolean endOfInput) 205 throws UnsupportedOperationException 206 { 207 throw new UnsupportedOperationException(); 208 } 209 210 /** 211 * Returns the maximum encoded length (in chars) of an input sequence of 212 * {@code n} characters. 213 * 214 * @param n the number of characters of input 215 * @return the worst-case number of characters required to encode 216 */ maxEncodedLength(int n)217 abstract int maxEncodedLength(int n); 218 219 /** 220 * Scans the input string for the first character index that requires 221 * encoding. If the entire input does not require encoding then the 222 * length is returned. This method is used by the Encode.forXYZ methods 223 * to return input strings unchanged when possible. 224 * 225 * @param input the input to check for encoding 226 * @param off the offset of the first character to check 227 * @param len the number of characters to check 228 * @return the index of the first character to encode. The return value 229 * will be {@code off+len} if no characters in the input require encoding. 230 */ firstEncodedOffset(String input, int off, int len)231 abstract int firstEncodedOffset(String input, int off, int len); 232 233 /** 234 * Internal helper method to properly position buffers after encoding up 235 * until an overflow. 236 * 237 * @param input the input buffer 238 * @param i the array offset in the input buffer (translated to position) 239 * @param output the output buffer 240 * @param j the array offset in the output buffer (translated to position) 241 * @return CoderResult.OVERFLOW 242 */ overflow(CharBuffer input, int i, CharBuffer output, int j)243 static CoderResult overflow(CharBuffer input, int i, CharBuffer output, int j) { 244 input.position(i - input.arrayOffset()); 245 output.position(j - output.arrayOffset()); 246 return CoderResult.OVERFLOW; 247 } 248 249 /** 250 * Internal helper method to properly position buffers after encoding up 251 * until an underflow. 252 * 253 * @param input the input buffer 254 * @param i the array offset in the input buffer (translated to position) 255 * @param output the output buffer 256 * @param j the array offset in the output buffer (translated to position) 257 * @return CoderResult.UNDERFLOW 258 */ underflow(CharBuffer input, int i, CharBuffer output, int j)259 static CoderResult underflow(CharBuffer input, int i, CharBuffer output, int j) { 260 input.position(i - input.arrayOffset()); 261 output.position(j - output.arrayOffset()); 262 return CoderResult.UNDERFLOW; 263 } 264 } 265