• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) 2012 Jeff Ichnowski
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//     * Redistributions of source code must retain the above
//       copyright notice, this list of conditions and the following
//       disclaimer.
//
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials
//       provided with the distribution.
//
//     * Neither the name of the OWASP nor the names of its
//       contributors may be used to endorse or promote products
//       derived from this software without specific prior written
//       permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 package org.owasp.encoder;
36 
37 import java.nio.CharBuffer;
38 import java.nio.charset.CoderResult;
39 
/**
 * <p>This is the low-level encoding API.  For each flavor of encoding
 * there is an instance of this class that performs the actual
 * encoding.  Overriding and implementing Encoders outside of the
 * OWASP Encoder's project is not currently supported.</p>
 *
 * <p>Unless otherwise documented, instances of these classes are
 * thread-safe.  Encoder implementations do not generally carry
 * state, and if they do the state will be flushed with a call to {@link
 * #encode(java.nio.CharBuffer, java.nio.CharBuffer, boolean)} with
 * {@code endOfInput} set to {@code true}.</p>
 *
 * <p>To use an Encoder instance directly, repeatedly call {@link
 * #encode(java.nio.CharBuffer, java.nio.CharBuffer, boolean)} with
 * the {@code endOfInput} parameter set to {@code false} while there
 * is (the possibility of) more input to encode.  Once there is no
 * more input to encode, call {@link #encode(java.nio.CharBuffer,
 * java.nio.CharBuffer, boolean)} with {@code endOfInput} set to
 * {@code true} until the method returns {@link
 * java.nio.charset.CoderResult#UNDERFLOW}.</p>
 *
 * <p>In general, this class is not expected to be needed directly.
 * Use the {@link Encode} fluent interface for encoding Strings or
 * {@link EncodedWriter} for large blocks of contextual encoding.</p>
 *
 * @author Jeff Ichnowski
 * @see Encode
 * @see EncodedWriter
 */
public abstract class Encoder {
    /**
     * Hexadecimal conversion array (lower-case digits, indexed by nibble
     * value).  Package private to prevent corruption.
     */
    static final char[] HEX = "0123456789abcdef".toCharArray();

    /**
     * Bit-shift used for encoding values in hexadecimal.
     */
    static final int HEX_SHIFT = 4;

    /**
     * Bit-mask used for encoding values in hexadecimal.
     */
    static final int HEX_MASK = 0xf;

    /**
     * Package-private constructor to prevent having to support
     * external implementations of this class.  This may be opened up
     * in future releases.
     */
    Encoder() {}

    /**
     * <p>This is the kernel of encoding.  Currently only CharBuffers
     * backed by arrays (i.e. {@link java.nio.CharBuffer#hasArray()}
     * returns {@code true}) are supported.  <strong>Using a
     * direct-mapped CharBuffer will result in an
     * UnsupportedOperationException</strong>, though this behavior
     * may change in future releases.</p>
     *
     * <p>This method should be called repeatedly with {@code
     * endOfInput} set to {@code false} while there is more input.
     * Once there is no more input, this method should be called with
     * {@code endOfInput} set to {@code true} until {@link
     * java.nio.charset.CoderResult#UNDERFLOW} is returned.</p>
     *
     * <p>After any call to this method, except when {@code
     * endOfInput} is {@code true} and the method returns {@code
     * UNDERFLOW}, there may be characters left to encode in the
     * {@code input} buffer (i.e. {@code input.hasRemaining() ==
     * true}).  This will happen when the encoder needs to see more
     * input before determining what to do--for example when encoding
     * for CDATA, if the input ends with {@code "foo]]"}, the encoder
     * will need to see the next character to determine if it is a "&gt;"
     * or not.</p>
     *
     * <p>If {@code input} has no remaining characters, this method
     * returns {@code UNDERFLOW} immediately without consulting either
     * buffer's backing store.</p>
     *
     * <p>Example usage:</p>
     * <pre>
     *   CharBuffer input = CharBuffer.allocate(1024);
     *   CharBuffer output = CharBuffer.allocate(1024);
     *   CoderResult cr;
     *   // assuming doRead fills in the input buffer or
     *   // returns -1 at end of input
     *   while(doRead(input) != -1) {
     *     input.flip();
     *     for (;;) {
     *       cr = encoder.encode(input, output, false);
     *       if (cr.isUnderflow()) {
     *         break;
     *       }
     *       if (cr.isOverflow()) {
     *         // assuming doWrite flushes the encoded
     *         // characters somewhere.
     *         output.flip();
     *         doWrite(output);
     *         output.compact();
     *       }
     *     }
     *     input.compact();
     *   }
     *
     *   // at end of input
     *   input.flip();
     *   do {
     *     cr = encoder.encode(input, output, true);
     *     output.flip();
     *     doWrite(output);
     *     output.compact();
     *   } while (cr.isOverflow());
     * </pre>
     *
     * @param input the input buffer to encode
     * @param output the output buffer to receive the encoded results
     * @param endOfInput set to {@code true} if there is no more input, and any
     * remaining characters at the end of input will either be encoded or
     * replaced as invalid.
     * @return Either {@link java.nio.charset.CoderResult#UNDERFLOW}
     * or {@link java.nio.charset.CoderResult#OVERFLOW}.  No other
     * CoderResult value will be returned.  Characters or sequences
     * that might conceivably return an invalid or unmappable
     * character result (as part of the nio Charset API) are
     * automatically replaced to avoid security implications.
     */
    public CoderResult encode(CharBuffer input, CharBuffer output, boolean endOfInput) {
        // Nothing to consume: report underflow without dispatching.
        if (!input.hasRemaining()) {
            return CoderResult.UNDERFLOW;
        }
        // Fast path requires direct array access on BOTH buffers.
        if (input.hasArray() && output.hasArray()) {
            return encodeArrays(input, output, endOfInput);
        }
        return encodeBuffers(input, output, endOfInput);
    }

    /**
     * The core encoding loop used when both the input and output buffers
     * are array backed.  The loop is expected to fetch the arrays and
     * interact with the arrays directly for performance.
     *
     * <p>The implementation in this base class always throws; concrete
     * encoders are expected to override it.</p>
     *
     * @param input the input buffer.
     * @param output the output buffer.
     * @param endOfInput when true, this is the last input to encode
     * @return UNDERFLOW or OVERFLOW
     * @throws UnsupportedOperationException if not overridden
     */
    CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
        throw new UnsupportedOperationException();
    }

    /**
     * The core encoding loop used when either or both input and output
     * buffers are NOT array-backed.  E.g. they are direct buffers or
     * perhaps the input buffer is a read-only wrapper.  In any case,
     * this method is not currently implemented by any of the encoder
     * implementations since it is not expected to be common use-case.
     * The stub is included here for completeness and to demarcate
     * where the non-array-backed use-case would be included.
     *
     * @param input the input buffer.
     * @param output the output buffer.
     * @param endOfInput when true, this is the last input to encode
     * @return never returns.
     * @throws UnsupportedOperationException -- always
     */
    CoderResult encodeBuffers(CharBuffer input, CharBuffer output, boolean endOfInput)
        throws UnsupportedOperationException
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns the maximum encoded length (in chars) of an input sequence of
     * {@code n} characters.
     *
     * @param n the number of characters of input
     * @return the worst-case number of characters required to encode
     */
    abstract int maxEncodedLength(int n);

    /**
     * Scans the input string for the first character index that requires
     * encoding.  If the entire input does not require encoding then the
     * length is returned.  This method is used by the Encode.forXYZ methods
     * to return input strings unchanged when possible.
     *
     * @param input the input to check for encoding
     * @param off the offset of the first character to check
     * @param len the number of characters to check
     * @return the index of the first character to encode.  The return value
     * will be {@code off+len} if no characters in the input require encoding.
     */
    abstract int firstEncodedOffset(String input, int off, int len);

    /**
     * Internal helper method to properly position buffers after encoding up
     * until an overflow.
     *
     * @param input the input buffer
     * @param i the array offset in the input buffer (translated to position)
     * @param output the output buffer
     * @param j the array offset in the output buffer (translated to position)
     * @return CoderResult.OVERFLOW
     */
    static CoderResult overflow(CharBuffer input, int i, CharBuffer output, int j) {
        reposition(input, i);
        reposition(output, j);
        return CoderResult.OVERFLOW;
    }

    /**
     * Internal helper method to properly position buffers after encoding up
     * until an underflow.
     *
     * @param input the input buffer
     * @param i the array offset in the input buffer (translated to position)
     * @param output the output buffer
     * @param j the array offset in the output buffer (translated to position)
     * @return CoderResult.UNDERFLOW
     */
    static CoderResult underflow(CharBuffer input, int i, CharBuffer output, int j) {
        reposition(input, i);
        reposition(output, j);
        return CoderResult.UNDERFLOW;
    }

    /**
     * Sets a buffer's position from an index into its backing array.
     * The buffer's {@code arrayOffset()} is subtracted because positions
     * are relative to the buffer while the index is relative to the array.
     *
     * @param buffer an array-backed buffer to reposition
     * @param arrayIndex index into the buffer's backing array
     */
    private static void reposition(CharBuffer buffer, int arrayIndex) {
        buffer.position(arrayIndex - buffer.arrayOffset());
    }
}
265