• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 Jeff Ichnowski
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions
6 // are met:
7 //
8 //     * Redistributions of source code must retain the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer.
11 //
12 //     * Redistributions in binary form must reproduce the above
13 //       copyright notice, this list of conditions and the following
14 //       disclaimer in the documentation and/or other materials
15 //       provided with the distribution.
16 //
17 //     * Neither the name of the OWASP nor the names of its
18 //       contributors may be used to endorse or promote products
19 //       derived from this software without specific prior written
20 //       permission.
21 //
22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
27 // INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
33 // OF THE POSSIBILITY OF SUCH DAMAGE.
34 package org.owasp.encoder;
35 
36 import java.nio.CharBuffer;
37 import java.nio.charset.CoderResult;
38 
39 /**
40  * JavaEncoder -- Encoder for Java based strings. Useful if in Java code
41  * generators to generate efficiently encoded strings for arbitrary data. This
42  * encoder uses the minimal sequence of characters required to encode a
43  * character (e.g. standard backslash escapes, such as "\n", "\\" , "\'", octal
44  * escapes, and unicode escapes). This encoder does NOT check UTF-16 surrogate
45  * pair sequences. The target output context supports mismatched UTF-16 pairs
46  * (e.g. it will compile, run, etc... with them).
47  *
48  * @author Jeff Ichnowski
49  */
50 class JavaEncoder extends Encoder {
51 
52     /**
53      * The length of a Unicode escape, e.g. "\\u1234".
54      */
55     static final int U_ESCAPE_LENGTH = 6;
56     /**
57      * The length of a octal escape sequence, e.g. "\377".
58      */
59     static final int OCT_ESCAPE_LENGTH = 4;
60     /**
61      * Number of bits to shift for each octal unit.
62      */
63     static final int OCT_SHIFT = 3;
64     /**
65      * The bit-mask for an octal unit.
66      */
67     static final int OCT_MASK = 7;
68 
69     @Override
maxEncodedLength(int n)70     protected int maxEncodedLength(int n) {
71         // "\\u####"
72         return n * U_ESCAPE_LENGTH;
73     }
74 
75     @Override
firstEncodedOffset(String input, int off, int len)76     protected int firstEncodedOffset(String input, int off, int len) {
77         final int n = off + len;
78         for (int i = off; i < n; ++i) {
79             char ch = input.charAt(i);
80             if (ch >= ' ' && ch <= '~') {
81                 if (ch == '\\' || ch == '\'' || ch == '\"') {
82                     return i;
83                 }
84             } else {
85                 return i;
86             }
87         }
88         return n;
89     }
90 
91     @Override
encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput)92     protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
93         final char[] in = input.array();
94         final char[] out = output.array();
95         int i = input.arrayOffset() + input.position();
96         final int n = input.arrayOffset() + input.limit();
97         int j = output.arrayOffset() + output.position();
98         final int m = output.arrayOffset() + output.limit();
99 
100         charLoop:
101         for (; i < n; ++i) {
102             final char ch = in[i];
103             if (ch >= ' ' && ch <= '~') {
104                 if (ch == '\\' || ch == '\'' || ch == '\"') {
105                     if (j + 1 >= m) {
106                         return overflow(input, i, output, j);
107                     }
108                     out[j++] = '\\';
109                     out[j++] = ch;
110                 } else {
111                     if (j >= m) {
112                         return overflow(input, i, output, j);
113                     }
114                     out[j++] = ch;
115                 }
116             } else {
117                 switch (ch) {
118                     case '\b':
119                         if (j + 1 >= m) {
120                             return overflow(input, i, output, j);
121                         }
122                         out[j++] = '\\';
123                         out[j++] = 'b';
124                         break;
125                     case '\t':
126                         if (j + 1 >= m) {
127                             return overflow(input, i, output, j);
128                         }
129                         out[j++] = '\\';
130                         out[j++] = 't';
131                         break;
132                     case '\n':
133                         if (j + 1 >= m) {
134                             return overflow(input, i, output, j);
135                         }
136                         out[j++] = '\\';
137                         out[j++] = 'n';
138                         break;
139                     case '\f':
140                         if (j + 1 >= m) {
141                             return overflow(input, i, output, j);
142                         }
143                         out[j++] = '\\';
144                         out[j++] = 'f';
145                         break;
146                     case '\r':
147                         if (j + 1 >= m) {
148                             return overflow(input, i, output, j);
149                         }
150                         out[j++] = '\\';
151                         out[j++] = 'r';
152                         break;
153                     default:
154                         if (ch <= '\377') {
155                             longEscapeNeeded:
156                             {
157                                 if (ch <= '\37') {
158                                     // "short" octal escapes: '\0' to '\37'
159                                     // cannot be followed by '0' to '7' thus
160                                     // require a lookahead to use.
161                                     if (i + 1 < n) {
162                                         char la = in[i + 1];
163                                         if ('0' <= la && la <= '7') {
164                                             break longEscapeNeeded;
165                                         }
166                                     } else if (!endOfInput) {
167                                         // need more characters to see if we can use
168                                         // a short octal escape.
169                                         break charLoop;
170                                     }
171 
172                                     if (ch <= '\7') {
173                                         if (j + 1 >= m) {
174                                             return overflow(input, i, output, j);
175                                         }
176                                         out[j++] = '\\';
177                                         out[j++] = (char) (ch + '0');
178                                     } else {
179                                         if (j + 2 >= m) {
180                                             return overflow(input, i, output, j);
181                                         }
182                                         out[j++] = '\\';
183                                         out[j++] = (char) ((ch >>> OCT_SHIFT) + '0');
184                                         out[j++] = (char) ((ch & OCT_MASK) + '0');
185                                     }
186 
187                                     continue;
188                                 }
189                             }
190 
191                             if (j + OCT_ESCAPE_LENGTH > m) {
192                                 return overflow(input, i, output, j);
193                             }
194                             out[j++] = '\\';
195                             out[j++] = (char) ((ch >>> 2 * OCT_SHIFT) + '0');
196                             out[j++] = (char) (((ch >>> OCT_SHIFT) & OCT_MASK) + '0');
197                             out[j++] = (char) ((ch & OCT_MASK) + '0');
198                         } else {
199                             if (j + U_ESCAPE_LENGTH > m) {
200                                 return overflow(input, i, output, j);
201                             }
202                             out[j++] = '\\';
203                             out[j++] = 'u';
204                             out[j++] = HEX[ch >>> 3 * HEX_SHIFT];
205                             out[j++] = HEX[(ch >>> 2 * HEX_SHIFT) & HEX_MASK];
206                             out[j++] = HEX[(ch >>> HEX_SHIFT) & HEX_MASK];
207                             out[j++] = HEX[ch & HEX_MASK];
208                         }
209                 }
210             }
211         }
212 
213         return underflow(input, i, output, j);
214     }
215 }
216