/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */

package libcore.net;

import java.io.ByteArrayOutputStream;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

/**
 * Encodes and decodes "application/x-www-form-urlencoded" content.
 *
 * <p>Subclasses define {@link #isRetained(char)}, which decides which chars need to be escaped
 * and which don't. Output is encoded as UTF-8 by default, i.e. each character (or surrogate
 * pair) is converted to its equivalent UTF-8 encoded byte sequence, which is then converted to
 * its escaped form. For example, a 4 byte sequence might look like "%c6%ef%e0%e8".
 */
public abstract class UriCodec {

    /**
     * Character to be output when there's an error decoding an input.
     */
    private static final char INVALID_INPUT_CHARACTER = '\ufffd';

    /**
     * Returns true iff {@code c} does not need to be escaped.
     *
     * <p>'a' - 'z', 'A' - 'Z' and '0' - '9' are always considered valid (i.e. they never need
     * to be escaped), regardless of what this method returns. That set is referred to as the
     * "whitelist".
     */
    protected abstract boolean isRetained(char c);

    /** Returns true iff {@code c} is an ASCII letter or an ASCII digit. */
    private static boolean isWhitelisted(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
    }

    /** Returns true iff {@code c} may appear unescaped in the output of this codec. */
    private boolean isWhitelistedOrRetained(char c) {
        return isWhitelisted(c) || isRetained(c);
    }

    /**
     * Throws {@link URISyntaxException} if any of the characters in the range [start, end) are
     * not valid according to this codec.
     * <ul>
     *   <li>If a char is in the whitelist or retained, it is valid both escaped and unescaped.
     *   <li>All escaped octets appearing in the input must be structurally valid, i.e. a '%'
     *       followed by two hex digits that both lie inside [start, end).
     * </ul>
     *
     * @param uri the string to validate
     * @param start index of the first character to check (inclusive)
     * @param end index at which checking stops (exclusive)
     * @param name only used to generate debugging info in exception messages; may be null
     * @return the substring [start, end) on success
     * @throws URISyntaxException if an illegal character or a truncated/invalid escape is found
     */
    public final String validate(String uri, int start, int end, String name)
            throws URISyntaxException {
        int i = start;
        while (i < end) {
            char c = uri.charAt(i++);
            if (isWhitelistedOrRetained(c)) {
                continue;
            }
            // c is either '%' or a character not allowed in a uri.
            if (c != '%') {
                throw unexpectedCharacterException(uri, name, c, i - 1);
            }
            // Expect two characters representing a number in hex.
            for (int j = 0; j < 2; j++) {
                c = getNextCharacter(uri, i++, end, name);
                if (hexCharToValue(c) < 0) {
                    throw unexpectedCharacterException(uri, name, c, i - 1);
                }
            }
        }
        return uri.substring(start, end);
    }

    /**
     * Interprets a char as a hex digit, returning a number from -1 (invalid char) to 15 ('f').
     * Only the ASCII characters 0-9, a-f and A-F are accepted.
     */
    private static int hexCharToValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return 10 + c - 'a';
        }
        if ('A' <= c && c <= 'F') {
            return 10 + c - 'A';
        }
        return -1;
    }

    /** Returns the " in [name]" fragment used in error messages, or "" if {@code name} is null. */
    private static String describeName(String name) {
        return (name == null) ? "" : " in [" + name + "]";
    }

    /** Builds (but does not throw) the exception reporting {@code unexpected} at {@code index}. */
    private static URISyntaxException unexpectedCharacterException(
            String uri, String name, char unexpected, int index) {
        return new URISyntaxException(
                uri, "Unexpected character" + describeName(name) + ": " + unexpected, index);
    }

    /**
     * Returns {@code uri.charAt(index)}.
     *
     * @throws URISyntaxException if {@code index} is at or beyond {@code end}
     */
    private static char getNextCharacter(String uri, int index, int end, String name)
            throws URISyntaxException {
        if (index >= end) {
            throw new URISyntaxException(
                    uri, "Unexpected end of string" + describeName(name), index);
        }
        return uri.charAt(index);
    }

    /**
     * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted
     * nor in {@code legal}.
     */
    public static void validateSimple(String uri, String legal) throws URISyntaxException {
        for (int i = 0; i < uri.length(); i++) {
            char c = uri.charAt(i);
            if (!isWhitelisted(c) && legal.indexOf(c) < 0) {
                throw unexpectedCharacterException(uri, null /* name */, c, i);
            }
        }
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment),
     * using {@code charset} to turn escaped characters into bytes.
     *
     * @return the encoded string
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars
     *         (e.g. a character that {@code charset} cannot represent).
     */
    public final String encode(String s, Charset charset) {
        StringBuilder builder = new StringBuilder(s.length());
        appendEncoded(builder, s, charset, false);
        return builder.toString();
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8).
     */
    public final void appendEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, false);
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8). This method produces partially encoded output: '%' characters appearing in the
     * input are passed through unmodified, instead of being double escaped. Consider an encoder
     * that retains nothing beyond the whitelist, dealing with the string "foo%25bar". With this
     * method the output will be "foo%25bar", but with {@code appendEncoded} it will be double
     * encoded into "foo%2525bar".
     */
    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, true);
    }

    /**
     * Shared implementation of the public encode methods.
     *
     * <p>Characters that must be escaped are queued in a char buffer so that consecutive ones
     * (notably surrogate pairs) are handed to the charset encoder together; the queue is flushed
     * whenever a character that is emitted verbatim is encountered.
     *
     * @param partiallyEncoded if true, '%' is copied to the output instead of being escaped
     */
    private void appendEncoded(
            StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
        CharsetEncoder encoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        CharBuffer cBuffer = CharBuffer.allocate(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == '%' && partiallyEncoded) {
                // In case there are characters waiting to be encoded.
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('%');
                continue;
            }

            // A retained space is written in its form-encoded representation.
            if (c == ' ' && isRetained(' ')) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('+');
                continue;
            }

            if (isWhitelistedOrRetained(c)) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append(c);
                continue;
            }

            // Put the character in the queue for encoding.
            cBuffer.put(c);
        }
        flushEncodingCharBuffer(builder, encoder, cBuffer);
    }

    /**
     * Encodes the characters queued in {@code cBuffer}, appends each resulting byte to
     * {@code builder} as an upper-case "%XX" escape, and leaves {@code cBuffer} empty and ready
     * to be written to again. Does nothing if the buffer is empty.
     *
     * @throws IllegalArgumentException if the queued characters cannot be encoded
     */
    private static void flushEncodingCharBuffer(
            StringBuilder builder,
            CharsetEncoder encoder,
            CharBuffer cBuffer) {
        if (cBuffer.position() == 0) {
            return;
        }
        // We are reading from the buffer now.
        cBuffer.flip();
        ByteBuffer byteBuffer = ByteBuffer.allocate(
                cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
        CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
        // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
        // and leaves an empty input buffer when all chars were processed correctly.
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] using encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        if (cBuffer.hasRemaining()) {
            throw new IllegalArgumentException(
                    "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
                            + "remaining input [" + cBuffer + "]");
        }
        // Need to flush in case the encoder saves internal state. The result of the flush has
        // to be captured, otherwise the check below would only re-test the encode() result.
        result = encoder.flush(byteBuffer);
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] flushing encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        encoder.reset();

        byteBuffer.flip();
        // Write the encoded bytes.
        while (byteBuffer.hasRemaining()) {
            byte b = byteBuffer.get();
            builder.append('%');
            builder.append(intToHexDigit((b & 0xf0) >>> 4));
            builder.append(intToHexDigit(b & 0x0f));
        }
        // Use the character buffer to write again.
        cBuffer.clear();
    }

    /** Converts a value in the range [0, 15] to its upper-case hex digit. */
    private static char intToHexDigit(int b) {
        if (b < 10) {
            return (char) ('0' + b);
        } else {
            return (char) ('A' + b - 10);
        }
    }

    /**
     * Decodes a string according to the rules of this decoder.
     *
     * <ul>
     *   <li>if {@code convertPlus == true}, all '+' chars in the input are converted to ' '
     *       (white space) in the decoded output.
     *   <li>if {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown
     *       for structurally invalid escapes (a '%' not followed by two hex digits) and for
     *       byte sequences the decoder reports as unmappable. Otherwise U+FFFD is emitted to
     *       the output in place of the invalid input.
     * </ul>
     *
     * <p>Malformed byte sequences are always replaced by U+FFFD, regardless of
     * {@code throwOnFailure}.
     *
     * @param s the string to decode
     * @param charset the charset used to turn the escaped octets back into characters
     * @return the decoded string
     */
    public static String decode(
            String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
        StringBuilder builder = new StringBuilder(s.length());
        appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
        return builder.toString();
    }

    /**
     * Implementation of {@link #decode(String, boolean, Charset, boolean)}; appends the decoded
     * form of {@code s} to {@code builder}.
     *
     * <p>Consecutive "%XX" escapes are accumulated as bytes and decoded together, so that
     * multi-byte sequences are converted into a single character. The accumulator is flushed
     * whenever a non-escaped character is read.
     */
    private static void appendDecoded(
            StringBuilder builder,
            String s,
            boolean convertPlus,
            Charset charset,
            boolean throwOnFailure) {
        CharsetDecoder decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .replaceWith(String.valueOf(INVALID_INPUT_CHARACTER))
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        // Holds the bytes corresponding to the escaped chars being read (empty if the last char
        // wasn't an escaped char).
        ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
        int i = 0;
        while (i < s.length()) {
            char c = s.charAt(i);
            i++;
            switch (c) {
                case '+':
                    flushDecodingByteAccumulator(
                            builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(convertPlus ? ' ' : '+');
                    break;
                case '%': {
                    // Expect two characters representing a number in hex.
                    int hexValue = 0;
                    boolean validEscape = true;
                    for (int j = 0; j < 2; j++) {
                        try {
                            c = getNextCharacter(s, i, s.length(), null /* name */);
                        } catch (URISyntaxException e) {
                            // Unexpected end of input.
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(e);
                            }
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            return;
                        }
                        i++;
                        int newDigit = hexCharToValue(c);
                        if (newDigit < 0) {
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(
                                        unexpectedCharacterException(
                                                s, null /* name */, c, i - 1));
                            }
                            // The broken escape (including the offending character) is
                            // replaced by a single U+FFFD.
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            validEscape = false;
                            break;
                        }
                        hexValue = hexValue * 0x10 + newDigit;
                    }
                    // Only a complete, valid escape contributes a byte. Storing the partial
                    // value of a broken escape would leak a bogus character into the output.
                    if (validEscape) {
                        byteBuffer.put((byte) hexValue);
                    }
                    break;
                }
                default:
                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(c);
            }
        }
        flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
    }

    /**
     * Decodes the bytes accumulated in {@code byteBuffer}, appends the result to
     * {@code builder} and leaves the buffer empty and ready to be written to again. Does
     * nothing if the buffer is empty.
     *
     * @throws IllegalArgumentException if decoding fails and {@code throwOnFailure} is true;
     *         when it is false, U+FFFD is appended instead.
     */
    private static void flushDecodingByteAccumulator(
            StringBuilder builder,
            CharsetDecoder decoder,
            ByteBuffer byteBuffer,
            boolean throwOnFailure) {
        if (byteBuffer.position() == 0) {
            return;
        }
        byteBuffer.flip();
        try {
            builder.append(decoder.decode(byteBuffer));
        } catch (CharacterCodingException e) {
            if (throwOnFailure) {
                throw new IllegalArgumentException(e);
            } else {
                builder.append(INVALID_INPUT_CHARACTER);
            }
        } finally {
            // Use the byte buffer to write again.
            byteBuffer.clear();
        }
    }

    /**
     * Equivalent to {@code decode(s, false, UTF_8, true)}.
     */
    public static String decode(String s) {
        return decode(
                s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
    }
}