1 /* 2 * Copyright 2001-2004 The Apache Software Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.commons.codec.binary; 18 19 import org.apache.commons.codec.BinaryDecoder; 20 import org.apache.commons.codec.BinaryEncoder; 21 import org.apache.commons.codec.DecoderException; 22 import org.apache.commons.codec.EncoderException; 23 24 /** 25 * Provides Base64 encoding and decoding as defined by RFC 2045. 26 * 27 * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> 28 * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One: 29 * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p> 30 * 31 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> 32 * @author Apache Software Foundation 33 * @since 1.0-dev 34 * @version $Id: Base64.java,v 1.20 2004/05/24 00:21:24 ggregory Exp $ 35 */ 36 public class Base64 implements BinaryEncoder, BinaryDecoder { 37 38 /** 39 * Chunk size per RFC 2045 section 6.8. 40 * 41 * <p>The {@value} character limit does not count the trailing CRLF, but counts 42 * all other characters, including any equal signs.</p> 43 * 44 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 45 */ 46 static final int CHUNK_SIZE = 76; 47 48 /** 49 * Chunk separator per RFC 2045 section 2.1. 50 * 51 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 52 */ 53 static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes(); 54 55 /** 56 * The base length. 57 */ 58 static final int BASELENGTH = 255; 59 60 /** 61 * Lookup length. 62 */ 63 static final int LOOKUPLENGTH = 64; 64 65 /** 66 * Used to calculate the number of bits in a byte. 67 */ 68 static final int EIGHTBIT = 8; 69 70 /** 71 * Used when encoding something which has fewer than 24 bits. 72 */ 73 static final int SIXTEENBIT = 16; 74 75 /** 76 * Used to determine how many bits data contains. 77 */ 78 static final int TWENTYFOURBITGROUP = 24; 79 80 /** 81 * Used to get the number of Quadruples. 82 */ 83 static final int FOURBYTE = 4; 84 85 /** 86 * Used to test the sign of a byte. 87 */ 88 static final int SIGN = -128; 89 90 /** 91 * Byte used to pad output. 92 */ 93 static final byte PAD = (byte) '='; 94 95 // Create arrays to hold the base64 characters and a 96 // lookup for base64 chars 97 private static byte[] base64Alphabet = new byte[BASELENGTH]; 98 private static byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH]; 99 100 // Populating the lookup and character arrays 101 static { 102 for (int i = 0; i < BASELENGTH; i++) { 103 base64Alphabet[i] = (byte) -1; 104 } 105 for (int i = 'Z'; i >= 'A'; i--) { 106 base64Alphabet[i] = (byte) (i - 'A'); 107 } 108 for (int i = 'z'; i >= 'a'; i--) { 109 base64Alphabet[i] = (byte) (i - 'a' + 26); 110 } 111 for (int i = '9'; i >= '0'; i--) { 112 base64Alphabet[i] = (byte) (i - '0' + 52); 113 } 114 115 base64Alphabet['+'] = 62; 116 base64Alphabet['/'] = 63; 117 118 for (int i = 0; i <= 25; i++) { 119 lookUpBase64Alphabet[i] = (byte) ('A' + i); 120 } 121 122 for (int i = 26, j = 0; i <= 51; i++, j++) { 123 lookUpBase64Alphabet[i] = (byte) ('a' + j); 124 } 125 126 for (int i = 52, j = 0; i <= 61; i++, j++) { 127 lookUpBase64Alphabet[i] = (byte) ('0' + j); 128 } 129 130 lookUpBase64Alphabet[62] = (byte) '+'; 131 lookUpBase64Alphabet[63] = (byte) '/'; 132 } 133 isBase64(byte octect)134 private static boolean isBase64(byte octect) { 135 if (octect == PAD) { 136 return true; 137 } else if (base64Alphabet[octect] == -1) { 138 return false; 139 } else { 140 return true; 141 } 142 } 143 144 /** 145 * Tests a given byte array to see if it contains 146 * only valid characters within the Base64 alphabet. 147 * 148 * @param arrayOctect byte array to test 149 * @return true if all bytes are valid characters in the Base64 150 * alphabet or if the byte array is empty; false, otherwise 151 */ isArrayByteBase64(byte[] arrayOctect)152 public static boolean isArrayByteBase64(byte[] arrayOctect) { 153 154 arrayOctect = discardWhitespace(arrayOctect); 155 156 int length = arrayOctect.length; 157 if (length == 0) { 158 // shouldn't a 0 length array be valid base64 data? 159 // return false; 160 return true; 161 } 162 for (int i = 0; i < length; i++) { 163 if (!isBase64(arrayOctect[i])) { 164 return false; 165 } 166 } 167 return true; 168 } 169 170 /** 171 * Encodes binary data using the base64 algorithm but 172 * does not chunk the output. 173 * 174 * @param binaryData binary data to encode 175 * @return Base64 characters 176 */ encodeBase64(byte[] binaryData)177 public static byte[] encodeBase64(byte[] binaryData) { 178 return encodeBase64(binaryData, false); 179 } 180 181 /** 182 * Encodes binary data using the base64 algorithm and chunks 183 * the encoded output into 76 character blocks 184 * 185 * @param binaryData binary data to encode 186 * @return Base64 characters chunked in 76 character blocks 187 */ encodeBase64Chunked(byte[] binaryData)188 public static byte[] encodeBase64Chunked(byte[] binaryData) { 189 return encodeBase64(binaryData, true); 190 } 191 192 193 /** 194 * Decodes an Object using the base64 algorithm. This method 195 * is provided in order to satisfy the requirements of the 196 * Decoder interface, and will throw a DecoderException if the 197 * supplied object is not of type byte[]. 198 * 199 * @param pObject Object to decode 200 * @return An object (of type byte[]) containing the 201 * binary data which corresponds to the byte[] supplied. 202 * @throws DecoderException if the parameter supplied is not 203 * of type byte[] 204 */ decode(Object pObject)205 public Object decode(Object pObject) throws DecoderException { 206 if (!(pObject instanceof byte[])) { 207 throw new DecoderException("Parameter supplied to Base64 decode is not a byte[]"); 208 } 209 return decode((byte[]) pObject); 210 } 211 212 /** 213 * Decodes a byte[] containing containing 214 * characters in the Base64 alphabet. 215 * 216 * @param pArray A byte array containing Base64 character data 217 * @return a byte array containing binary data 218 */ decode(byte[] pArray)219 public byte[] decode(byte[] pArray) { 220 return decodeBase64(pArray); 221 } 222 223 /** 224 * Encodes binary data using the base64 algorithm, optionally 225 * chunking the output into 76 character blocks. 226 * 227 * @param binaryData Array containing binary data to encode. 228 * @param isChunked if isChunked is true this encoder will chunk 229 * the base64 output into 76 character blocks 230 * @return Base64-encoded data. 231 */ encodeBase64(byte[] binaryData, boolean isChunked)232 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) { 233 int lengthDataBits = binaryData.length * EIGHTBIT; 234 int fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP; 235 int numberTriplets = lengthDataBits / TWENTYFOURBITGROUP; 236 byte encodedData[] = null; 237 int encodedDataLength = 0; 238 int nbrChunks = 0; 239 240 if (fewerThan24bits != 0) { 241 //data not divisible by 24 bit 242 encodedDataLength = (numberTriplets + 1) * 4; 243 } else { 244 // 16 or 8 bit 245 encodedDataLength = numberTriplets * 4; 246 } 247 248 // If the output is to be "chunked" into 76 character sections, 249 // for compliance with RFC 2045 MIME, then it is important to 250 // allow for extra length to account for the separator(s) 251 if (isChunked) { 252 253 nbrChunks = 254 (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math.ceil((float) encodedDataLength / CHUNK_SIZE)); 255 encodedDataLength += nbrChunks * CHUNK_SEPARATOR.length; 256 } 257 258 encodedData = new byte[encodedDataLength]; 259 260 byte k = 0, l = 0, b1 = 0, b2 = 0, b3 = 0; 261 262 int encodedIndex = 0; 263 int dataIndex = 0; 264 int i = 0; 265 int nextSeparatorIndex = CHUNK_SIZE; 266 int chunksSoFar = 0; 267 268 //log.debug("number of triplets = " + numberTriplets); 269 for (i = 0; i < numberTriplets; i++) { 270 dataIndex = i * 3; 271 b1 = binaryData[dataIndex]; 272 b2 = binaryData[dataIndex + 1]; 273 b3 = binaryData[dataIndex + 2]; 274 275 //log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3); 276 277 l = (byte) (b2 & 0x0f); 278 k = (byte) (b1 & 0x03); 279 280 byte val1 = 281 ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); 282 byte val2 = 283 ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); 284 byte val3 = 285 ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc); 286 287 encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; 288 //log.debug( "val2 = " + val2 ); 289 //log.debug( "k4 = " + (k<<4) ); 290 //log.debug( "vak = " + (val2 | (k<<4)) ); 291 encodedData[encodedIndex + 1] = 292 lookUpBase64Alphabet[val2 | (k << 4)]; 293 encodedData[encodedIndex + 2] = 294 lookUpBase64Alphabet[(l << 2) | val3]; 295 encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f]; 296 297 encodedIndex += 4; 298 299 // If we are chunking, let's put a chunk separator down. 300 if (isChunked) { 301 // this assumes that CHUNK_SIZE % 4 == 0 302 if (encodedIndex == nextSeparatorIndex) { 303 System.arraycopy( 304 CHUNK_SEPARATOR, 305 0, 306 encodedData, 307 encodedIndex, 308 CHUNK_SEPARATOR.length); 309 chunksSoFar++; 310 nextSeparatorIndex = 311 (CHUNK_SIZE * (chunksSoFar + 1)) + 312 (chunksSoFar * CHUNK_SEPARATOR.length); 313 encodedIndex += CHUNK_SEPARATOR.length; 314 } 315 } 316 } 317 318 // form integral number of 6-bit groups 319 dataIndex = i * 3; 320 321 if (fewerThan24bits == EIGHTBIT) { 322 b1 = binaryData[dataIndex]; 323 k = (byte) (b1 & 0x03); 324 //log.debug("b1=" + b1); 325 //log.debug("b1<<2 = " + (b1>>2) ); 326 byte val1 = 327 ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); 328 encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; 329 encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4]; 330 encodedData[encodedIndex + 2] = PAD; 331 encodedData[encodedIndex + 3] = PAD; 332 } else if (fewerThan24bits == SIXTEENBIT) { 333 334 b1 = binaryData[dataIndex]; 335 b2 = binaryData[dataIndex + 1]; 336 l = (byte) (b2 & 0x0f); 337 k = (byte) (b1 & 0x03); 338 339 byte val1 = 340 ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); 341 byte val2 = 342 ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); 343 344 encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; 345 encodedData[encodedIndex + 1] = 346 lookUpBase64Alphabet[val2 | (k << 4)]; 347 encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2]; 348 encodedData[encodedIndex + 3] = PAD; 349 } 350 351 if (isChunked) { 352 // we also add a separator to the end of the final chunk. 353 if (chunksSoFar < nbrChunks) { 354 System.arraycopy( 355 CHUNK_SEPARATOR, 356 0, 357 encodedData, 358 encodedDataLength - CHUNK_SEPARATOR.length, 359 CHUNK_SEPARATOR.length); 360 } 361 } 362 363 return encodedData; 364 } 365 366 /** 367 * Decodes Base64 data into octects 368 * 369 * @param base64Data Byte array containing Base64 data 370 * @return Array containing decoded data. 371 */ decodeBase64(byte[] base64Data)372 public static byte[] decodeBase64(byte[] base64Data) { 373 // RFC 2045 requires that we discard ALL non-Base64 characters 374 base64Data = discardNonBase64(base64Data); 375 376 // handle the edge case, so we don't have to worry about it later 377 if (base64Data.length == 0) { 378 return new byte[0]; 379 } 380 381 int numberQuadruple = base64Data.length / FOURBYTE; 382 byte decodedData[] = null; 383 byte b1 = 0, b2 = 0, b3 = 0, b4 = 0, marker0 = 0, marker1 = 0; 384 385 // Throw away anything not in base64Data 386 387 int encodedIndex = 0; 388 int dataIndex = 0; 389 { 390 // this sizes the output array properly - rlw 391 int lastData = base64Data.length; 392 // ignore the '=' padding 393 while (base64Data[lastData - 1] == PAD) { 394 if (--lastData == 0) { 395 return new byte[0]; 396 } 397 } 398 decodedData = new byte[lastData - numberQuadruple]; 399 } 400 401 for (int i = 0; i < numberQuadruple; i++) { 402 dataIndex = i * 4; 403 marker0 = base64Data[dataIndex + 2]; 404 marker1 = base64Data[dataIndex + 3]; 405 406 b1 = base64Alphabet[base64Data[dataIndex]]; 407 b2 = base64Alphabet[base64Data[dataIndex + 1]]; 408 409 if (marker0 != PAD && marker1 != PAD) { 410 //No PAD e.g 3cQl 411 b3 = base64Alphabet[marker0]; 412 b4 = base64Alphabet[marker1]; 413 414 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); 415 decodedData[encodedIndex + 1] = 416 (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); 417 decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4); 418 } else if (marker0 == PAD) { 419 //Two PAD e.g. 3c[Pad][Pad] 420 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); 421 } else if (marker1 == PAD) { 422 //One PAD e.g. 3cQ[Pad] 423 b3 = base64Alphabet[marker0]; 424 425 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); 426 decodedData[encodedIndex + 1] = 427 (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); 428 } 429 encodedIndex += 3; 430 } 431 return decodedData; 432 } 433 434 /** 435 * Discards any whitespace from a base-64 encoded block. 436 * 437 * @param data The base-64 encoded data to discard the whitespace 438 * from. 439 * @return The data, less whitespace (see RFC 2045). 440 */ discardWhitespace(byte[] data)441 static byte[] discardWhitespace(byte[] data) { 442 byte groomedData[] = new byte[data.length]; 443 int bytesCopied = 0; 444 445 for (int i = 0; i < data.length; i++) { 446 switch (data[i]) { 447 case (byte) ' ' : 448 case (byte) '\n' : 449 case (byte) '\r' : 450 case (byte) '\t' : 451 break; 452 default: 453 groomedData[bytesCopied++] = data[i]; 454 } 455 } 456 457 byte packedData[] = new byte[bytesCopied]; 458 459 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied); 460 461 return packedData; 462 } 463 464 /** 465 * Discards any characters outside of the base64 alphabet, per 466 * the requirements on page 25 of RFC 2045 - "Any characters 467 * outside of the base64 alphabet are to be ignored in base64 468 * encoded data." 469 * 470 * @param data The base-64 encoded data to groom 471 * @return The data, less non-base64 characters (see RFC 2045). 472 */ discardNonBase64(byte[] data)473 static byte[] discardNonBase64(byte[] data) { 474 byte groomedData[] = new byte[data.length]; 475 int bytesCopied = 0; 476 477 for (int i = 0; i < data.length; i++) { 478 if (isBase64(data[i])) { 479 groomedData[bytesCopied++] = data[i]; 480 } 481 } 482 483 byte packedData[] = new byte[bytesCopied]; 484 485 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied); 486 487 return packedData; 488 } 489 490 491 // Implementation of the Encoder Interface 492 493 /** 494 * Encodes an Object using the base64 algorithm. This method 495 * is provided in order to satisfy the requirements of the 496 * Encoder interface, and will throw an EncoderException if the 497 * supplied object is not of type byte[]. 498 * 499 * @param pObject Object to encode 500 * @return An object (of type byte[]) containing the 501 * base64 encoded data which corresponds to the byte[] supplied. 502 * @throws EncoderException if the parameter supplied is not 503 * of type byte[] 504 */ encode(Object pObject)505 public Object encode(Object pObject) throws EncoderException { 506 if (!(pObject instanceof byte[])) { 507 throw new EncoderException( 508 "Parameter supplied to Base64 encode is not a byte[]"); 509 } 510 return encode((byte[]) pObject); 511 } 512 513 /** 514 * Encodes a byte[] containing binary data, into a byte[] containing 515 * characters in the Base64 alphabet. 516 * 517 * @param pArray a byte array containing binary data 518 * @return A byte array containing only Base64 character data 519 */ encode(byte[] pArray)520 public byte[] encode(byte[] pArray) { 521 return encodeBase64(pArray, false); 522 } 523 524 } 525