1 /* Jackson JSON-processor. 2 * 3 * Copyright (c) 2007- Tatu Saloranta, tatu.saloranta@iki.fi 4 */ 5 package com.fasterxml.jackson.core; 6 7 import java.util.Arrays; 8 9 import com.fasterxml.jackson.core.util.ByteArrayBuilder; 10 11 /** 12 * Class used to define specific details of which 13 * variant of Base64 encoding/decoding is to be used. Although there is 14 * somewhat standard basic version (so-called "MIME Base64"), other variants 15 * exists, see <a href="http://en.wikipedia.org/wiki/Base64">Base64 Wikipedia entry</a> for details. 16 * 17 * @author Tatu Saloranta 18 */ 19 public final class Base64Variant 20 implements java.io.Serializable 21 { 22 private final static int INT_SPACE = 0x20; 23 24 // We'll only serialize name 25 private static final long serialVersionUID = 1L; 26 27 /** 28 * Placeholder used by "no padding" variant, to be used when a character 29 * value is needed. 30 */ 31 final static char PADDING_CHAR_NONE = '\0'; 32 33 /** 34 * Marker used to denote ascii characters that do not correspond 35 * to a 6-bit value (in this variant), and is not used as a padding 36 * character. 37 */ 38 public final static int BASE64_VALUE_INVALID = -1; 39 40 /** 41 * Marker used to denote ascii character (in decoding table) that 42 * is the padding character using this variant (if any). 43 */ 44 public final static int BASE64_VALUE_PADDING = -2; 45 46 /* 47 /********************************************************** 48 /* Encoding/decoding tables 49 /********************************************************** 50 */ 51 52 /** 53 * Decoding table used for base 64 decoding. 54 */ 55 private final transient int[] _asciiToBase64 = new int[128]; 56 57 /** 58 * Encoding table used for base 64 decoding when output is done 59 * as characters. 60 */ 61 private final transient char[] _base64ToAsciiC = new char[64]; 62 63 /** 64 * Alternative encoding table used for base 64 decoding when output is done 65 * as ascii bytes. 66 */ 67 private final transient byte[] _base64ToAsciiB = new byte[64]; 68 69 /* 70 /********************************************************** 71 /* Other configuration 72 /********************************************************** 73 */ 74 75 /** 76 * Symbolic name of variant; used for diagnostics/debugging. 77 *<p> 78 * Note that this is the only non-transient field; used when reading 79 * back from serialized state. 80 *<p> 81 * Also: must not be private, accessed from `BaseVariants` 82 */ 83 final String _name; 84 85 /** 86 * Whether this variant uses padding or not. 87 */ 88 private final transient boolean _usesPadding; 89 90 /** 91 * Character used for padding, if any ({@link #PADDING_CHAR_NONE} if not). 92 */ 93 private final transient char _paddingChar; 94 95 /** 96 * Maximum number of encoded base64 characters to output during encoding 97 * before adding a linefeed, if line length is to be limited 98 * ({@link java.lang.Integer#MAX_VALUE} if not limited). 99 *<p> 100 * Note: for some output modes (when writing attributes) linefeeds may 101 * need to be avoided, and this value ignored. 102 */ 103 private final transient int _maxLineLength; 104 105 /* 106 /********************************************************** 107 /* Life-cycle 108 /********************************************************** 109 */ 110 Base64Variant(String name, String base64Alphabet, boolean usesPadding, char paddingChar, int maxLineLength)111 public Base64Variant(String name, String base64Alphabet, boolean usesPadding, char paddingChar, int maxLineLength) 112 { 113 _name = name; 114 _usesPadding = usesPadding; 115 _paddingChar = paddingChar; 116 _maxLineLength = maxLineLength; 117 118 // Ok and then we need to create codec tables. 119 120 // First the main encoding table: 121 int alphaLen = base64Alphabet.length(); 122 if (alphaLen != 64) { 123 throw new IllegalArgumentException("Base64Alphabet length must be exactly 64 (was "+alphaLen+")"); 124 } 125 126 // And then secondary encoding table and decoding table: 127 base64Alphabet.getChars(0, alphaLen, _base64ToAsciiC, 0); 128 Arrays.fill(_asciiToBase64, BASE64_VALUE_INVALID); 129 for (int i = 0; i < alphaLen; ++i) { 130 char alpha = _base64ToAsciiC[i]; 131 _base64ToAsciiB[i] = (byte) alpha; 132 _asciiToBase64[alpha] = i; 133 } 134 135 // Plus if we use padding, add that in too 136 if (usesPadding) { 137 _asciiToBase64[(int) paddingChar] = BASE64_VALUE_PADDING; 138 } 139 } 140 141 /** 142 * "Copy constructor" that can be used when the base alphabet is identical 143 * to one used by another variant except for the maximum line length 144 * (and obviously, name). 145 */ Base64Variant(Base64Variant base, String name, int maxLineLength)146 public Base64Variant(Base64Variant base, String name, int maxLineLength) 147 { 148 this(base, name, base._usesPadding, base._paddingChar, maxLineLength); 149 } 150 151 /** 152 * "Copy constructor" that can be used when the base alphabet is identical 153 * to one used by another variant, but other details (padding, maximum 154 * line length) differ 155 */ Base64Variant(Base64Variant base, String name, boolean usesPadding, char paddingChar, int maxLineLength)156 public Base64Variant(Base64Variant base, String name, boolean usesPadding, char paddingChar, int maxLineLength) 157 { 158 _name = name; 159 byte[] srcB = base._base64ToAsciiB; 160 System.arraycopy(srcB, 0, this._base64ToAsciiB, 0, srcB.length); 161 char[] srcC = base._base64ToAsciiC; 162 System.arraycopy(srcC, 0, this._base64ToAsciiC, 0, srcC.length); 163 int[] srcV = base._asciiToBase64; 164 System.arraycopy(srcV, 0, this._asciiToBase64, 0, srcV.length); 165 166 _usesPadding = usesPadding; 167 _paddingChar = paddingChar; 168 _maxLineLength = maxLineLength; 169 } 170 171 /* 172 /********************************************************** 173 /* Serializable overrides 174 /********************************************************** 175 */ 176 177 /** 178 * Method used to "demote" deserialized instances back to 179 * canonical ones 180 */ readResolve()181 protected Object readResolve() { 182 return Base64Variants.valueOf(_name); 183 } 184 185 /* 186 /********************************************************** 187 /* Public accessors 188 /********************************************************** 189 */ 190 getName()191 public String getName() { return _name; } 192 usesPadding()193 public boolean usesPadding() { return _usesPadding; } usesPaddingChar(char c)194 public boolean usesPaddingChar(char c) { return c == _paddingChar; } usesPaddingChar(int ch)195 public boolean usesPaddingChar(int ch) { return ch == (int) _paddingChar; } getPaddingChar()196 public char getPaddingChar() { return _paddingChar; } getPaddingByte()197 public byte getPaddingByte() { return (byte)_paddingChar; } 198 getMaxLineLength()199 public int getMaxLineLength() { return _maxLineLength; } 200 201 /* 202 /********************************************************** 203 /* Decoding support 204 /********************************************************** 205 */ 206 207 /** 208 * @return 6-bit decoded value, if valid character; 209 */ decodeBase64Char(char c)210 public int decodeBase64Char(char c) 211 { 212 int ch = (int) c; 213 return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID; 214 } 215 decodeBase64Char(int ch)216 public int decodeBase64Char(int ch) 217 { 218 return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID; 219 } 220 decodeBase64Byte(byte b)221 public int decodeBase64Byte(byte b) 222 { 223 int ch = (int) b; 224 // note: cast retains sign, so it's from -128 to +127 225 if (ch < 0) { 226 return BASE64_VALUE_INVALID; 227 } 228 return _asciiToBase64[ch]; 229 } 230 231 /* 232 /********************************************************** 233 /* Encoding support 234 /********************************************************** 235 */ 236 encodeBase64BitsAsChar(int value)237 public char encodeBase64BitsAsChar(int value) 238 { 239 /* Let's assume caller has done necessary checks; this 240 * method must be fast and inlinable 241 */ 242 return _base64ToAsciiC[value]; 243 } 244 245 /** 246 * Method that encodes given right-aligned (LSB) 24-bit value 247 * into 4 base64 characters, stored in given result buffer. 248 */ encodeBase64Chunk(int b24, char[] buffer, int ptr)249 public int encodeBase64Chunk(int b24, char[] buffer, int ptr) 250 { 251 buffer[ptr++] = _base64ToAsciiC[(b24 >> 18) & 0x3F]; 252 buffer[ptr++] = _base64ToAsciiC[(b24 >> 12) & 0x3F]; 253 buffer[ptr++] = _base64ToAsciiC[(b24 >> 6) & 0x3F]; 254 buffer[ptr++] = _base64ToAsciiC[b24 & 0x3F]; 255 return ptr; 256 } 257 encodeBase64Chunk(StringBuilder sb, int b24)258 public void encodeBase64Chunk(StringBuilder sb, int b24) 259 { 260 sb.append(_base64ToAsciiC[(b24 >> 18) & 0x3F]); 261 sb.append(_base64ToAsciiC[(b24 >> 12) & 0x3F]); 262 sb.append(_base64ToAsciiC[(b24 >> 6) & 0x3F]); 263 sb.append(_base64ToAsciiC[b24 & 0x3F]); 264 } 265 266 /** 267 * Method that outputs partial chunk (which only encodes one 268 * or two bytes of data). Data given is still aligned same as if 269 * it as full data; that is, missing data is at the "right end" 270 * (LSB) of int. 271 * 272 * @param outputBytes Number of encoded bytes included (either 1 or 2) 273 */ encodeBase64Partial(int bits, int outputBytes, char[] buffer, int outPtr)274 public int encodeBase64Partial(int bits, int outputBytes, char[] buffer, int outPtr) 275 { 276 buffer[outPtr++] = _base64ToAsciiC[(bits >> 18) & 0x3F]; 277 buffer[outPtr++] = _base64ToAsciiC[(bits >> 12) & 0x3F]; 278 if (_usesPadding) { 279 buffer[outPtr++] = (outputBytes == 2) ? 280 _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar; 281 buffer[outPtr++] = _paddingChar; 282 } else { 283 if (outputBytes == 2) { 284 buffer[outPtr++] = _base64ToAsciiC[(bits >> 6) & 0x3F]; 285 } 286 } 287 return outPtr; 288 } 289 encodeBase64Partial(StringBuilder sb, int bits, int outputBytes)290 public void encodeBase64Partial(StringBuilder sb, int bits, int outputBytes) 291 { 292 sb.append(_base64ToAsciiC[(bits >> 18) & 0x3F]); 293 sb.append(_base64ToAsciiC[(bits >> 12) & 0x3F]); 294 if (_usesPadding) { 295 sb.append((outputBytes == 2) ? 296 _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar); 297 sb.append(_paddingChar); 298 } else { 299 if (outputBytes == 2) { 300 sb.append(_base64ToAsciiC[(bits >> 6) & 0x3F]); 301 } 302 } 303 } 304 encodeBase64BitsAsByte(int value)305 public byte encodeBase64BitsAsByte(int value) 306 { 307 // As with above, assuming it is 6-bit value 308 return _base64ToAsciiB[value]; 309 } 310 311 /** 312 * Method that encodes given right-aligned (LSB) 24-bit value 313 * into 4 base64 bytes (ascii), stored in given result buffer. 314 */ encodeBase64Chunk(int b24, byte[] buffer, int ptr)315 public int encodeBase64Chunk(int b24, byte[] buffer, int ptr) 316 { 317 buffer[ptr++] = _base64ToAsciiB[(b24 >> 18) & 0x3F]; 318 buffer[ptr++] = _base64ToAsciiB[(b24 >> 12) & 0x3F]; 319 buffer[ptr++] = _base64ToAsciiB[(b24 >> 6) & 0x3F]; 320 buffer[ptr++] = _base64ToAsciiB[b24 & 0x3F]; 321 return ptr; 322 } 323 324 /** 325 * Method that outputs partial chunk (which only encodes one 326 * or two bytes of data). Data given is still aligned same as if 327 * it as full data; that is, missing data is at the "right end" 328 * (LSB) of int. 329 * 330 * @param outputBytes Number of encoded bytes included (either 1 or 2) 331 */ encodeBase64Partial(int bits, int outputBytes, byte[] buffer, int outPtr)332 public int encodeBase64Partial(int bits, int outputBytes, byte[] buffer, int outPtr) 333 { 334 buffer[outPtr++] = _base64ToAsciiB[(bits >> 18) & 0x3F]; 335 buffer[outPtr++] = _base64ToAsciiB[(bits >> 12) & 0x3F]; 336 if (_usesPadding) { 337 byte pb = (byte) _paddingChar; 338 buffer[outPtr++] = (outputBytes == 2) ? 339 _base64ToAsciiB[(bits >> 6) & 0x3F] : pb; 340 buffer[outPtr++] = pb; 341 } else { 342 if (outputBytes == 2) { 343 buffer[outPtr++] = _base64ToAsciiB[(bits >> 6) & 0x3F]; 344 } 345 } 346 return outPtr; 347 } 348 349 /* 350 /********************************************************** 351 /* Convenience conversion methods for String to/from bytes 352 /* use case. 353 /********************************************************** 354 */ 355 356 /** 357 * Convenience method for converting given byte array as base64 encoded 358 * String using this variant's settings. 359 * Resulting value is "raw", that is, not enclosed in double-quotes. 360 * 361 * @param input Byte array to encode 362 */ encode(byte[] input)363 public String encode(byte[] input) 364 { 365 return encode(input, false); 366 } 367 368 /** 369 * Convenience method for converting given byte array as base64 encoded String 370 * using this variant's settings, optionally enclosed in double-quotes. 371 * Linefeeds added, if needed, are expressed as 2-character JSON (and Java source) 372 * escape sequence of backslash + `n`. 373 * 374 * @param input Byte array to encode 375 * @param addQuotes Whether to surround resulting value in double quotes or not 376 */ encode(byte[] input, boolean addQuotes)377 public String encode(byte[] input, boolean addQuotes) 378 { 379 final int inputEnd = input.length; 380 final StringBuilder sb = new StringBuilder(inputEnd + (inputEnd >> 2) + (inputEnd >> 3)); 381 if (addQuotes) { 382 sb.append('"'); 383 } 384 385 int chunksBeforeLF = getMaxLineLength() >> 2; 386 387 // Ok, first we loop through all full triplets of data: 388 int inputPtr = 0; 389 int safeInputEnd = inputEnd-3; // to get only full triplets 390 391 while (inputPtr <= safeInputEnd) { 392 // First, mash 3 bytes into lsb of 32-bit int 393 int b24 = ((int) input[inputPtr++]) << 8; 394 b24 |= ((int) input[inputPtr++]) & 0xFF; 395 b24 = (b24 << 8) | (((int) input[inputPtr++]) & 0xFF); 396 encodeBase64Chunk(sb, b24); 397 if (--chunksBeforeLF <= 0) { 398 // note: must quote in JSON value, so not really useful... 399 sb.append('\\'); 400 sb.append('n'); 401 chunksBeforeLF = getMaxLineLength() >> 2; 402 } 403 } 404 405 // And then we may have 1 or 2 leftover bytes to encode 406 int inputLeft = inputEnd - inputPtr; // 0, 1 or 2 407 if (inputLeft > 0) { // yes, but do we have room for output? 408 int b24 = ((int) input[inputPtr++]) << 16; 409 if (inputLeft == 2) { 410 b24 |= (((int) input[inputPtr++]) & 0xFF) << 8; 411 } 412 encodeBase64Partial(sb, b24, inputLeft); 413 } 414 415 if (addQuotes) { 416 sb.append('"'); 417 } 418 return sb.toString(); 419 } 420 421 /** 422 * Convenience method for converting given byte array as base64 encoded String 423 * using this variant's settings, optionally enclosed in double-quotes. 424 * Linefeed character to use is passed explicitly. 425 * 426 * @param input Byte array to encode 427 * @param addQuotes Whether to surround resulting value in double quotes or not 428 * 429 * @since 2.10 430 */ encode(byte[] input, boolean addQuotes, String linefeed)431 public String encode(byte[] input, boolean addQuotes, String linefeed) 432 { 433 final int inputEnd = input.length; 434 final StringBuilder sb = new StringBuilder(inputEnd + (inputEnd >> 2) + (inputEnd >> 3)); 435 if (addQuotes) { 436 sb.append('"'); 437 } 438 439 int chunksBeforeLF = getMaxLineLength() >> 2; 440 441 int inputPtr = 0; 442 int safeInputEnd = inputEnd-3; 443 444 while (inputPtr <= safeInputEnd) { 445 int b24 = ((int) input[inputPtr++]) << 8; 446 b24 |= ((int) input[inputPtr++]) & 0xFF; 447 b24 = (b24 << 8) | (((int) input[inputPtr++]) & 0xFF); 448 encodeBase64Chunk(sb, b24); 449 if (--chunksBeforeLF <= 0) { 450 sb.append(linefeed); 451 chunksBeforeLF = getMaxLineLength() >> 2; 452 } 453 } 454 int inputLeft = inputEnd - inputPtr; 455 if (inputLeft > 0) { 456 int b24 = ((int) input[inputPtr++]) << 16; 457 if (inputLeft == 2) { 458 b24 |= (((int) input[inputPtr++]) & 0xFF) << 8; 459 } 460 encodeBase64Partial(sb, b24, inputLeft); 461 } 462 463 if (addQuotes) { 464 sb.append('"'); 465 } 466 return sb.toString(); 467 } 468 469 /** 470 * Convenience method for decoding contents of a Base64-encoded String, 471 * using this variant's settings. 472 * 473 * @param input 474 * 475 * @since 2.3 476 * 477 * @throws IllegalArgumentException if input is not valid base64 encoded data 478 */ 479 @SuppressWarnings("resource") decode(String input)480 public byte[] decode(String input) throws IllegalArgumentException 481 { 482 ByteArrayBuilder b = new ByteArrayBuilder(); 483 decode(input, b); 484 return b.toByteArray(); 485 } 486 487 /** 488 * Convenience method for decoding contents of a Base64-encoded String, 489 * using this variant's settings 490 * and appending decoded binary data using provided {@link ByteArrayBuilder}. 491 *<p> 492 * NOTE: builder will NOT be reset before decoding (nor cleared afterwards); 493 * assumption is that caller will ensure it is given in proper state, and 494 * used as appropriate afterwards. 495 * 496 * @since 2.3 497 * 498 * @throws IllegalArgumentException if input is not valid base64 encoded data 499 */ decode(String str, ByteArrayBuilder builder)500 public void decode(String str, ByteArrayBuilder builder) throws IllegalArgumentException 501 { 502 int ptr = 0; 503 int len = str.length(); 504 505 main_loop: 506 while (true) { 507 // first, we'll skip preceding white space, if any 508 char ch; 509 do { 510 if (ptr >= len) { 511 break main_loop; 512 } 513 ch = str.charAt(ptr++); 514 } while (ch <= INT_SPACE); 515 int bits = decodeBase64Char(ch); 516 if (bits < 0) { 517 _reportInvalidBase64(ch, 0, null); 518 } 519 int decodedData = bits; 520 // then second base64 char; can't get padding yet, nor ws 521 if (ptr >= len) { 522 _reportBase64EOF(); 523 } 524 ch = str.charAt(ptr++); 525 bits = decodeBase64Char(ch); 526 if (bits < 0) { 527 _reportInvalidBase64(ch, 1, null); 528 } 529 decodedData = (decodedData << 6) | bits; 530 // third base64 char; can be padding, but not ws 531 if (ptr >= len) { 532 // but as per [JACKSON-631] can be end-of-input, iff not using padding 533 if (!usesPadding()) { 534 decodedData >>= 4; 535 builder.append(decodedData); 536 break; 537 } 538 _reportBase64EOF(); 539 } 540 ch = str.charAt(ptr++); 541 bits = decodeBase64Char(ch); 542 543 // First branch: can get padding (-> 1 byte) 544 if (bits < 0) { 545 if (bits != Base64Variant.BASE64_VALUE_PADDING) { 546 _reportInvalidBase64(ch, 2, null); 547 } 548 // Ok, must get padding 549 if (ptr >= len) { 550 _reportBase64EOF(); 551 } 552 ch = str.charAt(ptr++); 553 if (!usesPaddingChar(ch)) { 554 _reportInvalidBase64(ch, 3, "expected padding character '"+getPaddingChar()+"'"); 555 } 556 // Got 12 bits, only need 8, need to shift 557 decodedData >>= 4; 558 builder.append(decodedData); 559 continue; 560 } 561 // Nope, 2 or 3 bytes 562 decodedData = (decodedData << 6) | bits; 563 // fourth and last base64 char; can be padding, but not ws 564 if (ptr >= len) { 565 // but as per [JACKSON-631] can be end-of-input, iff not using padding 566 if (!usesPadding()) { 567 decodedData >>= 2; 568 builder.appendTwoBytes(decodedData); 569 break; 570 } 571 _reportBase64EOF(); 572 } 573 ch = str.charAt(ptr++); 574 bits = decodeBase64Char(ch); 575 if (bits < 0) { 576 if (bits != Base64Variant.BASE64_VALUE_PADDING) { 577 _reportInvalidBase64(ch, 3, null); 578 } 579 decodedData >>= 2; 580 builder.appendTwoBytes(decodedData); 581 } else { 582 // otherwise, our triple is now complete 583 decodedData = (decodedData << 6) | bits; 584 builder.appendThreeBytes(decodedData); 585 } 586 } 587 } 588 589 /* 590 /********************************************************** 591 /* Overridden standard methods 592 /********************************************************** 593 */ 594 595 @Override toString()596 public String toString() { return _name; } 597 598 @Override equals(Object o)599 public boolean equals(Object o) { 600 // identity comparison should be dine 601 return (o == this); 602 } 603 604 @Override hashCode()605 public int hashCode() { 606 return _name.hashCode(); 607 } 608 609 /* 610 /********************************************************** 611 /* Internal helper methods 612 /********************************************************** 613 */ 614 615 /** 616 * @param bindex Relative index within base64 character unit; between 0 617 * and 3 (as unit has exactly 4 characters) 618 */ _reportInvalidBase64(char ch, int bindex, String msg)619 protected void _reportInvalidBase64(char ch, int bindex, String msg) 620 throws IllegalArgumentException 621 { 622 String base; 623 if (ch <= INT_SPACE) { 624 base = "Illegal white space character (code 0x"+Integer.toHexString(ch)+") as character #"+(bindex+1)+" of 4-char base64 unit: can only used between units"; 625 } else if (usesPaddingChar(ch)) { 626 base = "Unexpected padding character ('"+getPaddingChar()+"') as character #"+(bindex+1)+" of 4-char base64 unit: padding only legal as 3rd or 4th character"; 627 } else if (!Character.isDefined(ch) || Character.isISOControl(ch)) { 628 // Not sure if we can really get here... ? (most illegal xml chars are caught at lower level) 629 base = "Illegal character (code 0x"+Integer.toHexString(ch)+") in base64 content"; 630 } else { 631 base = "Illegal character '"+ch+"' (code 0x"+Integer.toHexString(ch)+") in base64 content"; 632 } 633 if (msg != null) { 634 base = base + ": " + msg; 635 } 636 throw new IllegalArgumentException(base); 637 } 638 _reportBase64EOF()639 protected void _reportBase64EOF() throws IllegalArgumentException { 640 throw new IllegalArgumentException(missingPaddingMessage()); 641 } 642 643 /** 644 * Helper method that will construct a message to use in exceptions for cases where input ends 645 * prematurely in place where padding would be expected. 646 * 647 * @since 2.10 648 */ missingPaddingMessage()649 public String missingPaddingMessage() { 650 return String.format("Unexpected end of base64-encoded String: base64 variant '%s' expects padding (one or more '%c' characters) at the end", 651 getName(), getPaddingChar()); 652 } 653 654 } 655 656