/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.io.GwtWorkarounds.asCharInput;
import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
import static com.google.common.math.IntMath.divide;
import static com.google.common.math.IntMath.log2;
import static java.math.RoundingMode.CEILING;
import static java.math.RoundingMode.FLOOR;
import static java.math.RoundingMode.UNNECESSARY;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.io.GwtWorkarounds.ByteInput;
import com.google.common.io.GwtWorkarounds.ByteOutput;
import com.google.common.io.GwtWorkarounds.CharInput;
import com.google.common.io.GwtWorkarounds.CharOutput;

import java.io.IOException;
import java.util.Arrays;

import javax.annotation.CheckReturnValue;
import javax.annotation.Nullable;

/**
 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
 * strings. This class includes several constants for encoding schemes specified by <a
 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
 *
 * <pre>   {@code
 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
 *
 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
 *
 * <p>...returns the ASCII bytes of the string {@code "foo"}.
 *
 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
 * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
 * with modified behavior:
 *
 * <pre>   {@code
 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
 *
 * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
 *
 * <pre>   {@code
 *   // Do NOT do this
 *   BaseEncoding hex = BaseEncoding.base16();
 *   hex.lowerCase(); // does nothing!
 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
 *
 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
 * {@code x}, but the reverse does not necessarily hold.
 *
 * <p>
 * <table>
 * <tr>
 * <th>Encoding
 * <th>Alphabet
 * <th>{@code char:byte} ratio
 * <th>Default padding
 * <th>Comments
 * <tr>
 * <td>{@link #base16()}
 * <td>0-9 A-F
 * <td>2.00
 * <td>N/A
 * <td>Traditional hexadecimal.  Defaults to upper case.
 * <tr>
 * <td>{@link #base32()}
 * <td>A-Z 2-7
 * <td>1.60
 * <td>=
 * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
 * <tr>
 * <td>{@link #base32Hex()}
 * <td>0-9 A-V
 * <td>1.60
 * <td>=
 * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
 * <tr>
 * <td>{@link #base64()}
 * <td>A-Z a-z 0-9 + /
 * <td>1.33
 * <td>=
 * <td>
 * <tr>
 * <td>{@link #base64Url()}
 * <td>A-Z a-z 0-9 - _
 * <td>1.33
 * <td>=
 * <td>Safe to use as filenames, or to pass in URLs without escaping
 * </table>
 *
 * <p>
 * All instances of this class are immutable, so they may be stored safely as static constants.
 *
 * @author Louis Wasserman
 * @since 14.0
 */
@Beta
@GwtCompatible(emulated = true)
public abstract class BaseEncoding {
  // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])

  /** Package-private constructor: only the implementations in this file may extend the class. */
  BaseEncoding() {}

  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception carrying the given description of the malformed input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception wrapping the given underlying cause. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }

  /**
   * Encodes the specified byte array, and returns the encoded {@code String}.
   */
  public String encode(byte[] bytes) {
    return encode(checkNotNull(bytes), 0, bytes.length);
  }

  /**
   * Encodes the specified range of the specified byte array, and returns the encoded
   * {@code String}.
   *
   * @param bytes the array holding the bytes to encode
   * @param off the index of the first byte to encode
   * @param len the number of bytes to encode
   * @throws IndexOutOfBoundsException if {@code off} and {@code off + len} do not describe a
   *     valid range of {@code bytes}
   */
  public final String encode(byte[] bytes, int off, int len) {
    checkNotNull(bytes);
    checkPositionIndexes(off, off + len, bytes.length);
    CharOutput result = stringBuilderOutput(maxEncodedSize(len));
    ByteOutput byteOutput = encodingStream(result);
    try {
      for (int i = 0; i < len; i++) {
        byteOutput.write(bytes[off + i]);
      }
      byteOutput.close();
    } catch (IOException impossible) {
      // The sink is an in-memory string builder, so an IOException is not expected here. Keep
      // the original exception as the cause (as decodeChecked does) so that it is diagnosable
      // should it ever happen.
      throw new AssertionError(impossible);
    }
    return result.toString();
  }

  // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)

  /**
   * Returns the first {@code length} bytes of {@code result}. The array itself is returned, not a
   * copy, when it is already exactly {@code length} long.
   */
  private static byte[] extract(byte[] result, int length) {
    if (length == result.length) {
      return result;
    } else {
      byte[] trunc = new byte[length];
      System.arraycopy(result, 0, trunc, 0, length);
      return trunc;
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
   * This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
   *         encoding.
   */
  public final byte[] decode(CharSequence chars) {
    try {
      return decodeChecked(chars);
    } catch (DecodingException badInput) {
      throw new IllegalArgumentException(badInput);
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
   * This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws DecodingException if the input is not a valid encoded string according to this
   *         encoding.
   */
  final byte[] decodeChecked(CharSequence chars) throws DecodingException {
    // Trailing padding carries no data; strip it before sizing the buffer and decoding.
    chars = padding().trimTrailingFrom(chars);
    ByteInput decodedInput = decodingStream(asCharInput(chars));
    // Sized for the worst case; trimmed to the number of bytes actually produced on return.
    byte[] tmp = new byte[maxDecodedSize(chars.length())];
    int index = 0;
    try {
      for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
        tmp[index++] = (byte) i;
      }
    } catch (DecodingException badInput) {
      throw badInput;
    } catch (IOException impossible) {
      throw new AssertionError(impossible);
    }
    return extract(tmp, index);
  }

  // Implementations for encoding/decoding

  /** Returns an upper bound on the number of characters produced by encoding {@code bytes} bytes. */
  abstract int maxEncodedSize(int bytes);

  /** Returns a byte sink that writes the encoded form of the bytes it receives to the given output. */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /** Returns an upper bound on the number of bytes produced by decoding {@code chars} characters. */
  abstract int maxDecodedSize(int chars);

  /** Returns a byte source that yields the decoded form of the characters read from the given input. */
  abstract ByteInput decodingStream(CharInput charInput);

  /** Returns a matcher for the padding character(s) of this encoding. */
  abstract CharMatcher padding();

  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();

  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }

  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }

  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }

  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }

  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }

  /**
   * The set of characters used by an encoding, together with the lookup tables and chunk
   * geometry derived from it. As a {@link CharMatcher} it matches exactly the alphabet's
   * characters.
   */
  private static final class Alphabet extends CharMatcher {
    private final String name;
    // this is meant to be immutable -- don't modify it!
    private final char[] chars;
    // chars.length - 1; masks a value down to a valid index into chars.
    final int mask;
    // Number of bits represented by each alphabet character (log2 of the alphabet size).
    final int bitsPerChar;
    // Number of characters in one chunk; a chunk encodes a whole number of bytes.
    final int charsPerChunk;
    // Number of bytes encoded by one chunk.
    final int bytesPerChunk;
    // Maps an ASCII character to its index in chars, or -1 if it is not in the alphabet.
    private final byte[] decodabet;
    // Indexed by position within a chunk: true where a whole number of bytes has been encoded.
    private final boolean[] validPadding;

    /**
     * Builds an alphabet from the given characters.
     *
     * @param name the name reported by {@link #toString()}
     * @param chars the alphabet characters, in index order; not copied
     * @throws IllegalArgumentException if the number of characters is not a power of two greater
     *     than one, or if any character is non-ASCII or duplicated
     */
    Alphabet(String name, char[] chars) {
      this.name = checkNotNull(name);
      this.chars = checkNotNull(chars);
      // A one-character alphabet has zero bits per character, which would otherwise surface
      // below as a division by zero instead of an argument error.
      checkArgument(chars.length != 1, "Illegal alphabet length %s", chars.length);
      try {
        this.bitsPerChar = log2(chars.length, UNNECESSARY);
      } catch (ArithmeticException e) {
        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
      }

      /*
       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.  This makes
       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
       */
      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
      this.charsPerChunk = 8 / gcd;
      this.bytesPerChunk = bitsPerChar / gcd;

      this.mask = chars.length - 1;

      byte[] decodabet = new byte[Ascii.MAX + 1];
      Arrays.fill(decodabet, (byte) -1);
      for (int i = 0; i < chars.length; i++) {
        char c = chars[i];
        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
        decodabet[c] = (byte) i;
      }
      this.decodabet = decodabet;

      // Mark, for each whole number of bytes i in a chunk, the count of characters needed to
      // hold those i bytes.
      boolean[] validPadding = new boolean[charsPerChunk];
      for (int i = 0; i < bytesPerChunk; i++) {
        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
      }
      this.validPadding = validPadding;
    }

    /** Returns the alphabet character at index {@code bits}. */
    char encode(int bits) {
      return chars[bits];
    }

    /**
     * Returns whether {@code index} characters (taken modulo the chunk size) encode a whole
     * number of bytes, i.e. whether the input may end or padding may begin there.
     */
    boolean isValidPaddingStartPosition(int index) {
      return validPadding[index % charsPerChunk];
    }

    /**
     * Returns the index of {@code ch} in this alphabet.
     *
     * @throws DecodingException if {@code ch} is not a character of this alphabet
     */
    int decode(char ch) throws DecodingException {
      if (ch > Ascii.MAX || decodabet[ch] == -1) {
        throw new DecodingException("Unrecognized character: " + ch);
      }
      return decodabet[ch];
    }

    /** Returns whether any character of this alphabet is an ASCII lowercase letter. */
    private boolean hasLowerCase() {
      for (char c : chars) {
        if (Ascii.isLowerCase(c)) {
          return true;
        }
      }
      return false;
    }

    /** Returns whether any character of this alphabet is an ASCII uppercase letter. */
    private boolean hasUpperCase() {
      for (char c : chars) {
        if (Ascii.isUpperCase(c)) {
          return true;
        }
      }
      return false;
    }

    /**
     * Returns this alphabet with every character upper-cased, or this same instance if it has no
     * lowercase characters.
     *
     * @throws IllegalStateException if this alphabet contains both upper- and lowercase characters
     */
    Alphabet upperCase() {
      if (!hasLowerCase()) {
        return this;
      } else {
        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
        char[] upperCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          upperCased[i] = Ascii.toUpperCase(chars[i]);
        }
        return new Alphabet(name + ".upperCase()", upperCased);
      }
    }

    /**
     * Returns this alphabet with every character lower-cased, or this same instance if it has no
     * uppercase characters.
     *
     * @throws IllegalStateException if this alphabet contains both upper- and lowercase characters
     */
    Alphabet lowerCase() {
      if (!hasUpperCase()) {
        return this;
      } else {
        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
        char[] lowerCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          lowerCased[i] = Ascii.toLowerCase(chars[i]);
        }
        return new Alphabet(name + ".lowerCase()", lowerCased);
      }
    }

    /** Returns whether {@code c} is one of this alphabet's characters. */
    @Override
    public boolean matches(char c) {
      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
    }

    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * An encoding defined by an {@link Alphabet} and an optional padding character, with no
   * separators.
   */
  static final class StandardBaseEncoding extends BaseEncoding {
    private final Alphabet alphabet;

    // null means that no padding is written when encoding.
    @Nullable
    private final Character paddingChar;

    /**
     * Creates an encoding over the characters of {@code alphabetChars}.
     *
     * @param name the name of the alphabet, used by {@link #toString()}
     * @param alphabetChars the alphabet characters, in index order
     * @param paddingChar the padding character, or {@code null} for none
     */
    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /**
     * Creates an encoding over the given alphabet.
     *
     * @throws IllegalArgumentException if {@code paddingChar} is a character of the alphabet
     */
    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
      this.alphabet = checkNotNull(alphabet);
      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
          "Padding character %s was already in alphabet", paddingChar);
      this.paddingChar = paddingChar;
    }

    @Override
    CharMatcher padding() {
      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
    }

    @Override
    int maxEncodedSize(int bytes) {
      // Whole chunks only: a partial final chunk is counted as a full one.
      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
    }

    @Override
    ByteOutput encodingStream(final CharOutput out) {
      checkNotNull(out);
      return new ByteOutput() {
        // Accumulates incoming bits; only the low bitBufferLength bits are still pending.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int writtenChars = 0;

        @Override
        public void write(byte b) throws IOException {
          bitBuffer <<= 8;
          bitBuffer |= b & 0xFF;
          bitBufferLength += 8;
          // Emit a character for every complete group of bitsPerChar pending bits, taking the
          // oldest (highest) bits first.
          while (bitBufferLength >= alphabet.bitsPerChar) {
            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            bitBufferLength -= alphabet.bitsPerChar;
          }
        }

        @Override
        public void flush() throws IOException {
          out.flush();
        }

        @Override
        public void close() throws IOException {
          if (bitBufferLength > 0) {
            // Left-align the leftover bits in a final character; the low bits become zero.
            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            if (paddingChar != null) {
              // Pad the output up to a whole number of chunks.
              while (writtenChars % alphabet.charsPerChunk != 0) {
                out.write(paddingChar.charValue());
                writtenChars++;
              }
            }
          }
          out.close();
        }
      };
    }

    @Override
    int maxDecodedSize(int chars) {
      // Total bits divided by 8, rounded up; computed in long arithmetic.
      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    }

    @Override
    ByteInput decodingStream(final CharInput reader) {
      checkNotNull(reader);
      return new ByteInput() {
        // Accumulates decoded bits; only the low bitBufferLength bits are still pending.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Number of characters consumed from reader so far.
        int readChars = 0;
        boolean hitPadding = false;
        final CharMatcher paddingMatcher = padding();

        @Override
        public int read() throws IOException {
          // Keep consuming characters until a full byte is available or the input ends.
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingMatcher.matches(ch)) {
              // readChars was already incremented, so readChars - 1 is the number of
              // characters that preceded this padding character.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }

    @Override
    public BaseEncoding omitPadding() {
      return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      // When bitsPerChar divides 8, characters never straddle a byte boundary, so padding is
      // never written and the pad character is irrelevant.
      if (8 % alphabet.bitsPerChar == 0 ||
          (paddingChar != null && paddingChar.charValue() == padChar)) {
        return this;
      } else {
        return new StandardBaseEncoding(alphabet, padChar);
      }
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      checkNotNull(separator);
      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
          "Separator cannot contain alphabet or padding characters");
      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
    }

    // Lazily computed results of upperCase() and lowerCase(); null until first requested.
    private transient BaseEncoding upperCase;
    private transient BaseEncoding lowerCase;

    @Override
    public BaseEncoding upperCase() {
      BaseEncoding result = upperCase;
      if (result == null) {
        Alphabet upper = alphabet.upperCase();
        result = upperCase =
            (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
      }
      return result;
    }

    @Override
    public BaseEncoding lowerCase() {
      BaseEncoding result = lowerCase;
      if (result == null) {
        Alphabet lower = alphabet.lowerCase();
        result = lowerCase =
            (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
      }
      return result;
    }

    /**
     * Returns {@code "BaseEncoding."} followed by the alphabet name and, for alphabets where
     * padding applies, the padding configuration.
     */
    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder("BaseEncoding.");
      builder.append(alphabet.toString());
      if (8 % alphabet.bitsPerChar != 0) {
        if (paddingChar == null) {
          builder.append(".omitPadding()");
        } else {
          builder.append(".withPadChar(").append(paddingChar).append(')');
        }
      }
      return builder.toString();
    }
  }

  /**
   * Returns a view of {@code delegate} that silently skips every character matched by
   * {@code toIgnore}.
   */
  static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
    checkNotNull(delegate);
    checkNotNull(toIgnore);
    return new CharInput() {
      @Override
      public int read() throws IOException {
        int readChar;
        do {
          readChar = delegate.read();
        } while (readChar != -1 && toIgnore.matches((char) readChar));
        return readChar;
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * Returns a view of {@code delegate} that writes {@code separator} between every group of
   * {@code afterEveryChars} characters. The separator is emitted just before the first character
   * of the next group, so no trailing separator is written.
   *
   * @throws IllegalArgumentException if {@code afterEveryChars} is not positive
   */
  static CharOutput separatingOutput(
      final CharOutput delegate, final String separator, final int afterEveryChars) {
    checkNotNull(delegate);
    checkNotNull(separator);
    checkArgument(afterEveryChars > 0);
    return new CharOutput() {
      int charsUntilSeparator = afterEveryChars;

      @Override
      public void write(char c) throws IOException {
        if (charsUntilSeparator == 0) {
          for (int i = 0; i < separator.length(); i++) {
            delegate.write(separator.charAt(i));
          }
          charsUntilSeparator = afterEveryChars;
        }
        delegate.write(c);
        charsUntilSeparator--;
      }

      @Override
      public void flush() throws IOException {
        delegate.flush();
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * An encoding that wraps another encoding, inserting a separator string into encoded output
   * and ignoring the separator's characters in input to be decoded.
   */
  static final class SeparatedBaseEncoding extends BaseEncoding {
    private final BaseEncoding delegate;
    private final String separator;
    private final int afterEveryChars;
    // Matches any character occurring in separator; such characters are skipped when decoding.
    private final CharMatcher separatorChars;

    /**
     * Creates an encoding that adds {@code separator} after every {@code afterEveryChars}
     * characters produced by {@code delegate}.
     *
     * @throws IllegalArgumentException if {@code afterEveryChars} is not positive
     */
    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
      this.delegate = checkNotNull(delegate);
      this.separator = checkNotNull(separator);
      this.afterEveryChars = afterEveryChars;
      checkArgument(
          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
    }

    @Override
    CharMatcher padding() {
      return delegate.padding();
    }

    @Override
    int maxEncodedSize(int bytes) {
      int unseparatedSize = delegate.maxEncodedSize(bytes);
      // One separator between each pair of adjacent groups, none after the last group.
      return unseparatedSize + separator.length()
          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
    }

    @Override
    ByteOutput encodingStream(final CharOutput output) {
      return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
    }

    @Override
    int maxDecodedSize(int chars) {
      return delegate.maxDecodedSize(chars);
    }

    @Override
    ByteInput decodingStream(final CharInput input) {
      return delegate.decodingStream(ignoringInput(input, separatorChars));
    }

    @Override
    public BaseEncoding omitPadding() {
      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      throw new UnsupportedOperationException("Already have a separator");
    }

    @Override
    public BaseEncoding upperCase() {
      return delegate.upperCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding lowerCase() {
      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public String toString() {
      return delegate.toString() +
          ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
    }
  }
}