1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 package com.google.protobuf; 9 10 import static com.google.protobuf.Internal.checkNotNull; 11 import static com.google.protobuf.TextFormatEscaper.escapeBytes; 12 import static java.lang.Integer.toHexString; 13 import static java.lang.System.identityHashCode; 14 15 import java.io.ByteArrayInputStream; 16 import java.io.ByteArrayOutputStream; 17 import java.io.IOException; 18 import java.io.InputStream; 19 import java.io.InvalidObjectException; 20 import java.io.ObjectInputStream; 21 import java.io.OutputStream; 22 import java.io.Serializable; 23 import java.io.UnsupportedEncodingException; 24 import java.nio.ByteBuffer; 25 import java.nio.ByteOrder; 26 import java.nio.InvalidMarkException; 27 import java.nio.charset.Charset; 28 import java.nio.charset.UnsupportedCharsetException; 29 import java.util.ArrayList; 30 import java.util.Arrays; 31 import java.util.Collection; 32 import java.util.Collections; 33 import java.util.Comparator; 34 import java.util.Iterator; 35 import java.util.List; 36 import java.util.Locale; 37 import java.util.NoSuchElementException; 38 39 /** 40 * Immutable sequence of bytes. Provides conversions to and from {@code byte[]}, {@link 41 * java.lang.String}, {@link ByteBuffer}, {@link InputStream}, {@link OutputStream}. Also provides a 42 * conversion to {@link CodedInputStream}. 43 * 44 * <p>Like {@link String}, the contents of a {@link ByteString} can never be observed to change, not 45 * even in the presence of a data race or incorrect API usage in the client code. 46 * 47 * <p>Substring is supported by sharing the reference to the immutable underlying bytes. 48 * Concatenation is likewise supported without copying (long strings) by building a tree of pieces 49 * in {@link RopeByteString}. 50 * 51 * @author crazybob@google.com Bob Lee 52 * @author kenton@google.com Kenton Varda 53 * @author carlanton@google.com Carl Haverl 54 * @author martinrb@google.com Martin Buchholz 55 */ 56 @CheckReturnValue 57 public abstract class ByteString implements Iterable<Byte>, Serializable { 58 private static final long serialVersionUID = 1L; 59 60 /** 61 * When two strings to be concatenated have a combined length shorter than this, we just copy 62 * their bytes on {@link #concat(ByteString)}. The trade-off is copy size versus the overhead of 63 * creating tree nodes in {@link RopeByteString}. 64 */ 65 static final int CONCATENATE_BY_COPY_SIZE = 128; 66 67 /** 68 * When copying an InputStream into a ByteString with .readFrom(), the chunks in the underlying 69 * rope start at 256 bytes, but double each iteration up to 8192 bytes. 70 */ 71 static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b 72 73 static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k 74 75 /** Empty {@code ByteString}. */ 76 public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY); 77 78 /** 79 * An interface to efficiently copy {@code byte[]}. 80 * 81 * <p>One of the noticeable costs of copying a byte[] into a new array using {@code 82 * System.arraycopy} is nullification of a new buffer before the copy. It has been shown the 83 * Hotspot VM is capable to intrisicfy {@code Arrays.copyOfRange} operation to avoid this 84 * expensive nullification and provide substantial performance gain. Unfortunately this does not 85 * hold on Android runtimes and could make the copy slightly slower due to additional code in the 86 * {@code Arrays.copyOfRange}. Thus we provide two different implementation for array copier for 87 * Hotspot and Android runtimes. 88 */ 89 private interface ByteArrayCopier { 90 /** Copies the specified range of the specified array into a new array */ copyFrom(byte[] bytes, int offset, int size)91 byte[] copyFrom(byte[] bytes, int offset, int size); 92 } 93 94 /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */ 95 private static final class SystemByteArrayCopier implements ByteArrayCopier { 96 @Override copyFrom(byte[] bytes, int offset, int size)97 public byte[] copyFrom(byte[] bytes, int offset, int size) { 98 byte[] copy = new byte[size]; 99 System.arraycopy(bytes, offset, copy, 0, size); 100 return copy; 101 } 102 } 103 104 /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */ 105 private static final class ArraysByteArrayCopier implements ByteArrayCopier { 106 @Override copyFrom(byte[] bytes, int offset, int size)107 public byte[] copyFrom(byte[] bytes, int offset, int size) { 108 return Arrays.copyOfRange(bytes, offset, offset + size); 109 } 110 } 111 112 private static final ByteArrayCopier byteArrayCopier; 113 114 static { 115 byteArrayCopier = 116 Android.isOnAndroidDevice() ? new SystemByteArrayCopier() : new ArraysByteArrayCopier(); 117 } 118 119 /** 120 * Cached hash value. Intentionally accessed via a data race, which is safe because of the Java 121 * Memory Model's "no out-of-thin-air values" guarantees for ints. A value of 0 implies that the 122 * hash has not been set. 123 */ 124 private int hash = 0; 125 126 // This constructor is here to prevent subclassing outside of this package, ByteString()127 ByteString() {} 128 129 /** 130 * Gets the byte at the given index. This method should be used only for random access to 131 * individual bytes. To access bytes sequentially, use the {@link ByteIterator} returned by {@link 132 * #iterator()}, and call {@link #substring(int, int)} first if necessary. 133 * 134 * @param index index of byte 135 * @return the value 136 * @throws IndexOutOfBoundsException {@code index < 0 or index >= size} 137 */ byteAt(int index)138 public abstract byte byteAt(int index); 139 140 /** 141 * Gets the byte at the given index, assumes bounds checking has already been performed. 142 * 143 * @param index index of byte 144 * @return the value 145 * @throws IndexOutOfBoundsException {@code index < 0 or index >= size} 146 */ internalByteAt(int index)147 abstract byte internalByteAt(int index); 148 149 /** 150 * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. To avoid 151 * auto-boxing, you may get the iterator manually and call {@link ByteIterator#nextByte()}. 152 * 153 * @return the iterator 154 */ 155 @Override iterator()156 public ByteIterator iterator() { 157 return new AbstractByteIterator() { 158 private int position = 0; 159 private final int limit = size(); 160 161 @Override 162 public boolean hasNext() { 163 return position < limit; 164 } 165 166 @Override 167 public byte nextByte() { 168 int currentPos = position; 169 if (currentPos >= limit) { 170 throw new NoSuchElementException(); 171 } 172 position = currentPos + 1; 173 return internalByteAt(currentPos); 174 } 175 }; 176 } 177 178 /** 179 * This interface extends {@code Iterator<Byte>}, so that we can return an unboxed {@code byte}. 180 */ 181 public interface ByteIterator extends Iterator<Byte> { 182 /** 183 * An alternative to {@link Iterator#next()} that returns an unboxed primitive {@code byte}. 184 * 185 * @return the next {@code byte} in the iteration 186 * @throws NoSuchElementException if the iteration has no more elements 187 */ nextByte()188 byte nextByte(); 189 } 190 191 abstract static class AbstractByteIterator implements ByteIterator { 192 @Override next()193 public final Byte next() { 194 // Boxing calls Byte.valueOf(byte), which does not instantiate. 195 return nextByte(); 196 } 197 198 @Override remove()199 public final void remove() { 200 throw new UnsupportedOperationException(); 201 } 202 } 203 204 /** 205 * Gets the number of bytes. 206 * 207 * @return size in bytes 208 */ size()209 public abstract int size(); 210 211 /** 212 * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. 213 * 214 * @return true if this is zero bytes long 215 */ isEmpty()216 public final boolean isEmpty() { 217 return size() == 0; 218 } 219 220 /** Returns an empty {@code ByteString} of size {@code 0}. */ empty()221 public static final ByteString empty() { 222 return EMPTY; 223 } 224 225 // ================================================================= 226 // Comparison 227 228 private static final int UNSIGNED_BYTE_MASK = 0xFF; 229 230 /** 231 * Returns the value of the given byte as an integer, interpreting the byte as an unsigned value. 232 * That is, returns {@code value + 256} if {@code value} is negative; {@code value} itself 233 * otherwise. 234 * 235 * <p>Note: This code was copied from {@link com.google.common.primitives.UnsignedBytes#toInt}, as 236 * Guava libraries cannot be used in the {@code com.google.protobuf} package. 237 */ toInt(byte value)238 private static int toInt(byte value) { 239 return value & UNSIGNED_BYTE_MASK; 240 } 241 242 /** Returns the numeric value of the given character in hex, or -1 if invalid. */ hexDigit(char c)243 private static int hexDigit(char c) { 244 if (c >= '0' && c <= '9') { 245 return c - '0'; 246 } else if (c >= 'A' && c <= 'F') { 247 return c - 'A' + 10; 248 } else if (c >= 'a' && c <= 'f') { 249 return c - 'a' + 10; 250 } else { 251 return -1; 252 } 253 } 254 255 /** 256 * Returns the numeric value of the given character at index in hexString. 257 * 258 * @throws NumberFormatException if the hexString character is invalid. 259 */ extractHexDigit(String hexString, int index)260 private static int extractHexDigit(String hexString, int index) { 261 int digit = hexDigit(hexString.charAt(index)); 262 if (digit == -1) { 263 throw new NumberFormatException( 264 "Invalid hexString " 265 + hexString 266 + " must only contain [0-9a-fA-F] but contained " 267 + hexString.charAt(index) 268 + " at index " 269 + index); 270 } 271 return digit; 272 } 273 274 /** 275 * Compares two {@link ByteString}s lexicographically, treating their contents as unsigned byte 276 * values between 0 and 255 (inclusive). 277 * 278 * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it 279 * is interpreted as an unsigned value, {@code 255}. 280 */ 281 private static final Comparator<ByteString> UNSIGNED_LEXICOGRAPHICAL_COMPARATOR = 282 new Comparator<ByteString>() { 283 @Override 284 public int compare(ByteString former, ByteString latter) { 285 ByteIterator formerBytes = former.iterator(); 286 ByteIterator latterBytes = latter.iterator(); 287 288 while (formerBytes.hasNext() && latterBytes.hasNext()) { 289 int result = 290 Integer.compare(toInt(formerBytes.nextByte()), toInt(latterBytes.nextByte())); 291 if (result != 0) { 292 return result; 293 } 294 } 295 return Integer.compare(former.size(), latter.size()); 296 } 297 }; 298 299 /** 300 * Returns a {@link Comparator} which compares {@link ByteString}-s lexicographically as sequences 301 * of unsigned bytes (i.e. values between 0 and 255, inclusive). 302 * 303 * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it 304 * is interpreted as an unsigned value, {@code 255}: 305 * 306 * <ul> 307 * <li>{@code `-1` -> 0b11111111 (two's complement) -> 255} 308 * <li>{@code `1` -> 0b00000001 -> 1} 309 * </ul> 310 */ unsignedLexicographicalComparator()311 public static Comparator<ByteString> unsignedLexicographicalComparator() { 312 return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR; 313 } 314 315 // ================================================================= 316 // ByteString -> substring 317 318 /** 319 * Return the substring from {@code beginIndex}, inclusive, to the end of the string. 320 * 321 * @param beginIndex start at this index 322 * @return substring sharing underlying data 323 * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}. 324 */ substring(int beginIndex)325 public final ByteString substring(int beginIndex) { 326 return substring(beginIndex, size()); 327 } 328 329 /** 330 * Return the substring from {@code beginIndex}, inclusive, to {@code endIndex}, exclusive. 331 * 332 * @param beginIndex start at this index 333 * @param endIndex the last character is the one before this index 334 * @return substring sharing underlying data 335 * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or 336 * {@code beginIndex > endIndex}. 337 */ substring(int beginIndex, int endIndex)338 public abstract ByteString substring(int beginIndex, int endIndex); 339 340 /** 341 * Tests if this bytestring starts with the specified prefix. Similar to {@link 342 * String#startsWith(String)} 343 * 344 * @param prefix the prefix. 345 * @return <code>true</code> if the byte sequence represented by the argument is a prefix of the 346 * byte sequence represented by this string; <code>false</code> otherwise. 347 */ startsWith(ByteString prefix)348 public final boolean startsWith(ByteString prefix) { 349 return size() >= prefix.size() && substring(0, prefix.size()).equals(prefix); 350 } 351 352 /** 353 * Tests if this bytestring ends with the specified suffix. Similar to {@link 354 * String#endsWith(String)} 355 * 356 * @param suffix the suffix. 357 * @return <code>true</code> if the byte sequence represented by the argument is a suffix of the 358 * byte sequence represented by this string; <code>false</code> otherwise. 359 */ endsWith(ByteString suffix)360 public final boolean endsWith(ByteString suffix) { 361 return size() >= suffix.size() && substring(size() - suffix.size()).equals(suffix); 362 } 363 364 // ================================================================= 365 // String -> ByteString 366 367 /** 368 * Returns a {@code ByteString} from a hexadecimal String. 369 * 370 * @param hexString String of hexadecimal digits to create {@code ByteString} from. 371 * @throws NumberFormatException if the hexString does not contain a parsable hex String. 372 */ fromHex(@ompileTimeConstant String hexString)373 public static ByteString fromHex(@CompileTimeConstant String hexString) { 374 if (hexString.length() % 2 != 0) { 375 throw new NumberFormatException( 376 "Invalid hexString " + hexString + " of length " + hexString.length() + " must be even."); 377 } 378 byte[] bytes = new byte[hexString.length() / 2]; 379 for (int i = 0; i < bytes.length; i++) { 380 int d1 = extractHexDigit(hexString, 2 * i); 381 int d2 = extractHexDigit(hexString, 2 * i + 1); 382 bytes[i] = (byte) (d1 << 4 | d2); 383 } 384 return new LiteralByteString(bytes); 385 } 386 387 // ================================================================= 388 // byte[] -> ByteString 389 390 /** 391 * Copies the given bytes into a {@code ByteString}. 392 * 393 * @param bytes source array 394 * @param offset offset in source array 395 * @param size number of bytes to copy 396 * @return new {@code ByteString} 397 * @throws IndexOutOfBoundsException if {@code offset} or {@code size} are out of bounds 398 */ copyFrom(byte[] bytes, int offset, int size)399 public static ByteString copyFrom(byte[] bytes, int offset, int size) { 400 checkRange(offset, offset + size, bytes.length); 401 return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size)); 402 } 403 404 /** 405 * Copies the given bytes into a {@code ByteString}. 406 * 407 * @param bytes to copy 408 * @return new {@code ByteString} 409 */ copyFrom(byte[] bytes)410 public static ByteString copyFrom(byte[] bytes) { 411 return copyFrom(bytes, 0, bytes.length); 412 } 413 414 /** 415 * Wraps the given bytes into a {@code ByteString}. Intended for internal usage within the 416 * library. 417 */ wrap(ByteBuffer buffer)418 static ByteString wrap(ByteBuffer buffer) { 419 if (buffer.hasArray()) { 420 final int offset = buffer.arrayOffset(); 421 return ByteString.wrap(buffer.array(), offset + buffer.position(), buffer.remaining()); 422 } else { 423 return new NioByteString(buffer); 424 } 425 } 426 427 // For use in tests nioByteString(ByteBuffer buffer)428 static ByteString nioByteString(ByteBuffer buffer) { 429 return new NioByteString(buffer); 430 } 431 432 /** 433 * Wraps the given bytes into a {@code ByteString}. Intended for internal usage within the library 434 * to force a classload of ByteString before LiteralByteString. 435 */ wrap(byte[] bytes)436 static ByteString wrap(byte[] bytes) { 437 // TODO: Return EMPTY when bytes are empty to reduce allocations? 438 return new LiteralByteString(bytes); 439 } 440 441 /** 442 * Wraps the given bytes into a {@code ByteString}. Intended for internal usage within the library 443 * to force a classload of ByteString before BoundedByteString and LiteralByteString. 444 */ wrap(byte[] bytes, int offset, int length)445 static ByteString wrap(byte[] bytes, int offset, int length) { 446 return new BoundedByteString(bytes, offset, length); 447 } 448 449 /** 450 * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into a {@code 451 * ByteString}. 452 * 453 * @param bytes source buffer 454 * @param size number of bytes to copy 455 * @return new {@code ByteString} 456 * @throws IndexOutOfBoundsException if {@code size > bytes.remaining()} 457 */ copyFrom(ByteBuffer bytes, int size)458 public static ByteString copyFrom(ByteBuffer bytes, int size) { 459 checkRange(0, size, bytes.remaining()); 460 byte[] copy = new byte[size]; 461 bytes.get(copy); 462 return new LiteralByteString(copy); 463 } 464 465 /** 466 * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into a {@code ByteString}. 467 * 468 * @param bytes sourceBuffer 469 * @return new {@code ByteString} 470 */ copyFrom(ByteBuffer bytes)471 public static ByteString copyFrom(ByteBuffer bytes) { 472 return copyFrom(bytes, bytes.remaining()); 473 } 474 475 /** 476 * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as 477 * a {@code ByteString}. 478 * 479 * @param text source string 480 * @param charsetName encoding to use 481 * @return new {@code ByteString} 482 * @throws UnsupportedEncodingException if the encoding isn't found 483 */ copyFrom(String text, String charsetName)484 public static ByteString copyFrom(String text, String charsetName) 485 throws UnsupportedEncodingException { 486 return new LiteralByteString(text.getBytes(charsetName)); 487 } 488 489 /** 490 * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as 491 * a {@code ByteString}. 492 * 493 * @param text source string 494 * @param charset encode using this charset 495 * @return new {@code ByteString} 496 */ copyFrom(String text, Charset charset)497 public static ByteString copyFrom(String text, Charset charset) { 498 return new LiteralByteString(text.getBytes(charset)); 499 } 500 501 /** 502 * Encodes {@code text} into a sequence of UTF-8 bytes and returns the result as a {@code 503 * ByteString}. 504 * 505 * @param text source string 506 * @return new {@code ByteString} 507 */ copyFromUtf8(String text)508 public static ByteString copyFromUtf8(String text) { 509 return new LiteralByteString(text.getBytes(Internal.UTF_8)); 510 } 511 512 // ================================================================= 513 // InputStream -> ByteString 514 515 /** 516 * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary 517 * until all bytes are read through to the end of the stream. 518 * 519 * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte 520 * arrays ("chunks") of the stream data. The first chunk is small, with subsequent chunks each 521 * being double the size, up to 8K. 522 * 523 * <p>Each byte read from the input stream will be copied twice to ensure that the resulting 524 * ByteString is truly immutable. 525 * 526 * @param streamToDrain The source stream, which is read completely but not closed. 527 * @return A new {@code ByteString} which is made up of chunks of various sizes, depending on the 528 * behavior of the underlying stream. 529 * @throws IOException if there is a problem reading the underlying stream 530 * @throws IllegalArgumentException if the stream supplies more than Integer.MAX_VALUE bytes 531 */ readFrom(InputStream streamToDrain)532 public static ByteString readFrom(InputStream streamToDrain) throws IOException { 533 return readFrom(streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); 534 } 535 536 /** 537 * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary 538 * until all bytes are read through to the end of the stream. 539 * 540 * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte 541 * arrays ("chunks") of the stream data. The chunkSize parameter sets the size of these byte 542 * arrays. 543 * 544 * <p>Each byte read from the input stream will be copied twice to ensure that the resulting 545 * ByteString is truly immutable. 546 * 547 * @param streamToDrain The source stream, which is read completely but not closed. 548 * @param chunkSize The size of the chunks in which to read the stream. 549 * @return A new {@code ByteString} which is made up of chunks of the given size. 550 * @throws IOException if there is a problem reading the underlying stream 551 * @throws IllegalArgumentException if the stream supplies more than Integer.MAX_VALUE bytes 552 */ readFrom(InputStream streamToDrain, int chunkSize)553 public static ByteString readFrom(InputStream streamToDrain, int chunkSize) throws IOException { 554 return readFrom(streamToDrain, chunkSize, chunkSize); 555 } 556 557 /** 558 * Helper method that takes the chunk size range as a parameter. 559 * 560 * @param streamToDrain the source stream, which is read completely but not closed 561 * @param minChunkSize the minimum size of the chunks in which to read the stream 562 * @param maxChunkSize the maximum size of the chunks in which to read the stream 563 * @return a new {@code ByteString} which is made up of chunks within the given size range 564 * @throws IOException if there is a problem reading the underlying stream 565 * @throws IllegalArgumentException if the stream supplies more than Integer.MAX_VALUE bytes 566 */ readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)567 public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize) 568 throws IOException { 569 Collection<ByteString> results = new ArrayList<ByteString>(); 570 571 // copy the inbound bytes into a list of chunks; the chunk size 572 // grows exponentially to support both short and long streams. 573 int chunkSize = minChunkSize; 574 while (true) { 575 ByteString chunk = readChunk(streamToDrain, chunkSize); 576 if (chunk == null) { 577 break; 578 } 579 results.add(chunk); 580 chunkSize = Math.min(chunkSize * 2, maxChunkSize); 581 } 582 583 return ByteString.copyFrom(results); 584 } 585 586 /** 587 * Blocks until a chunk of the given size can be made from the stream, or EOF is reached. Calls 588 * read() repeatedly in case the given stream implementation doesn't completely fill the given 589 * buffer in one read() call. 590 * 591 * @return A chunk of the desired size, or else a chunk as large as was available when end of 592 * stream was reached. Returns null if the given stream had no more data in it. 593 */ readChunk(InputStream in, final int chunkSize)594 private static ByteString readChunk(InputStream in, final int chunkSize) throws IOException { 595 final byte[] buf = new byte[chunkSize]; 596 int bytesRead = 0; 597 while (bytesRead < chunkSize) { 598 final int count = in.read(buf, bytesRead, chunkSize - bytesRead); 599 if (count == -1) { 600 break; 601 } 602 bytesRead += count; 603 } 604 605 if (bytesRead == 0) { 606 return null; 607 } 608 609 // Always make a copy since InputStream could steal a reference to buf. 610 return ByteString.copyFrom(buf, 0, bytesRead); 611 } 612 613 // ================================================================= 614 // Multiple ByteStrings -> One ByteString 615 616 /** 617 * Concatenate the given {@code ByteString} to this one. Short concatenations, of total size 618 * smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are produced by copying the 619 * underlying bytes (as per Rope.java, <a 620 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 621 * BAP95 </a>. In general, the concatenate involves no copying. 622 * 623 * @param other string to concatenate 624 * @return a new {@code ByteString} instance 625 * @throws IllegalArgumentException if the combined size of the two byte strings exceeds 626 * Integer.MAX_VALUE 627 */ concat(ByteString other)628 public final ByteString concat(ByteString other) { 629 if (Integer.MAX_VALUE - size() < other.size()) { 630 throw new IllegalArgumentException( 631 "ByteString would be too long: " + size() + "+" + other.size()); 632 } 633 634 return RopeByteString.concatenate(this, other); 635 } 636 637 /** 638 * Concatenates all byte strings in the iterable and returns the result. This is designed to run 639 * in O(list size), not O(total bytes). 640 * 641 * <p>The returned {@code ByteString} is not necessarily a unique object. If the list is empty, 642 * the returned object is the singleton empty {@code ByteString}. If the list has only one 643 * element, that {@code ByteString} will be returned without copying. 644 * 645 * @param byteStrings strings to be concatenated 646 * @return new {@code ByteString} 647 * @throws IllegalArgumentException if the combined size of the byte strings exceeds 648 * Integer.MAX_VALUE 649 */ copyFrom(Iterable<ByteString> byteStrings)650 public static ByteString copyFrom(Iterable<ByteString> byteStrings) { 651 // Determine the size; 652 final int size; 653 if (!(byteStrings instanceof Collection)) { 654 int tempSize = 0; 655 for (Iterator<ByteString> iter = byteStrings.iterator(); 656 iter.hasNext(); 657 iter.next(), ++tempSize) {} 658 size = tempSize; 659 } else { 660 size = ((Collection<ByteString>) byteStrings).size(); 661 } 662 663 if (size == 0) { 664 return EMPTY; 665 } 666 667 return balancedConcat(byteStrings.iterator(), size); 668 } 669 670 // Internal function used by copyFrom(Iterable<ByteString>). 671 // Create a balanced concatenation of the next "length" elements from the 672 // iterable. balancedConcat(Iterator<ByteString> iterator, int length)673 private static ByteString balancedConcat(Iterator<ByteString> iterator, int length) { 674 if (length < 1) { 675 throw new IllegalArgumentException(String.format("length (%s) must be >= 1", length)); 676 } 677 ByteString result; 678 if (length == 1) { 679 result = iterator.next(); 680 } else { 681 int halfLength = length >>> 1; 682 ByteString left = balancedConcat(iterator, halfLength); 683 ByteString right = balancedConcat(iterator, length - halfLength); 684 result = left.concat(right); 685 } 686 return result; 687 } 688 689 // ================================================================= 690 // ByteString -> byte[] 691 692 /** 693 * Copies bytes into a buffer at the given offset. 694 * 695 * <p>To copy a subset of bytes, you call this method on the return value of {@link 696 * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target, offset)} 697 * 698 * @param target buffer to copy into 699 * @param offset in the target buffer 700 * @throws IndexOutOfBoundsException if the offset is negative or too large 701 */ copyTo(byte[] target, int offset)702 public void copyTo(byte[] target, int offset) { 703 copyTo(target, 0, offset, size()); 704 } 705 706 /** 707 * Copies bytes into a buffer. 708 * 709 * @param target buffer to copy into 710 * @param sourceOffset offset within these bytes 711 * @param targetOffset offset within the target buffer 712 * @param numberToCopy number of bytes to copy 713 * @throws IndexOutOfBoundsException if an offset or size is negative or too large 714 * @deprecated Instead, call {@code byteString.substring(sourceOffset, sourceOffset + 715 * numberToCopy).copyTo(target, targetOffset)} 716 */ 717 @Deprecated copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)718 public final void copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 719 checkRange(sourceOffset, sourceOffset + numberToCopy, size()); 720 checkRange(targetOffset, targetOffset + numberToCopy, target.length); 721 if (numberToCopy > 0) { 722 copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 723 } 724 } 725 726 /** 727 * Internal (package private) implementation of {@link #copyTo(byte[],int,int,int)}. It assumes 728 * that all error checking has already been performed and that {@code numberToCopy > 0}. 729 */ copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)730 protected abstract void copyToInternal( 731 byte[] target, int sourceOffset, int targetOffset, int numberToCopy); 732 733 /** 734 * Copies bytes into a ByteBuffer. 735 * 736 * <p>To copy a subset of bytes, you call this method on the return value of {@link 737 * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target)} 738 * 739 * @param target ByteBuffer to copy into. 740 * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only 741 * @throws java.nio.BufferOverflowException if the {@code target}'s remaining() space is not large 742 * enough to hold the data. 743 */ copyTo(ByteBuffer target)744 public abstract void copyTo(ByteBuffer target); 745 746 /** 747 * Copies bytes to a {@code byte[]}. 748 * 749 * @return copied bytes 750 */ toByteArray()751 public final byte[] toByteArray() { 752 final int size = size(); 753 if (size == 0) { 754 return Internal.EMPTY_BYTE_ARRAY; 755 } 756 byte[] result = new byte[size]; 757 copyToInternal(result, 0, 0, size); 758 return result; 759 } 760 761 /** 762 * Writes a copy of the contents of this byte string to the specified output stream argument. 763 * 764 * @param out the output stream to which to write the data. 765 * @throws IOException if an I/O error occurs. 766 */ writeTo(OutputStream out)767 public abstract void writeTo(OutputStream out) throws IOException; 768 769 /** 770 * Writes a specified part of this byte string to an output stream. 771 * 772 * @param out the output stream to which to write the data. 773 * @param sourceOffset offset within these bytes 774 * @param numberToWrite number of bytes to write 775 * @throws IOException if an I/O error occurs. 776 * @throws IndexOutOfBoundsException if an offset or size is negative or too large 777 */ writeTo(OutputStream out, int sourceOffset, int numberToWrite)778 final void writeTo(OutputStream out, int sourceOffset, int numberToWrite) throws IOException { 779 checkRange(sourceOffset, sourceOffset + numberToWrite, size()); 780 if (numberToWrite > 0) { 781 writeToInternal(out, sourceOffset, numberToWrite); 782 } 783 } 784 785 /** 786 * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes all error checking has 787 * already been done. 788 */ writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)789 abstract void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) 790 throws IOException; 791 792 /** 793 * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling this method may 794 * result in multiple operations on the target {@link ByteOutput}. 795 * 796 * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link 797 * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious 798 * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution! 799 * 800 * @param byteOutput the output target to receive the bytes 801 * @throws IOException if an I/O error occurs 802 * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput) 803 */ writeTo(ByteOutput byteOutput)804 abstract void writeTo(ByteOutput byteOutput) throws IOException; 805 806 /** 807 * This method behaves exactly the same as {@link #writeTo(ByteOutput)} unless the {@link 808 * ByteString} is a rope. For ropes, the leaf nodes are written in reverse order to the {@code 809 * byteOutput}. 810 * 811 * @param byteOutput the output target to receive the bytes 812 * @throws IOException if an I/O error occurs 813 * @see UnsafeByteOperations#unsafeWriteToReverse(ByteString, ByteOutput) 814 */ writeToReverse(ByteOutput byteOutput)815 abstract void writeToReverse(ByteOutput byteOutput) throws IOException; 816 817 /** 818 * Constructs a read-only {@code java.nio.ByteBuffer} whose content is equal to the contents of 819 * this byte string. The result uses the same backing array as the byte string, if possible. 820 * 821 * @return wrapped bytes 822 */ asReadOnlyByteBuffer()823 public abstract ByteBuffer asReadOnlyByteBuffer(); 824 825 /** 826 * Constructs a list of read-only {@code java.nio.ByteBuffer} objects such that the concatenation 827 * of their contents is equal to the contents of this byte string. The result uses the same 828 * backing arrays as the byte string. 829 * 830 * <p>By returning a list, implementations of this method may be able to avoid copying even when 831 * there are multiple backing arrays. 832 * 833 * @return a list of wrapped bytes 834 */ asReadOnlyByteBufferList()835 public abstract List<ByteBuffer> asReadOnlyByteBufferList(); 836 837 /** 838 * Constructs a new {@code String} by decoding the bytes using the specified charset. 839 * 840 * @param charsetName encode using this charset 841 * @return new string 842 * @throws UnsupportedEncodingException if charset isn't recognized 843 */ toString(String charsetName)844 public final String toString(String charsetName) throws UnsupportedEncodingException { 845 try { 846 return toString(Charset.forName(charsetName)); 847 } catch (UnsupportedCharsetException e) { 848 UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName); 849 exception.initCause(e); 850 throw exception; 851 } 852 } 853 854 /** 855 * Constructs a new {@code String} by decoding the bytes using the specified charset. Returns the 856 * same empty String if empty. 857 * 858 * @param charset encode using this charset 859 * @return new string 860 */ toString(Charset charset)861 public final String toString(Charset charset) { 862 return size() == 0 ? "" : toStringInternal(charset); 863 } 864 865 /** 866 * Constructs a new {@code String} by decoding the bytes using the specified charset. 867 * 868 * @param charset encode using this charset 869 * @return new string 870 */ toStringInternal(Charset charset)871 protected abstract String toStringInternal(Charset charset); 872 873 // ================================================================= 874 // UTF-8 decoding 875 876 /** 877 * Constructs a new {@code String} by decoding the bytes as UTF-8. 878 * 879 * @return new string using UTF-8 encoding 880 */ toStringUtf8()881 public final String toStringUtf8() { 882 return toString(Internal.UTF_8); 883 } 884 885 /** 886 * Tells whether this {@code ByteString} represents a well-formed UTF-8 byte sequence, such that 887 * the original bytes can be converted to a String object and then round tripped back to bytes 888 * without loss. 889 * 890 * <p>More precisely, returns {@code true} whenever: 891 * 892 * <pre>{@code 893 * Arrays.equals(byteString.toByteArray(), 894 * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) 895 * }</pre> 896 * 897 * <p>This method returns {@code false} for "overlong" byte sequences, as well as for 3-byte 898 * sequences that would map to a surrogate character, in accordance with the restricted definition 899 * of UTF-8 introduced in Unicode 3.1. Note that the UTF-8 decoder included in Oracle's JDK has 900 * been modified to also reject "overlong" byte sequences, but (as of 2011) still accepts 3-byte 901 * surrogate character byte sequences. 902 * 903 * <p>See the Unicode Standard,<br> 904 * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br> 905 * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. 906 * 907 * @return whether the bytes in this {@code ByteString} are a well-formed UTF-8 byte sequence 908 */ isValidUtf8()909 public abstract boolean isValidUtf8(); 910 911 /** 912 * Tells whether the given byte sequence is a well-formed, malformed, or incomplete UTF-8 byte 913 * sequence. This method accepts and returns a partial state result, allowing the bytes for a 914 * complete UTF-8 byte sequence to be composed from multiple {@code ByteString} segments. 915 * 916 * @param state either {@code 0} (if this is the initial decoding operation) or the value returned 917 * from a call to a partial decoding method for the previous bytes 918 * @param offset offset of the first byte to check 919 * @param length number of bytes to check 920 * @return {@code -1} if the partial byte sequence is definitely malformed, {@code 0} if it is 921 * well-formed (no additional input needed), or, if the byte sequence is "incomplete", i.e. 922 * apparently terminated in the middle of a character, an opaque integer "state" value 923 * containing enough information to decode the character when passed to a subsequent 924 * invocation of a partial decoding method. 925 */ partialIsValidUtf8(int state, int offset, int length)926 protected abstract int partialIsValidUtf8(int state, int offset, int length); 927 928 // ================================================================= 929 // equals() and hashCode() 930 931 @Override 932 public abstract boolean equals(Object o); 933 934 /** Base class for leaf {@link ByteString}s (i.e. non-ropes). */ 935 abstract static class LeafByteString extends ByteString { 936 private static final long serialVersionUID = 1L; 937 938 @Override getTreeDepth()939 protected final int getTreeDepth() { 940 return 0; 941 } 942 943 @Override isBalanced()944 protected final boolean isBalanced() { 945 return true; 946 } 947 948 @Override writeToReverse(ByteOutput byteOutput)949 void writeToReverse(ByteOutput byteOutput) throws IOException { 950 writeTo(byteOutput); 951 } 952 953 /** 954 * Check equality of the substring of given length of this object starting at zero with another 955 * {@code ByteString} substring starting at offset. 956 * 957 * @param other what to compare a substring in 958 * @param offset offset into other 959 * @param length number of bytes to compare 960 * @return true for equality of substrings, else false. 961 */ equalsRange(ByteString other, int offset, int length)962 abstract boolean equalsRange(ByteString other, int offset, int length); 963 LeafByteString()964 private LeafByteString() {} 965 } 966 967 /** 968 * Compute the hashCode using the traditional algorithm from {@link ByteString}. 969 * 970 * @return hashCode value 971 */ 972 @Override hashCode()973 public final int hashCode() { 974 int h = hash; 975 976 if (h == 0) { 977 int size = size(); 978 h = partialHash(size, 0, size); 979 if (h == 0) { 980 h = 1; 981 } 982 hash = h; 983 } 984 return h; 985 } 986 987 // ================================================================= 988 // Input stream 989 990 /** 991 * Creates an {@code InputStream} which can be used to read the bytes. 992 * 993 * <p>The {@link InputStream} returned by this method is guaranteed to be completely non-blocking. 994 * The method {@link InputStream#available()} returns the number of bytes remaining in the stream. 995 * The methods {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)} and 996 * {@link InputStream#skip(long)} will read/skip as many bytes as are available. The method {@link 997 * InputStream#markSupported()} returns {@code true}. 998 * 999 * <p>The methods in the returned {@link InputStream} might <b>not</b> be thread safe. 1000 * 1001 * @return an input stream that returns the bytes of this byte string. 1002 */ newInput()1003 public abstract InputStream newInput(); 1004 1005 /** 1006 * Creates a {@link CodedInputStream} which can be used to read the bytes. Using this is often 1007 * more efficient than creating a {@link CodedInputStream} that wraps the result of {@link 1008 * #newInput()}. 1009 * 1010 * @return stream based on wrapped data 1011 */ newCodedInput()1012 public abstract CodedInputStream newCodedInput(); 1013 1014 // ================================================================= 1015 // Output stream 1016 1017 /** 1018 * Creates a new {@link Output} with the given initial capacity. Call {@link 1019 * Output#toByteString()} to create the {@code ByteString} instance. 1020 * 1021 * <p>A {@link ByteString.Output} offers the same functionality as a {@link 1022 * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte} 1023 * array. 1024 * 1025 * @param initialCapacity estimate of number of bytes to be written 1026 * @return {@code OutputStream} for building a {@code ByteString} 1027 */ newOutput(int initialCapacity)1028 public static Output newOutput(int initialCapacity) { 1029 return new Output(initialCapacity); 1030 } 1031 1032 /** 1033 * Creates a new {@link Output}. Call {@link Output#toByteString()} to create the {@code 1034 * ByteString} instance. 1035 * 1036 * <p>A {@link ByteString.Output} offers the same functionality as a {@link 1037 * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte 1038 * array}. 1039 * 1040 * @return {@code OutputStream} for building a {@code ByteString} 1041 */ newOutput()1042 public static Output newOutput() { 1043 return new Output(CONCATENATE_BY_COPY_SIZE); 1044 } 1045 1046 /** 1047 * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to create the {@code 1048 * ByteString} instance. 1049 */ 1050 public static final class Output extends OutputStream { 1051 // Implementation note. 1052 // The public methods of this class must be synchronized. ByteStrings 1053 // are guaranteed to be immutable. Without some sort of locking, it could 1054 // be possible for one thread to call toByteString(), while another thread 1055 // is still modifying the underlying byte array. 1056 1057 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; 1058 // argument passed by user, indicating initial capacity. 1059 private final int initialCapacity; 1060 // ByteStrings to be concatenated to create the result 1061 private final ArrayList<ByteString> flushedBuffers; 1062 // Total number of bytes in the ByteStrings of flushedBuffers 1063 private int flushedBuffersTotalBytes; 1064 // Current buffer to which we are writing 1065 private byte[] buffer; 1066 // Location in buffer[] to which we write the next byte. 1067 private int bufferPos; 1068 1069 /** 1070 * Creates a new ByteString output stream with the specified initial capacity. 1071 * 1072 * @param initialCapacity the initial capacity of the output stream. 1073 */ Output(int initialCapacity)1074 Output(int initialCapacity) { 1075 if (initialCapacity < 0) { 1076 throw new IllegalArgumentException("Buffer size < 0"); 1077 } 1078 this.initialCapacity = initialCapacity; 1079 this.flushedBuffers = new ArrayList<ByteString>(); 1080 this.buffer = new byte[initialCapacity]; 1081 } 1082 1083 @Override write(int b)1084 public synchronized void write(int b) { 1085 if (bufferPos == buffer.length) { 1086 flushFullBuffer(1); 1087 } 1088 buffer[bufferPos++] = (byte) b; 1089 } 1090 1091 @Override write(byte[] b, int offset, int length)1092 public synchronized void write(byte[] b, int offset, int length) { 1093 if (length <= buffer.length - bufferPos) { 1094 // The bytes can fit into the current buffer. 1095 System.arraycopy(b, offset, buffer, bufferPos, length); 1096 bufferPos += length; 1097 } else { 1098 // Use up the current buffer 1099 int copySize = buffer.length - bufferPos; 1100 System.arraycopy(b, offset, buffer, bufferPos, copySize); 1101 offset += copySize; 1102 length -= copySize; 1103 // Flush the buffer, and get a new buffer at least big enough to cover 1104 // what we still need to output 1105 flushFullBuffer(length); 1106 System.arraycopy(b, offset, buffer, /* count= */ 0, length); 1107 bufferPos = length; 1108 } 1109 } 1110 1111 /** 1112 * Creates a byte string with the size and contents of this output stream. This does not create 1113 * a new copy of the underlying bytes. If the stream size grows dynamically, the runtime is 1114 * O(log n) in respect to the number of bytes written to the {@link Output}. If the stream size 1115 * stays within the initial capacity, the runtime is O(1). 1116 * 1117 * @return the current contents of this output stream, as a byte string. 1118 */ toByteString()1119 public synchronized ByteString toByteString() { 1120 flushLastBuffer(); 1121 return ByteString.copyFrom(flushedBuffers); 1122 } 1123 1124 /** 1125 * Writes the complete contents of this byte array output stream to the specified output stream 1126 * argument. 1127 * 1128 * @param out the output stream to which to write the data. 1129 * @throws IOException if an I/O error occurs. 1130 */ writeTo(OutputStream out)1131 public void writeTo(OutputStream out) throws IOException { 1132 ByteString[] cachedFlushBuffers; 1133 byte[] cachedBuffer; 1134 int cachedBufferPos; 1135 synchronized (this) { 1136 // Copy the information we need into local variables so as to hold 1137 // the lock for as short a time as possible. 1138 cachedFlushBuffers = flushedBuffers.toArray(new ByteString[0]); 1139 cachedBuffer = buffer; 1140 cachedBufferPos = bufferPos; 1141 } 1142 for (ByteString byteString : cachedFlushBuffers) { 1143 byteString.writeTo(out); 1144 } 1145 1146 out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos)); 1147 } 1148 1149 /** 1150 * Returns the current size of the output stream. 1151 * 1152 * @return the current size of the output stream 1153 */ size()1154 public synchronized int size() { 1155 return flushedBuffersTotalBytes + bufferPos; 1156 } 1157 1158 /** 1159 * Resets this stream, so that all currently accumulated output in the output stream is 1160 * discarded. The output stream can be used again, reusing the already allocated buffer space. 1161 */ reset()1162 public synchronized void reset() { 1163 flushedBuffers.clear(); 1164 flushedBuffersTotalBytes = 0; 1165 bufferPos = 0; 1166 } 1167 1168 @Override toString()1169 public String toString() { 1170 return String.format( 1171 "<ByteString.Output@%s size=%d>", 1172 Integer.toHexString(System.identityHashCode(this)), size()); 1173 } 1174 1175 /** 1176 * Internal function used by writers. The current buffer is full, and the writer needs a new 1177 * buffer whose size is at least the specified minimum size. 1178 */ flushFullBuffer(int minSize)1179 private void flushFullBuffer(int minSize) { 1180 flushedBuffers.add(new LiteralByteString(buffer)); 1181 flushedBuffersTotalBytes += buffer.length; 1182 // We want to increase our total capacity by 50%, but as a minimum, 1183 // the new buffer should also at least be >= minSize and 1184 // >= initial Capacity. 1185 int newSize = Math.max(initialCapacity, Math.max(minSize, flushedBuffersTotalBytes >>> 1)); 1186 buffer = new byte[newSize]; 1187 bufferPos = 0; 1188 } 1189 1190 /** 1191 * Internal function used by {@link #toByteString()}. The current buffer may or may not be full, 1192 * but it needs to be flushed. 1193 */ flushLastBuffer()1194 private void flushLastBuffer() { 1195 if (bufferPos < buffer.length) { 1196 if (bufferPos > 0) { 1197 byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos); 1198 flushedBuffers.add(new LiteralByteString(bufferCopy)); 1199 } 1200 // We reuse this buffer for further writes. 1201 } else { 1202 // Buffer is completely full. Huzzah. 1203 flushedBuffers.add(new LiteralByteString(buffer)); 1204 // 99% of the time, we're not going to use this OutputStream again. 1205 // We set buffer to an empty byte stream so that we're handling this 1206 // case without wasting space. In the rare case that more writes 1207 // *do* occur, this empty buffer will be flushed and an appropriately 1208 // sized new buffer will be created. 1209 buffer = EMPTY_BYTE_ARRAY; 1210 } 1211 flushedBuffersTotalBytes += bufferPos; 1212 bufferPos = 0; 1213 } 1214 } 1215 1216 /** 1217 * Constructs a new {@code ByteString} builder, which allows you to efficiently construct a {@code 1218 * ByteString} by writing to a {@link CodedOutputStream}. Using this is much more efficient than 1219 * calling {@code newOutput()} and wrapping that in a {@code CodedOutputStream}. 1220 * 1221 * <p>This is package-private because it's a somewhat confusing interface. Users can call {@link 1222 * Message#toByteString()} instead of calling this directly. 1223 * 1224 * @param size The target byte size of the {@code ByteString}. You must write exactly this many 1225 * bytes before building the result. 1226 * @return the builder 1227 */ newCodedBuilder(int size)1228 static CodedBuilder newCodedBuilder(int size) { 1229 return new CodedBuilder(size); 1230 } 1231 1232 /** See {@link ByteString#newCodedBuilder(int)}. */ 1233 static final class CodedBuilder { 1234 private final CodedOutputStream output; 1235 private final byte[] buffer; 1236 CodedBuilder(int size)1237 private CodedBuilder(int size) { 1238 buffer = new byte[size]; 1239 output = CodedOutputStream.newInstance(buffer); 1240 } 1241 build()1242 public ByteString build() { 1243 output.checkNoSpaceLeft(); 1244 1245 // We can be confident that the CodedOutputStream will not modify the 1246 // underlying bytes anymore because it already wrote all of them. So, 1247 // no need to make a copy. 1248 return new LiteralByteString(buffer); 1249 } 1250 getCodedOutput()1251 public CodedOutputStream getCodedOutput() { 1252 return output; 1253 } 1254 } 1255 1256 // ================================================================= 1257 // Methods {@link RopeByteString} needs on instances, which aren't part of the 1258 // public API. 1259 1260 /** 1261 * Return the depth of the tree representing this {@code ByteString}, if any, whose root is this 1262 * node. If this is a leaf node, return 0. 1263 * 1264 * @return tree depth or zero 1265 */ getTreeDepth()1266 protected abstract int getTreeDepth(); 1267 1268 /** 1269 * Return {@code true} if this ByteString is literal (a leaf node) or a flat-enough tree in the 1270 * sense of {@link RopeByteString}. 1271 * 1272 * @return true if the tree is flat enough 1273 */ isBalanced()1274 protected abstract boolean isBalanced(); 1275 1276 /** 1277 * Return the cached hash code if available. 1278 * 1279 * @return value of cached hash code or 0 if not computed yet 1280 */ peekCachedHashCode()1281 protected final int peekCachedHashCode() { 1282 return hash; 1283 } 1284 1285 /** 1286 * Compute the hash across the value bytes starting with the given hash, and return the result. 1287 * This is used to compute the hash across strings represented as a set of pieces by allowing the 1288 * hash computation to be continued from piece to piece. 1289 * 1290 * @param h starting hash value 1291 * @param offset offset into this value to start looking at data values 1292 * @param length number of data values to include in the hash computation 1293 * @return ending hash value 1294 */ partialHash(int h, int offset, int length)1295 protected abstract int partialHash(int h, int offset, int length); 1296 1297 /** 1298 * Checks that the given index falls within the specified array size. 1299 * 1300 * @param index the index position to be tested 1301 * @param size the length of the array 1302 * @throws IndexOutOfBoundsException if the index does not fall within the array. 1303 */ checkIndex(int index, int size)1304 static void checkIndex(int index, int size) { 1305 if ((index | (size - (index + 1))) < 0) { 1306 if (index < 0) { 1307 throw new ArrayIndexOutOfBoundsException("Index < 0: " + index); 1308 } 1309 throw new ArrayIndexOutOfBoundsException("Index > length: " + index + ", " + size); 1310 } 1311 } 1312 1313 /** 1314 * Checks that the given range falls within the bounds of an array 1315 * 1316 * @param startIndex the start index of the range (inclusive) 1317 * @param endIndex the end index of the range (exclusive) 1318 * @param size the size of the array. 1319 * @return the length of the range. 1320 * @throws IndexOutOfBoundsException some or all of the range falls outside of the array. 1321 */ 1322 @CanIgnoreReturnValue checkRange(int startIndex, int endIndex, int size)1323 static int checkRange(int startIndex, int endIndex, int size) { 1324 final int length = endIndex - startIndex; 1325 if ((startIndex | endIndex | length | (size - endIndex)) < 0) { 1326 if (startIndex < 0) { 1327 throw new IndexOutOfBoundsException("Beginning index: " + startIndex + " < 0"); 1328 } 1329 if (endIndex < startIndex) { 1330 throw new IndexOutOfBoundsException( 1331 "Beginning index larger than ending index: " + startIndex + ", " + endIndex); 1332 } 1333 // endIndex >= size 1334 throw new IndexOutOfBoundsException("End index: " + endIndex + " >= " + size); 1335 } 1336 return length; 1337 } 1338 1339 @Override toString()1340 public final String toString() { 1341 return String.format( 1342 Locale.ROOT, 1343 "<ByteString@%s size=%d contents=\"%s\">", 1344 toHexString(identityHashCode(this)), 1345 size(), 1346 truncateAndEscapeForDisplay()); 1347 } 1348 truncateAndEscapeForDisplay()1349 private String truncateAndEscapeForDisplay() { 1350 final int limit = 50; 1351 1352 return size() <= limit ? escapeBytes(this) : escapeBytes(substring(0, limit - 3)) + "..."; 1353 } 1354 1355 /** 1356 * This class implements a {@link com.google.protobuf.ByteString} backed by a single array of 1357 * bytes, contiguous in memory. It supports substring by pointing to only a sub-range of the 1358 * underlying byte array, meaning that a substring will reference the full byte-array of the 1359 * string it's made from, exactly as with {@link String}. 1360 * 1361 * @author carlanton@google.com (Carl Haverl) 1362 */ 1363 // Keep this class private to avoid deadlocks in classloading across threads as ByteString's 1364 // static initializer loads LiteralByteString and another thread loads LiteralByteString. 1365 private static class LiteralByteString extends ByteString.LeafByteString { 1366 private static final long serialVersionUID = 1L; 1367 1368 protected final byte[] bytes; 1369 1370 /** 1371 * Creates a {@code LiteralByteString} backed by the given array, without copying. 1372 * 1373 * @param bytes array to wrap 1374 */ LiteralByteString(byte[] bytes)1375 LiteralByteString(byte[] bytes) { 1376 if (bytes == null) { 1377 throw new NullPointerException(); 1378 } 1379 this.bytes = bytes; 1380 } 1381 1382 @Override byteAt(int index)1383 public byte byteAt(int index) { 1384 // Unlike most methods in this class, this one is a direct implementation 1385 // ignoring the potential offset because we need to do range-checking in the 1386 // substring case anyway. 1387 return bytes[index]; 1388 } 1389 1390 @Override internalByteAt(int index)1391 byte internalByteAt(int index) { 1392 return bytes[index]; 1393 } 1394 1395 @Override size()1396 public int size() { 1397 return bytes.length; 1398 } 1399 1400 // ================================================================= 1401 // ByteString -> substring 1402 1403 @Override substring(int beginIndex, int endIndex)1404 public final ByteString substring(int beginIndex, int endIndex) { 1405 final int length = checkRange(beginIndex, endIndex, size()); 1406 1407 if (length == 0) { 1408 return ByteString.EMPTY; 1409 } 1410 1411 return new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex, length); 1412 } 1413 1414 // ================================================================= 1415 // ByteString -> byte[] 1416 1417 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1418 protected void copyToInternal( 1419 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 1420 // Optimized form, not for subclasses, since we don't call 1421 // getOffsetIntoBytes() or check the 'numberToCopy' parameter. 1422 // TODO: Is not calling getOffsetIntoBytes really saving that much? 1423 System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy); 1424 } 1425 1426 @Override copyTo(ByteBuffer target)1427 public final void copyTo(ByteBuffer target) { 1428 target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes 1429 } 1430 1431 @Override asReadOnlyByteBuffer()1432 public final ByteBuffer asReadOnlyByteBuffer() { 1433 return ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size()).asReadOnlyBuffer(); 1434 } 1435 1436 @Override asReadOnlyByteBufferList()1437 public final List<ByteBuffer> asReadOnlyByteBufferList() { 1438 return Collections.singletonList(asReadOnlyByteBuffer()); 1439 } 1440 1441 @Override writeTo(OutputStream outputStream)1442 public final void writeTo(OutputStream outputStream) throws IOException { 1443 outputStream.write(toByteArray()); 1444 } 1445 1446 @Override writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)1447 final void writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite) 1448 throws IOException { 1449 outputStream.write(bytes, getOffsetIntoBytes() + sourceOffset, numberToWrite); 1450 } 1451 1452 @Override writeTo(ByteOutput output)1453 final void writeTo(ByteOutput output) throws IOException { 1454 output.writeLazy(bytes, getOffsetIntoBytes(), size()); 1455 } 1456 1457 @Override toStringInternal(Charset charset)1458 protected final String toStringInternal(Charset charset) { 1459 return new String(bytes, getOffsetIntoBytes(), size(), charset); 1460 } 1461 1462 // ================================================================= 1463 // UTF-8 decoding 1464 1465 @Override isValidUtf8()1466 public final boolean isValidUtf8() { 1467 int offset = getOffsetIntoBytes(); 1468 return Utf8.isValidUtf8(bytes, offset, offset + size()); 1469 } 1470 1471 @Override partialIsValidUtf8(int state, int offset, int length)1472 protected final int partialIsValidUtf8(int state, int offset, int length) { 1473 int index = getOffsetIntoBytes() + offset; 1474 return Utf8.partialIsValidUtf8(state, bytes, index, index + length); 1475 } 1476 1477 // ================================================================= 1478 // equals() and hashCode() 1479 1480 @Override equals(Object other)1481 public final boolean equals(Object other) { 1482 if (other == this) { 1483 return true; 1484 } 1485 if (!(other instanceof ByteString)) { 1486 return false; 1487 } 1488 1489 if (size() != ((ByteString) other).size()) { 1490 return false; 1491 } 1492 if (size() == 0) { 1493 return true; 1494 } 1495 1496 if (other instanceof LiteralByteString) { 1497 LiteralByteString otherAsLiteral = (LiteralByteString) other; 1498 // If we know the hash codes and they are not equal, we know the byte 1499 // strings are not equal. 1500 int thisHash = peekCachedHashCode(); 1501 int thatHash = otherAsLiteral.peekCachedHashCode(); 1502 if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) { 1503 return false; 1504 } 1505 1506 return equalsRange((LiteralByteString) other, 0, size()); 1507 } else { 1508 // RopeByteString and NioByteString. 1509 return other.equals(this); 1510 } 1511 } 1512 1513 /** 1514 * Check equality of the substring of given length of this object starting at zero with another 1515 * {@code LiteralByteString} substring starting at offset. 1516 * 1517 * @param other what to compare a substring in 1518 * @param offset offset into other 1519 * @param length number of bytes to compare 1520 * @return true for equality of substrings, else false. 1521 */ 1522 @Override equalsRange(ByteString other, int offset, int length)1523 final boolean equalsRange(ByteString other, int offset, int length) { 1524 if (length > other.size()) { 1525 throw new IllegalArgumentException("Length too large: " + length + size()); 1526 } 1527 if (offset + length > other.size()) { 1528 throw new IllegalArgumentException( 1529 "Ran off end of other: " + offset + ", " + length + ", " + other.size()); 1530 } 1531 1532 if (other instanceof LiteralByteString) { 1533 LiteralByteString lbsOther = (LiteralByteString) other; 1534 byte[] thisBytes = bytes; 1535 byte[] otherBytes = lbsOther.bytes; 1536 int thisLimit = getOffsetIntoBytes() + length; 1537 for (int thisIndex = getOffsetIntoBytes(), 1538 otherIndex = lbsOther.getOffsetIntoBytes() + offset; 1539 (thisIndex < thisLimit); 1540 ++thisIndex, ++otherIndex) { 1541 if (thisBytes[thisIndex] != otherBytes[otherIndex]) { 1542 return false; 1543 } 1544 } 1545 return true; 1546 } 1547 1548 return other.substring(offset, offset + length).equals(substring(0, length)); 1549 } 1550 1551 @Override partialHash(int h, int offset, int length)1552 protected final int partialHash(int h, int offset, int length) { 1553 return Internal.partialHash(h, bytes, getOffsetIntoBytes() + offset, length); 1554 } 1555 1556 // ================================================================= 1557 // Input stream 1558 1559 @Override newInput()1560 public final InputStream newInput() { 1561 return new ByteArrayInputStream(bytes, getOffsetIntoBytes(), size()); // No copy 1562 } 1563 1564 @Override newCodedInput()1565 public final CodedInputStream newCodedInput() { 1566 // We trust CodedInputStream not to modify the bytes, or to give anyone 1567 // else access to them. 1568 return CodedInputStream.newInstance( 1569 bytes, getOffsetIntoBytes(), size(), /* bufferIsImmutable= */ true); 1570 } 1571 1572 // ================================================================= 1573 // Internal methods 1574 1575 /** 1576 * Offset into {@code bytes[]} to use, non-zero for substrings. 1577 * 1578 * @return always 0 for this class 1579 */ getOffsetIntoBytes()1580 protected int getOffsetIntoBytes() { 1581 return 0; 1582 } 1583 } 1584 1585 /** 1586 * This class is used to represent the substring of a {@link ByteString} over a single byte array. 1587 * In terms of the public API of {@link ByteString}, you end up here by calling {@link 1588 * ByteString#copyFrom(byte[])} followed by {@link ByteString#substring(int, int)}. 1589 * 1590 * <p>This class contains most of the overhead involved in creating a substring from a {@link 1591 * LiteralByteString}. The overhead involves some range-checking and two extra fields. 1592 * 1593 * @author carlanton@google.com (Carl Haverl) 1594 */ 1595 // Keep this class private to avoid deadlocks in classloading across threads as ByteString's 1596 // static initializer loads LiteralByteString and another thread loads BoundedByteString. 1597 private static final class BoundedByteString extends LiteralByteString { 1598 private final int bytesOffset; 1599 private final int bytesLength; 1600 1601 /** 1602 * Creates a {@code BoundedByteString} backed by the sub-range of given array, without copying. 1603 * 1604 * @param bytes array to wrap 1605 * @param offset index to first byte to use in bytes 1606 * @param length number of bytes to use from bytes 1607 * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0}, or if {@code 1608 * offset + length > bytes.length}. 1609 */ BoundedByteString(byte[] bytes, int offset, int length)1610 BoundedByteString(byte[] bytes, int offset, int length) { 1611 super(bytes); 1612 checkRange(offset, offset + length, bytes.length); 1613 1614 this.bytesOffset = offset; 1615 this.bytesLength = length; 1616 } 1617 1618 /** 1619 * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for 1620 * backwards-compatibility reasons although it would more properly be {@link 1621 * IndexOutOfBoundsException}. 1622 * 1623 * @param index index of byte 1624 * @return the value 1625 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 1626 */ 1627 @Override byteAt(int index)1628 public byte byteAt(int index) { 1629 // We must check the index ourselves as we cannot rely on Java array index 1630 // checking for substrings. 1631 checkIndex(index, size()); 1632 return bytes[bytesOffset + index]; 1633 } 1634 1635 @Override internalByteAt(int index)1636 byte internalByteAt(int index) { 1637 return bytes[bytesOffset + index]; 1638 } 1639 1640 @Override size()1641 public int size() { 1642 return bytesLength; 1643 } 1644 1645 @Override getOffsetIntoBytes()1646 protected int getOffsetIntoBytes() { 1647 return bytesOffset; 1648 } 1649 1650 // ================================================================= 1651 // ByteString -> byte[] 1652 1653 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1654 protected void copyToInternal( 1655 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 1656 System.arraycopy( 1657 bytes, getOffsetIntoBytes() + sourceOffset, target, targetOffset, numberToCopy); 1658 } 1659 1660 // ================================================================= 1661 // Serializable 1662 1663 private static final long serialVersionUID = 1L; 1664 writeReplace()1665 Object writeReplace() { 1666 return ByteString.wrap(toByteArray()); 1667 } 1668 readObject(@uppressWarnings"unused") ObjectInputStream in)1669 private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException { 1670 throw new InvalidObjectException( 1671 "BoundedByteStream instances are not to be serialized directly"); 1672 } 1673 } 1674 1675 /** A {@link ByteString} that wraps around a {@link ByteBuffer}. */ 1676 // Keep this class private to avoid deadlocks in classloading across threads as ByteString's 1677 // static initializer loads LiteralByteString and another thread loads BoundedByteString. 1678 private static final class NioByteString extends ByteString.LeafByteString { 1679 private final ByteBuffer buffer; 1680 NioByteString(ByteBuffer buffer)1681 NioByteString(ByteBuffer buffer) { 1682 checkNotNull(buffer, "buffer"); 1683 1684 // Use native byte order for fast fixed32/64 operations. 1685 this.buffer = buffer.slice().order(ByteOrder.nativeOrder()); 1686 } 1687 1688 // ================================================================= 1689 // Serializable 1690 1691 /** Magic method that lets us override serialization behavior. */ writeReplace()1692 private Object writeReplace() { 1693 return ByteString.copyFrom(buffer.slice()); 1694 } 1695 1696 /** Magic method that lets us override deserialization behavior. */ readObject(@uppressWarnings"unused") ObjectInputStream in)1697 private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException { 1698 throw new InvalidObjectException("NioByteString instances are not to be serialized directly"); 1699 } 1700 1701 // ================================================================= 1702 1703 @Override byteAt(int index)1704 public byte byteAt(int index) { 1705 try { 1706 return buffer.get(index); 1707 } catch (ArrayIndexOutOfBoundsException e) { 1708 throw e; 1709 } catch (IndexOutOfBoundsException e) { 1710 throw new ArrayIndexOutOfBoundsException(e.getMessage()); 1711 } 1712 } 1713 1714 @Override internalByteAt(int index)1715 public byte internalByteAt(int index) { 1716 // it isn't possible to avoid the bounds checking inside of ByteBuffer, so just use the 1717 // default 1718 // implementation. 1719 return byteAt(index); 1720 } 1721 1722 @Override size()1723 public int size() { 1724 return buffer.remaining(); 1725 } 1726 1727 @Override substring(int beginIndex, int endIndex)1728 public ByteString substring(int beginIndex, int endIndex) { 1729 try { 1730 ByteBuffer slice = slice(beginIndex, endIndex); 1731 return new NioByteString(slice); 1732 } catch (ArrayIndexOutOfBoundsException e) { 1733 throw e; 1734 } catch (IndexOutOfBoundsException e) { 1735 throw new ArrayIndexOutOfBoundsException(e.getMessage()); 1736 } 1737 } 1738 1739 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1740 protected void copyToInternal( 1741 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 1742 ByteBuffer slice = buffer.slice(); 1743 Java8Compatibility.position(slice, sourceOffset); 1744 slice.get(target, targetOffset, numberToCopy); 1745 } 1746 1747 @Override copyTo(ByteBuffer target)1748 public void copyTo(ByteBuffer target) { 1749 target.put(buffer.slice()); 1750 } 1751 1752 @Override writeTo(OutputStream out)1753 public void writeTo(OutputStream out) throws IOException { 1754 out.write(toByteArray()); 1755 } 1756 1757 @Override equalsRange(ByteString other, int offset, int length)1758 boolean equalsRange(ByteString other, int offset, int length) { 1759 return substring(0, length).equals(other.substring(offset, offset + length)); 1760 } 1761 1762 @Override writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)1763 void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException { 1764 if (buffer.hasArray()) { 1765 // Optimized write for array-backed buffers. 1766 // Note that we're taking the risk that a malicious OutputStream could modify the array. 1767 int bufferOffset = buffer.arrayOffset() + buffer.position() + sourceOffset; 1768 out.write(buffer.array(), bufferOffset, numberToWrite); 1769 return; 1770 } 1771 1772 ByteBufferWriter.write(slice(sourceOffset, sourceOffset + numberToWrite), out); 1773 } 1774 1775 @Override writeTo(ByteOutput output)1776 void writeTo(ByteOutput output) throws IOException { 1777 output.writeLazy(buffer.slice()); 1778 } 1779 1780 @Override asReadOnlyByteBuffer()1781 public ByteBuffer asReadOnlyByteBuffer() { 1782 return buffer.asReadOnlyBuffer(); 1783 } 1784 1785 @Override asReadOnlyByteBufferList()1786 public List<ByteBuffer> asReadOnlyByteBufferList() { 1787 return Collections.singletonList(asReadOnlyByteBuffer()); 1788 } 1789 1790 @Override toStringInternal(Charset charset)1791 protected String toStringInternal(Charset charset) { 1792 final byte[] bytes; 1793 final int offset; 1794 final int length; 1795 if (buffer.hasArray()) { 1796 bytes = buffer.array(); 1797 offset = buffer.arrayOffset() + buffer.position(); 1798 length = buffer.remaining(); 1799 } else { 1800 // TODO: Can we optimize this? 1801 bytes = toByteArray(); 1802 offset = 0; 1803 length = bytes.length; 1804 } 1805 return new String(bytes, offset, length, charset); 1806 } 1807 1808 @Override isValidUtf8()1809 public boolean isValidUtf8() { 1810 return Utf8.isValidUtf8(buffer); 1811 } 1812 1813 @Override partialIsValidUtf8(int state, int offset, int length)1814 protected int partialIsValidUtf8(int state, int offset, int length) { 1815 return Utf8.partialIsValidUtf8(state, buffer, offset, offset + length); 1816 } 1817 1818 @Override equals(Object other)1819 public boolean equals(Object other) { 1820 if (other == this) { 1821 return true; 1822 } 1823 if (!(other instanceof ByteString)) { 1824 return false; 1825 } 1826 ByteString otherString = ((ByteString) other); 1827 if (size() != otherString.size()) { 1828 return false; 1829 } 1830 if (size() == 0) { 1831 return true; 1832 } 1833 if (other instanceof NioByteString) { 1834 return buffer.equals(((NioByteString) other).buffer); 1835 } 1836 if (other instanceof RopeByteString) { 1837 return other.equals(this); 1838 } 1839 return buffer.equals(otherString.asReadOnlyByteBuffer()); 1840 } 1841 1842 @Override partialHash(int h, int offset, int length)1843 protected int partialHash(int h, int offset, int length) { 1844 for (int i = offset; i < offset + length; i++) { 1845 h = h * 31 + buffer.get(i); 1846 } 1847 return h; 1848 } 1849 1850 @Override newInput()1851 public InputStream newInput() { 1852 return new InputStream() { 1853 private final ByteBuffer buf = buffer.slice(); 1854 1855 @Override 1856 public void mark(int readlimit) { 1857 Java8Compatibility.mark(buf); 1858 } 1859 1860 @Override 1861 public boolean markSupported() { 1862 return true; 1863 } 1864 1865 @Override 1866 public void reset() throws IOException { 1867 try { 1868 Java8Compatibility.reset(buf); 1869 } catch (InvalidMarkException e) { 1870 throw new IOException(e); 1871 } 1872 } 1873 1874 @Override 1875 public int available() throws IOException { 1876 return buf.remaining(); 1877 } 1878 1879 @Override 1880 public int read() throws IOException { 1881 if (!buf.hasRemaining()) { 1882 return -1; 1883 } 1884 return buf.get() & 0xFF; 1885 } 1886 1887 @Override 1888 public int read(byte[] bytes, int off, int len) throws IOException { 1889 if (!buf.hasRemaining()) { 1890 return -1; 1891 } 1892 1893 len = Math.min(len, buf.remaining()); 1894 buf.get(bytes, off, len); 1895 return len; 1896 } 1897 }; 1898 } 1899 1900 @Override newCodedInput()1901 public CodedInputStream newCodedInput() { 1902 return CodedInputStream.newInstance(buffer, true); 1903 } 1904 1905 /** 1906 * Creates a slice of a range of this buffer. 1907 * 1908 * @param beginIndex the beginning index of the slice (inclusive). 1909 * @param endIndex the end index of the slice (exclusive). 1910 * @return the requested slice. 1911 */ slice(int beginIndex, int endIndex)1912 private ByteBuffer slice(int beginIndex, int endIndex) { 1913 if (beginIndex < buffer.position() || endIndex > buffer.limit() || beginIndex > endIndex) { 1914 throw new IllegalArgumentException( 1915 String.format("Invalid indices [%d, %d]", beginIndex, endIndex)); 1916 } 1917 1918 ByteBuffer slice = buffer.slice(); 1919 Java8Compatibility.position(slice, beginIndex - buffer.position()); 1920 Java8Compatibility.limit(slice, endIndex - buffer.position()); 1921 return slice; 1922 } 1923 } 1924 } 1925