1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import static com.google.protobuf.TextFormatEscaper.escapeBytes; 34 import static java.lang.Integer.toHexString; 35 import static java.lang.System.identityHashCode; 36 37 import java.io.ByteArrayInputStream; 38 import java.io.ByteArrayOutputStream; 39 import java.io.IOException; 40 import java.io.InputStream; 41 import java.io.InvalidObjectException; 42 import java.io.ObjectInputStream; 43 import java.io.OutputStream; 44 import java.io.Serializable; 45 import java.io.UnsupportedEncodingException; 46 import java.nio.ByteBuffer; 47 import java.nio.charset.Charset; 48 import java.nio.charset.UnsupportedCharsetException; 49 import java.util.ArrayList; 50 import java.util.Arrays; 51 import java.util.Collection; 52 import java.util.Collections; 53 import java.util.Comparator; 54 import java.util.Iterator; 55 import java.util.List; 56 import java.util.Locale; 57 import java.util.NoSuchElementException; 58 59 /** 60 * Immutable sequence of bytes. Provides conversions to and from {@code byte[]}, {@link 61 * java.lang.String}, {@link ByteBuffer}, {@link InputStream}, {@link OutputStream}. Also provides a 62 * conversion to {@link CodedInputStream}. 63 * 64 * <p>Like {@link String}, the contents of a {@link ByteString} can never be observed to change, not 65 * even in the presence of a data race or incorrect API usage in the client code. 66 * 67 * <p>Substring is supported by sharing the reference to the immutable underlying bytes. 68 * Concatenation is likewise supported without copying (long strings) by building a tree of pieces 69 * in {@link RopeByteString}. 70 * 71 * @author crazybob@google.com Bob Lee 72 * @author kenton@google.com Kenton Varda 73 * @author carlanton@google.com Carl Haverl 74 * @author martinrb@google.com Martin Buchholz 75 */ 76 public abstract class ByteString implements Iterable<Byte>, Serializable { 77 78 /** 79 * When two strings to be concatenated have a combined length shorter than this, we just copy 80 * their bytes on {@link #concat(ByteString)}. The trade-off is copy size versus the overhead of 81 * creating tree nodes in {@link RopeByteString}. 82 */ 83 static final int CONCATENATE_BY_COPY_SIZE = 128; 84 85 /** 86 * When copying an InputStream into a ByteString with .readFrom(), the chunks in the underlying 87 * rope start at 256 bytes, but double each iteration up to 8192 bytes. 88 */ 89 static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b 90 91 static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k 92 93 /** Empty {@code ByteString}. */ 94 public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY); 95 96 /** 97 * An interface to efficiently copy {@code byte[]}. 98 * 99 * <p>One of the noticeable costs of copying a byte[] into a new array using {@code 100 * System.arraycopy} is nullification of a new buffer before the copy. It has been shown the 101 * Hotspot VM is capable to intrisicfy {@code Arrays.copyOfRange} operation to avoid this 102 * expensive nullification and provide substantial performance gain. Unfortunately this does not 103 * hold on Android runtimes and could make the copy slightly slower due to additional code in the 104 * {@code Arrays.copyOfRange}. Thus we provide two different implementation for array copier for 105 * Hotspot and Android runtimes. 106 */ 107 private interface ByteArrayCopier { 108 /** Copies the specified range of the specified array into a new array */ copyFrom(byte[] bytes, int offset, int size)109 byte[] copyFrom(byte[] bytes, int offset, int size); 110 } 111 112 /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */ 113 private static final class SystemByteArrayCopier implements ByteArrayCopier { 114 @Override copyFrom(byte[] bytes, int offset, int size)115 public byte[] copyFrom(byte[] bytes, int offset, int size) { 116 byte[] copy = new byte[size]; 117 System.arraycopy(bytes, offset, copy, 0, size); 118 return copy; 119 } 120 } 121 122 /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */ 123 private static final class ArraysByteArrayCopier implements ByteArrayCopier { 124 @Override copyFrom(byte[] bytes, int offset, int size)125 public byte[] copyFrom(byte[] bytes, int offset, int size) { 126 return Arrays.copyOfRange(bytes, offset, offset + size); 127 } 128 } 129 130 private static final ByteArrayCopier byteArrayCopier; 131 132 static { 133 byteArrayCopier = 134 Android.isOnAndroidDevice() ? new SystemByteArrayCopier() : new ArraysByteArrayCopier(); 135 } 136 137 /** 138 * Cached hash value. Intentionally accessed via a data race, which is safe because of the Java 139 * Memory Model's "no out-of-thin-air values" guarantees for ints. A value of 0 implies that the 140 * hash has not been set. 141 */ 142 private int hash = 0; 143 144 // This constructor is here to prevent subclassing outside of this package, ByteString()145 ByteString() {} 146 147 /** 148 * Gets the byte at the given index. This method should be used only for random access to 149 * individual bytes. To access bytes sequentially, use the {@link ByteIterator} returned by {@link 150 * #iterator()}, and call {@link #substring(int, int)} first if necessary. 151 * 152 * @param index index of byte 153 * @return the value 154 * @throws IndexOutOfBoundsException {@code index < 0 or index >= size} 155 */ byteAt(int index)156 public abstract byte byteAt(int index); 157 158 /** 159 * Gets the byte at the given index, assumes bounds checking has already been performed. 160 * 161 * @param index index of byte 162 * @return the value 163 * @throws IndexOutOfBoundsException {@code index < 0 or index >= size} 164 */ internalByteAt(int index)165 abstract byte internalByteAt(int index); 166 167 /** 168 * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. To avoid 169 * auto-boxing, you may get the iterator manually and call {@link ByteIterator#nextByte()}. 170 * 171 * @return the iterator 172 */ 173 @Override iterator()174 public ByteIterator iterator() { 175 return new AbstractByteIterator() { 176 private int position = 0; 177 private final int limit = size(); 178 179 @Override 180 public boolean hasNext() { 181 return position < limit; 182 } 183 184 @Override 185 public byte nextByte() { 186 int currentPos = position; 187 if (currentPos >= limit) { 188 throw new NoSuchElementException(); 189 } 190 position = currentPos + 1; 191 return internalByteAt(currentPos); 192 } 193 }; 194 } 195 196 /** 197 * This interface extends {@code Iterator<Byte>}, so that we can return an unboxed {@code byte}. 198 */ 199 public interface ByteIterator extends Iterator<Byte> { 200 /** 201 * An alternative to {@link Iterator#next()} that returns an unboxed primitive {@code byte}. 202 * 203 * @return the next {@code byte} in the iteration 204 * @throws NoSuchElementException if the iteration has no more elements 205 */ nextByte()206 byte nextByte(); 207 } 208 209 abstract static class AbstractByteIterator implements ByteIterator { 210 @Override next()211 public final Byte next() { 212 // Boxing calls Byte.valueOf(byte), which does not instantiate. 213 return nextByte(); 214 } 215 216 @Override remove()217 public final void remove() { 218 throw new UnsupportedOperationException(); 219 } 220 } 221 222 /** 223 * Gets the number of bytes. 224 * 225 * @return size in bytes 226 */ size()227 public abstract int size(); 228 229 /** 230 * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. 231 * 232 * @return true if this is zero bytes long 233 */ isEmpty()234 public final boolean isEmpty() { 235 return size() == 0; 236 } 237 238 // ================================================================= 239 // Comparison 240 241 private static final int UNSIGNED_BYTE_MASK = 0xFF; 242 243 /** 244 * Returns the value of the given byte as an integer, interpreting the byte as an unsigned value. 245 * That is, returns {@code value + 256} if {@code value} is negative; {@code value} itself 246 * otherwise. 247 * 248 * <p>Note: This code was copied from {@link com.google.common.primitives.UnsignedBytes#toInt}, as 249 * Guava libraries cannot be used in the {@code com.google.protobuf} package. 250 */ toInt(byte value)251 private static int toInt(byte value) { 252 return value & UNSIGNED_BYTE_MASK; 253 } 254 255 /** 256 * Compares two {@link ByteString}s lexicographically, treating their contents as unsigned byte 257 * values between 0 and 255 (inclusive). 258 * 259 * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it 260 * is interpreted as an unsigned value, {@code 255}. 261 */ 262 private static final Comparator<ByteString> UNSIGNED_LEXICOGRAPHICAL_COMPARATOR = 263 new Comparator<ByteString>() { 264 @Override 265 public int compare(ByteString former, ByteString latter) { 266 ByteIterator formerBytes = former.iterator(); 267 ByteIterator latterBytes = latter.iterator(); 268 269 while (formerBytes.hasNext() && latterBytes.hasNext()) { 270 // Note: This code was copied from com.google.common.primitives.UnsignedBytes#compare, 271 // as Guava libraries cannot be used in the {@code com.google.protobuf} package. 272 int result = 273 Integer.compare(toInt(formerBytes.nextByte()), toInt(latterBytes.nextByte())); 274 if (result != 0) { 275 return result; 276 } 277 } 278 279 return Integer.compare(former.size(), latter.size()); 280 } 281 }; 282 283 /** 284 * Returns a {@link Comparator} which compares {@link ByteString}-s lexicographically 285 * as sequences of unsigned bytes (i.e. values between 0 and 255, inclusive). 286 * 287 * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it 288 * is interpreted as an unsigned value, {@code 255}: 289 * 290 * <ul> 291 * <li>{@code `-1` -> 0b11111111 (two's complement) -> 255} 292 * <li>{@code `1` -> 0b00000001 -> 1} 293 * </ul> 294 */ unsignedLexicographicalComparator()295 public static Comparator<ByteString> unsignedLexicographicalComparator() { 296 return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR; 297 } 298 299 // ================================================================= 300 // ByteString -> substring 301 302 /** 303 * Return the substring from {@code beginIndex}, inclusive, to the end of the string. 304 * 305 * @param beginIndex start at this index 306 * @return substring sharing underlying data 307 * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}. 308 */ substring(int beginIndex)309 public final ByteString substring(int beginIndex) { 310 return substring(beginIndex, size()); 311 } 312 313 /** 314 * Return the substring from {@code beginIndex}, inclusive, to {@code endIndex}, exclusive. 315 * 316 * @param beginIndex start at this index 317 * @param endIndex the last character is the one before this index 318 * @return substring sharing underlying data 319 * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or 320 * {@code beginIndex > endIndex}. 321 */ substring(int beginIndex, int endIndex)322 public abstract ByteString substring(int beginIndex, int endIndex); 323 324 /** 325 * Tests if this bytestring starts with the specified prefix. Similar to {@link 326 * String#startsWith(String)} 327 * 328 * @param prefix the prefix. 329 * @return <code>true</code> if the byte sequence represented by the argument is a prefix of the 330 * byte sequence represented by this string; <code>false</code> otherwise. 331 */ startsWith(ByteString prefix)332 public final boolean startsWith(ByteString prefix) { 333 return size() >= prefix.size() && substring(0, prefix.size()).equals(prefix); 334 } 335 336 /** 337 * Tests if this bytestring ends with the specified suffix. Similar to {@link 338 * String#endsWith(String)} 339 * 340 * @param suffix the suffix. 341 * @return <code>true</code> if the byte sequence represented by the argument is a suffix of the 342 * byte sequence represented by this string; <code>false</code> otherwise. 343 */ endsWith(ByteString suffix)344 public final boolean endsWith(ByteString suffix) { 345 return size() >= suffix.size() && substring(size() - suffix.size()).equals(suffix); 346 } 347 348 // ================================================================= 349 // byte[] -> ByteString 350 351 /** 352 * Copies the given bytes into a {@code ByteString}. 353 * 354 * @param bytes source array 355 * @param offset offset in source array 356 * @param size number of bytes to copy 357 * @return new {@code ByteString} 358 * @throws IndexOutOfBoundsException if {@code offset} or {@code size} are out of bounds 359 */ copyFrom(byte[] bytes, int offset, int size)360 public static ByteString copyFrom(byte[] bytes, int offset, int size) { 361 checkRange(offset, offset + size, bytes.length); 362 return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size)); 363 } 364 365 /** 366 * Copies the given bytes into a {@code ByteString}. 367 * 368 * @param bytes to copy 369 * @return new {@code ByteString} 370 */ copyFrom(byte[] bytes)371 public static ByteString copyFrom(byte[] bytes) { 372 return copyFrom(bytes, 0, bytes.length); 373 } 374 375 /** Wraps the given bytes into a {@code ByteString}. Intended for internal only usage. */ wrap(ByteBuffer buffer)376 static ByteString wrap(ByteBuffer buffer) { 377 if (buffer.hasArray()) { 378 final int offset = buffer.arrayOffset(); 379 return ByteString.wrap(buffer.array(), offset + buffer.position(), buffer.remaining()); 380 } else { 381 return new NioByteString(buffer); 382 } 383 } 384 385 /** 386 * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a 387 * classload of ByteString before LiteralByteString. 388 */ wrap(byte[] bytes)389 static ByteString wrap(byte[] bytes) { 390 // TODO(dweis): Return EMPTY when bytes are empty to reduce allocations? 391 return new LiteralByteString(bytes); 392 } 393 394 /** 395 * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a 396 * classload of ByteString before BoundedByteString and LiteralByteString. 397 */ wrap(byte[] bytes, int offset, int length)398 static ByteString wrap(byte[] bytes, int offset, int length) { 399 return new BoundedByteString(bytes, offset, length); 400 } 401 402 /** 403 * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into a {@code 404 * ByteString}. 405 * 406 * @param bytes source buffer 407 * @param size number of bytes to copy 408 * @return new {@code ByteString} 409 * @throws IndexOutOfBoundsException if {@code size > bytes.remaining()} 410 */ copyFrom(ByteBuffer bytes, int size)411 public static ByteString copyFrom(ByteBuffer bytes, int size) { 412 checkRange(0, size, bytes.remaining()); 413 byte[] copy = new byte[size]; 414 bytes.get(copy); 415 return new LiteralByteString(copy); 416 } 417 418 /** 419 * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into a {@code ByteString}. 420 * 421 * @param bytes sourceBuffer 422 * @return new {@code ByteString} 423 */ copyFrom(ByteBuffer bytes)424 public static ByteString copyFrom(ByteBuffer bytes) { 425 return copyFrom(bytes, bytes.remaining()); 426 } 427 428 /** 429 * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as 430 * a {@code ByteString}. 431 * 432 * @param text source string 433 * @param charsetName encoding to use 434 * @return new {@code ByteString} 435 * @throws UnsupportedEncodingException if the encoding isn't found 436 */ copyFrom(String text, String charsetName)437 public static ByteString copyFrom(String text, String charsetName) 438 throws UnsupportedEncodingException { 439 return new LiteralByteString(text.getBytes(charsetName)); 440 } 441 442 /** 443 * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as 444 * a {@code ByteString}. 445 * 446 * @param text source string 447 * @param charset encode using this charset 448 * @return new {@code ByteString} 449 */ copyFrom(String text, Charset charset)450 public static ByteString copyFrom(String text, Charset charset) { 451 return new LiteralByteString(text.getBytes(charset)); 452 } 453 454 /** 455 * Encodes {@code text} into a sequence of UTF-8 bytes and returns the result as a {@code 456 * ByteString}. 457 * 458 * @param text source string 459 * @return new {@code ByteString} 460 */ copyFromUtf8(String text)461 public static ByteString copyFromUtf8(String text) { 462 return new LiteralByteString(text.getBytes(Internal.UTF_8)); 463 } 464 465 // ================================================================= 466 // InputStream -> ByteString 467 468 /** 469 * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary 470 * until all bytes are read through to the end of the stream. 471 * 472 * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte 473 * arrays ("chunks") of the stream data. The first chunk is small, with subsequent chunks each 474 * being double the size, up to 8K. 475 * 476 * <p>Each byte read from the input stream will be copied twice to ensure that the resulting 477 * ByteString is truly immutable. 478 * 479 * @param streamToDrain The source stream, which is read completely but not closed. 480 * @return A new {@code ByteString} which is made up of chunks of various sizes, depending on the 481 * behavior of the underlying stream. 482 * @throws IOException IOException is thrown if there is a problem reading the underlying stream. 483 */ readFrom(InputStream streamToDrain)484 public static ByteString readFrom(InputStream streamToDrain) throws IOException { 485 return readFrom(streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); 486 } 487 488 /** 489 * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary 490 * until all bytes are read through to the end of the stream. 491 * 492 * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte 493 * arrays ("chunks") of the stream data. The chunkSize parameter sets the size of these byte 494 * arrays. 495 * 496 * <p>Each byte read from the input stream will be copied twice to ensure that the resulting 497 * ByteString is truly immutable. 498 * 499 * @param streamToDrain The source stream, which is read completely but not closed. 500 * @param chunkSize The size of the chunks in which to read the stream. 501 * @return A new {@code ByteString} which is made up of chunks of the given size. 502 * @throws IOException IOException is thrown if there is a problem reading the underlying stream. 503 */ readFrom(InputStream streamToDrain, int chunkSize)504 public static ByteString readFrom(InputStream streamToDrain, int chunkSize) throws IOException { 505 return readFrom(streamToDrain, chunkSize, chunkSize); 506 } 507 508 // Helper method that takes the chunk size range as a parameter. readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)509 public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize) 510 throws IOException { 511 Collection<ByteString> results = new ArrayList<ByteString>(); 512 513 // copy the inbound bytes into a list of chunks; the chunk size 514 // grows exponentially to support both short and long streams. 515 int chunkSize = minChunkSize; 516 while (true) { 517 ByteString chunk = readChunk(streamToDrain, chunkSize); 518 if (chunk == null) { 519 break; 520 } 521 results.add(chunk); 522 chunkSize = Math.min(chunkSize * 2, maxChunkSize); 523 } 524 525 return ByteString.copyFrom(results); 526 } 527 528 /** 529 * Blocks until a chunk of the given size can be made from the stream, or EOF is reached. Calls 530 * read() repeatedly in case the given stream implementation doesn't completely fill the given 531 * buffer in one read() call. 532 * 533 * @return A chunk of the desired size, or else a chunk as large as was available when end of 534 * stream was reached. Returns null if the given stream had no more data in it. 535 */ readChunk(InputStream in, final int chunkSize)536 private static ByteString readChunk(InputStream in, final int chunkSize) throws IOException { 537 final byte[] buf = new byte[chunkSize]; 538 int bytesRead = 0; 539 while (bytesRead < chunkSize) { 540 final int count = in.read(buf, bytesRead, chunkSize - bytesRead); 541 if (count == -1) { 542 break; 543 } 544 bytesRead += count; 545 } 546 547 if (bytesRead == 0) { 548 return null; 549 } 550 551 // Always make a copy since InputStream could steal a reference to buf. 552 return ByteString.copyFrom(buf, 0, bytesRead); 553 } 554 555 // ================================================================= 556 // Multiple ByteStrings -> One ByteString 557 558 /** 559 * Concatenate the given {@code ByteString} to this one. Short concatenations, of total size 560 * smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are produced by copying the 561 * underlying bytes (as per Rope.java, <a 562 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 563 * BAP95 </a>. In general, the concatenate involves no copying. 564 * 565 * @param other string to concatenate 566 * @return a new {@code ByteString} instance 567 */ concat(ByteString other)568 public final ByteString concat(ByteString other) { 569 if (Integer.MAX_VALUE - size() < other.size()) { 570 throw new IllegalArgumentException( 571 "ByteString would be too long: " + size() + "+" + other.size()); 572 } 573 574 return RopeByteString.concatenate(this, other); 575 } 576 577 /** 578 * Concatenates all byte strings in the iterable and returns the result. This is designed to run 579 * in O(list size), not O(total bytes). 580 * 581 * <p>The returned {@code ByteString} is not necessarily a unique object. If the list is empty, 582 * the returned object is the singleton empty {@code ByteString}. If the list has only one 583 * element, that {@code ByteString} will be returned without copying. 584 * 585 * @param byteStrings strings to be concatenated 586 * @return new {@code ByteString} 587 */ copyFrom(Iterable<ByteString> byteStrings)588 public static ByteString copyFrom(Iterable<ByteString> byteStrings) { 589 // Determine the size; 590 final int size; 591 if (!(byteStrings instanceof Collection)) { 592 int tempSize = 0; 593 for (Iterator<ByteString> iter = byteStrings.iterator(); 594 iter.hasNext(); 595 iter.next(), ++tempSize) {} 596 size = tempSize; 597 } else { 598 size = ((Collection<ByteString>) byteStrings).size(); 599 } 600 601 if (size == 0) { 602 return EMPTY; 603 } 604 605 return balancedConcat(byteStrings.iterator(), size); 606 } 607 608 // Internal function used by copyFrom(Iterable<ByteString>). 609 // Create a balanced concatenation of the next "length" elements from the 610 // iterable. balancedConcat(Iterator<ByteString> iterator, int length)611 private static ByteString balancedConcat(Iterator<ByteString> iterator, int length) { 612 if (length < 1) { 613 throw new IllegalArgumentException(String.format("length (%s) must be >= 1", length)); 614 } 615 ByteString result; 616 if (length == 1) { 617 result = iterator.next(); 618 } else { 619 int halfLength = length >>> 1; 620 ByteString left = balancedConcat(iterator, halfLength); 621 ByteString right = balancedConcat(iterator, length - halfLength); 622 result = left.concat(right); 623 } 624 return result; 625 } 626 627 // ================================================================= 628 // ByteString -> byte[] 629 630 /** 631 * Copies bytes into a buffer at the given offset. 632 * 633 * <p>To copy a subset of bytes, you call this method on the return value of {@link 634 * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target, offset)} 635 * 636 * @param target buffer to copy into 637 * @param offset in the target buffer 638 * @throws IndexOutOfBoundsException if the offset is negative or too large 639 */ copyTo(byte[] target, int offset)640 public void copyTo(byte[] target, int offset) { 641 copyTo(target, 0, offset, size()); 642 } 643 644 /** 645 * Copies bytes into a buffer. 646 * 647 * @param target buffer to copy into 648 * @param sourceOffset offset within these bytes 649 * @param targetOffset offset within the target buffer 650 * @param numberToCopy number of bytes to copy 651 * @throws IndexOutOfBoundsException if an offset or size is negative or too large 652 * @deprecated Instead, call {@code byteString.substring(sourceOffset, sourceOffset + 653 * numberToCopy).copyTo(target, targetOffset)} 654 */ 655 @Deprecated copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)656 public final void copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 657 checkRange(sourceOffset, sourceOffset + numberToCopy, size()); 658 checkRange(targetOffset, targetOffset + numberToCopy, target.length); 659 if (numberToCopy > 0) { 660 copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 661 } 662 } 663 664 /** 665 * Internal (package private) implementation of {@link #copyTo(byte[],int,int,int)}. It assumes 666 * that all error checking has already been performed and that {@code numberToCopy > 0}. 667 */ copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)668 protected abstract void copyToInternal( 669 byte[] target, int sourceOffset, int targetOffset, int numberToCopy); 670 671 /** 672 * Copies bytes into a ByteBuffer. 673 * 674 * <p>To copy a subset of bytes, you call this method on the return value of {@link 675 * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target)} 676 * 677 * @param target ByteBuffer to copy into. 678 * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only 679 * @throws java.nio.BufferOverflowException if the {@code target}'s remaining() space is not large 680 * enough to hold the data. 681 */ copyTo(ByteBuffer target)682 public abstract void copyTo(ByteBuffer target); 683 684 /** 685 * Copies bytes to a {@code byte[]}. 686 * 687 * @return copied bytes 688 */ toByteArray()689 public final byte[] toByteArray() { 690 final int size = size(); 691 if (size == 0) { 692 return Internal.EMPTY_BYTE_ARRAY; 693 } 694 byte[] result = new byte[size]; 695 copyToInternal(result, 0, 0, size); 696 return result; 697 } 698 699 /** 700 * Writes a copy of the contents of this byte string to the specified output stream argument. 701 * 702 * @param out the output stream to which to write the data. 703 * @throws IOException if an I/O error occurs. 704 */ writeTo(OutputStream out)705 public abstract void writeTo(OutputStream out) throws IOException; 706 707 /** 708 * Writes a specified part of this byte string to an output stream. 709 * 710 * @param out the output stream to which to write the data. 711 * @param sourceOffset offset within these bytes 712 * @param numberToWrite number of bytes to write 713 * @throws IOException if an I/O error occurs. 714 * @throws IndexOutOfBoundsException if an offset or size is negative or too large 715 */ writeTo(OutputStream out, int sourceOffset, int numberToWrite)716 final void writeTo(OutputStream out, int sourceOffset, int numberToWrite) throws IOException { 717 checkRange(sourceOffset, sourceOffset + numberToWrite, size()); 718 if (numberToWrite > 0) { 719 writeToInternal(out, sourceOffset, numberToWrite); 720 } 721 } 722 723 /** 724 * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes all error checking has 725 * already been done. 726 */ writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)727 abstract void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) 728 throws IOException; 729 730 /** 731 * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling this method may 732 * result in multiple operations on the target {@link ByteOutput}. 733 * 734 * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link 735 * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious 736 * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution! 737 * 738 * @param byteOutput the output target to receive the bytes 739 * @throws IOException if an I/O error occurs 740 * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput) 741 */ writeTo(ByteOutput byteOutput)742 abstract void writeTo(ByteOutput byteOutput) throws IOException; 743 744 /** 745 * This method behaves exactly the same as {@link #writeTo(ByteOutput)} unless the {@link 746 * ByteString} is a rope. For ropes, the leaf nodes are written in reverse order to the {@code 747 * byteOutput}. 748 * 749 * @param byteOutput the output target to receive the bytes 750 * @throws IOException if an I/O error occurs 751 * @see UnsafeByteOperations#unsafeWriteToReverse(ByteString, ByteOutput) 752 */ writeToReverse(ByteOutput byteOutput)753 abstract void writeToReverse(ByteOutput byteOutput) throws IOException; 754 755 /** 756 * Constructs a read-only {@code java.nio.ByteBuffer} whose content is equal to the contents of 757 * this byte string. The result uses the same backing array as the byte string, if possible. 758 * 759 * @return wrapped bytes 760 */ asReadOnlyByteBuffer()761 public abstract ByteBuffer asReadOnlyByteBuffer(); 762 763 /** 764 * Constructs a list of read-only {@code java.nio.ByteBuffer} objects such that the concatenation 765 * of their contents is equal to the contents of this byte string. The result uses the same 766 * backing arrays as the byte string. 767 * 768 * <p>By returning a list, implementations of this method may be able to avoid copying even when 769 * there are multiple backing arrays. 770 * 771 * @return a list of wrapped bytes 772 */ asReadOnlyByteBufferList()773 public abstract List<ByteBuffer> asReadOnlyByteBufferList(); 774 775 /** 776 * Constructs a new {@code String} by decoding the bytes using the specified charset. 777 * 778 * @param charsetName encode using this charset 779 * @return new string 780 * @throws UnsupportedEncodingException if charset isn't recognized 781 */ toString(String charsetName)782 public final String toString(String charsetName) throws UnsupportedEncodingException { 783 try { 784 return toString(Charset.forName(charsetName)); 785 } catch (UnsupportedCharsetException e) { 786 UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName); 787 exception.initCause(e); 788 throw exception; 789 } 790 } 791 792 /** 793 * Constructs a new {@code String} by decoding the bytes using the specified charset. Returns the 794 * same empty String if empty. 795 * 796 * @param charset encode using this charset 797 * @return new string 798 */ toString(Charset charset)799 public final String toString(Charset charset) { 800 return size() == 0 ? "" : toStringInternal(charset); 801 } 802 803 /** 804 * Constructs a new {@code String} by decoding the bytes using the specified charset. 805 * 806 * @param charset encode using this charset 807 * @return new string 808 */ toStringInternal(Charset charset)809 protected abstract String toStringInternal(Charset charset); 810 811 // ================================================================= 812 // UTF-8 decoding 813 814 /** 815 * Constructs a new {@code String} by decoding the bytes as UTF-8. 816 * 817 * @return new string using UTF-8 encoding 818 */ toStringUtf8()819 public final String toStringUtf8() { 820 return toString(Internal.UTF_8); 821 } 822 823 /** 824 * Tells whether this {@code ByteString} represents a well-formed UTF-8 byte sequence, such that 825 * the original bytes can be converted to a String object and then round tripped back to bytes 826 * without loss. 827 * 828 * <p>More precisely, returns {@code true} whenever: 829 * 830 * <pre>{@code 831 * Arrays.equals(byteString.toByteArray(), 832 * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) 833 * }</pre> 834 * 835 * <p>This method returns {@code false} for "overlong" byte sequences, as well as for 3-byte 836 * sequences that would map to a surrogate character, in accordance with the restricted definition 837 * of UTF-8 introduced in Unicode 3.1. Note that the UTF-8 decoder included in Oracle's JDK has 838 * been modified to also reject "overlong" byte sequences, but (as of 2011) still accepts 3-byte 839 * surrogate character byte sequences. 840 * 841 * <p>See the Unicode Standard,<br> 842 * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br> 843 * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. 844 * 845 * @return whether the bytes in this {@code ByteString} are a well-formed UTF-8 byte sequence 846 */ isValidUtf8()847 public abstract boolean isValidUtf8(); 848 849 /** 850 * Tells whether the given byte sequence is a well-formed, malformed, or incomplete UTF-8 byte 851 * sequence. This method accepts and returns a partial state result, allowing the bytes for a 852 * complete UTF-8 byte sequence to be composed from multiple {@code ByteString} segments. 853 * 854 * @param state either {@code 0} (if this is the initial decoding operation) or the value returned 855 * from a call to a partial decoding method for the previous bytes 856 * @param offset offset of the first byte to check 857 * @param length number of bytes to check 858 * @return {@code -1} if the partial byte sequence is definitely malformed, {@code 0} if it is 859 * well-formed (no additional input needed), or, if the byte sequence is "incomplete", i.e. 860 * apparently terminated in the middle of a character, an opaque integer "state" value 861 * containing enough information to decode the character when passed to a subsequent 862 * invocation of a partial decoding method. 863 */ partialIsValidUtf8(int state, int offset, int length)864 protected abstract int partialIsValidUtf8(int state, int offset, int length); 865 866 // ================================================================= 867 // equals() and hashCode() 868 869 @Override 870 public abstract boolean equals(Object o); 871 872 /** Base class for leaf {@link ByteString}s (i.e. non-ropes). */ 873 abstract static class LeafByteString extends ByteString { 874 @Override getTreeDepth()875 protected final int getTreeDepth() { 876 return 0; 877 } 878 879 @Override isBalanced()880 protected final boolean isBalanced() { 881 return true; 882 } 883 884 @Override writeToReverse(ByteOutput byteOutput)885 void writeToReverse(ByteOutput byteOutput) throws IOException { 886 writeTo(byteOutput); 887 } 888 889 /** 890 * Check equality of the substring of given length of this object starting at zero with another 891 * {@code ByteString} substring starting at offset. 892 * 893 * @param other what to compare a substring in 894 * @param offset offset into other 895 * @param length number of bytes to compare 896 * @return true for equality of substrings, else false. 897 */ equalsRange(ByteString other, int offset, int length)898 abstract boolean equalsRange(ByteString other, int offset, int length); 899 } 900 901 /** 902 * Compute the hashCode using the traditional algorithm from {@link ByteString}. 903 * 904 * @return hashCode value 905 */ 906 @Override hashCode()907 public final int hashCode() { 908 int h = hash; 909 910 if (h == 0) { 911 int size = size(); 912 h = partialHash(size, 0, size); 913 if (h == 0) { 914 h = 1; 915 } 916 hash = h; 917 } 918 return h; 919 } 920 921 // ================================================================= 922 // Input stream 923 924 /** 925 * Creates an {@code InputStream} which can be used to read the bytes. 926 * 927 * <p>The {@link InputStream} returned by this method is guaranteed to be completely non-blocking. 928 * The method {@link InputStream#available()} returns the number of bytes remaining in the stream. 929 * The methods {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)} and 930 * {@link InputStream#skip(long)} will read/skip as many bytes as are available. The method {@link 931 * InputStream#markSupported()} returns {@code true}. 932 * 933 * <p>The methods in the returned {@link InputStream} might <b>not</b> be thread safe. 934 * 935 * @return an input stream that returns the bytes of this byte string. 936 */ newInput()937 public abstract InputStream newInput(); 938 939 /** 940 * Creates a {@link CodedInputStream} which can be used to read the bytes. Using this is often 941 * more efficient than creating a {@link CodedInputStream} that wraps the result of {@link 942 * #newInput()}. 943 * 944 * @return stream based on wrapped data 945 */ newCodedInput()946 public abstract CodedInputStream newCodedInput(); 947 948 // ================================================================= 949 // Output stream 950 951 /** 952 * Creates a new {@link Output} with the given initial capacity. Call {@link 953 * Output#toByteString()} to create the {@code ByteString} instance. 954 * 955 * <p>A {@link ByteString.Output} offers the same functionality as a {@link 956 * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte} 957 * array. 958 * 959 * @param initialCapacity estimate of number of bytes to be written 960 * @return {@code OutputStream} for building a {@code ByteString} 961 */ newOutput(int initialCapacity)962 public static Output newOutput(int initialCapacity) { 963 return new Output(initialCapacity); 964 } 965 966 /** 967 * Creates a new {@link Output}. Call {@link Output#toByteString()} to create the {@code 968 * ByteString} instance. 969 * 970 * <p>A {@link ByteString.Output} offers the same functionality as a {@link 971 * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte 972 * array}. 973 * 974 * @return {@code OutputStream} for building a {@code ByteString} 975 */ newOutput()976 public static Output newOutput() { 977 return new Output(CONCATENATE_BY_COPY_SIZE); 978 } 979 980 /** 981 * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to create the {@code 982 * ByteString} instance. 983 */ 984 public static final class Output extends OutputStream { 985 // Implementation note. 986 // The public methods of this class must be synchronized. ByteStrings 987 // are guaranteed to be immutable. Without some sort of locking, it could 988 // be possible for one thread to call toByteSring(), while another thread 989 // is still modifying the underlying byte array. 990 991 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; 992 // argument passed by user, indicating initial capacity. 993 private final int initialCapacity; 994 // ByteStrings to be concatenated to create the result 995 private final ArrayList<ByteString> flushedBuffers; 996 // Total number of bytes in the ByteStrings of flushedBuffers 997 private int flushedBuffersTotalBytes; 998 // Current buffer to which we are writing 999 private byte[] buffer; 1000 // Location in buffer[] to which we write the next byte. 1001 private int bufferPos; 1002 1003 /** 1004 * Creates a new ByteString output stream with the specified initial capacity. 1005 * 1006 * @param initialCapacity the initial capacity of the output stream. 1007 */ Output(int initialCapacity)1008 Output(int initialCapacity) { 1009 if (initialCapacity < 0) { 1010 throw new IllegalArgumentException("Buffer size < 0"); 1011 } 1012 this.initialCapacity = initialCapacity; 1013 this.flushedBuffers = new ArrayList<ByteString>(); 1014 this.buffer = new byte[initialCapacity]; 1015 } 1016 1017 @Override write(int b)1018 public synchronized void write(int b) { 1019 if (bufferPos == buffer.length) { 1020 flushFullBuffer(1); 1021 } 1022 buffer[bufferPos++] = (byte) b; 1023 } 1024 1025 @Override write(byte[] b, int offset, int length)1026 public synchronized void write(byte[] b, int offset, int length) { 1027 if (length <= buffer.length - bufferPos) { 1028 // The bytes can fit into the current buffer. 1029 System.arraycopy(b, offset, buffer, bufferPos, length); 1030 bufferPos += length; 1031 } else { 1032 // Use up the current buffer 1033 int copySize = buffer.length - bufferPos; 1034 System.arraycopy(b, offset, buffer, bufferPos, copySize); 1035 offset += copySize; 1036 length -= copySize; 1037 // Flush the buffer, and get a new buffer at least big enough to cover 1038 // what we still need to output 1039 flushFullBuffer(length); 1040 System.arraycopy(b, offset, buffer, /* count= */ 0, length); 1041 bufferPos = length; 1042 } 1043 } 1044 1045 /** 1046 * Creates a byte string. Its size is the current size of this output stream and its output has 1047 * been copied to it. 1048 * 1049 * @return the current contents of this output stream, as a byte string. 1050 */ toByteString()1051 public synchronized ByteString toByteString() { 1052 flushLastBuffer(); 1053 return ByteString.copyFrom(flushedBuffers); 1054 } 1055 1056 /** Implement java.util.Arrays.copyOf() for jdk 1.5. */ copyArray(byte[] buffer, int length)1057 private byte[] copyArray(byte[] buffer, int length) { 1058 byte[] result = new byte[length]; 1059 System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length)); 1060 return result; 1061 } 1062 1063 /** 1064 * Writes the complete contents of this byte array output stream to the specified output stream 1065 * argument. 1066 * 1067 * @param out the output stream to which to write the data. 1068 * @throws IOException if an I/O error occurs. 1069 */ writeTo(OutputStream out)1070 public void writeTo(OutputStream out) throws IOException { 1071 ByteString[] cachedFlushBuffers; 1072 byte[] cachedBuffer; 1073 int cachedBufferPos; 1074 synchronized (this) { 1075 // Copy the information we need into local variables so as to hold 1076 // the lock for as short a time as possible. 1077 cachedFlushBuffers = flushedBuffers.toArray(new ByteString[flushedBuffers.size()]); 1078 cachedBuffer = buffer; 1079 cachedBufferPos = bufferPos; 1080 } 1081 for (ByteString byteString : cachedFlushBuffers) { 1082 byteString.writeTo(out); 1083 } 1084 1085 out.write(copyArray(cachedBuffer, cachedBufferPos)); 1086 } 1087 1088 /** 1089 * Returns the current size of the output stream. 1090 * 1091 * @return the current size of the output stream 1092 */ size()1093 public synchronized int size() { 1094 return flushedBuffersTotalBytes + bufferPos; 1095 } 1096 1097 /** 1098 * Resets this stream, so that all currently accumulated output in the output stream is 1099 * discarded. The output stream can be used again, reusing the already allocated buffer space. 1100 */ reset()1101 public synchronized void reset() { 1102 flushedBuffers.clear(); 1103 flushedBuffersTotalBytes = 0; 1104 bufferPos = 0; 1105 } 1106 1107 @Override toString()1108 public String toString() { 1109 return String.format( 1110 "<ByteString.Output@%s size=%d>", 1111 Integer.toHexString(System.identityHashCode(this)), size()); 1112 } 1113 1114 /** 1115 * Internal function used by writers. The current buffer is full, and the writer needs a new 1116 * buffer whose size is at least the specified minimum size. 1117 */ flushFullBuffer(int minSize)1118 private void flushFullBuffer(int minSize) { 1119 flushedBuffers.add(new LiteralByteString(buffer)); 1120 flushedBuffersTotalBytes += buffer.length; 1121 // We want to increase our total capacity by 50%, but as a minimum, 1122 // the new buffer should also at least be >= minSize and 1123 // >= initial Capacity. 1124 int newSize = Math.max(initialCapacity, Math.max(minSize, flushedBuffersTotalBytes >>> 1)); 1125 buffer = new byte[newSize]; 1126 bufferPos = 0; 1127 } 1128 1129 /** 1130 * Internal function used by {@link #toByteString()}. The current buffer may or may not be full, 1131 * but it needs to be flushed. 1132 */ flushLastBuffer()1133 private void flushLastBuffer() { 1134 if (bufferPos < buffer.length) { 1135 if (bufferPos > 0) { 1136 byte[] bufferCopy = copyArray(buffer, bufferPos); 1137 flushedBuffers.add(new LiteralByteString(bufferCopy)); 1138 } 1139 // We reuse this buffer for further writes. 1140 } else { 1141 // Buffer is completely full. Huzzah. 1142 flushedBuffers.add(new LiteralByteString(buffer)); 1143 // 99% of the time, we're not going to use this OutputStream again. 1144 // We set buffer to an empty byte stream so that we're handling this 1145 // case without wasting space. In the rare case that more writes 1146 // *do* occur, this empty buffer will be flushed and an appropriately 1147 // sized new buffer will be created. 1148 buffer = EMPTY_BYTE_ARRAY; 1149 } 1150 flushedBuffersTotalBytes += bufferPos; 1151 bufferPos = 0; 1152 } 1153 } 1154 1155 /** 1156 * Constructs a new {@code ByteString} builder, which allows you to efficiently construct a {@code 1157 * ByteString} by writing to a {@link CodedOutputStream}. Using this is much more efficient than 1158 * calling {@code newOutput()} and wrapping that in a {@code CodedOutputStream}. 1159 * 1160 * <p>This is package-private because it's a somewhat confusing interface. Users can call {@link 1161 * Message#toByteString()} instead of calling this directly. 1162 * 1163 * @param size The target byte size of the {@code ByteString}. You must write exactly this many 1164 * bytes before building the result. 1165 * @return the builder 1166 */ newCodedBuilder(int size)1167 static CodedBuilder newCodedBuilder(int size) { 1168 return new CodedBuilder(size); 1169 } 1170 1171 /** See {@link ByteString#newCodedBuilder(int)}. */ 1172 static final class CodedBuilder { 1173 private final CodedOutputStream output; 1174 private final byte[] buffer; 1175 CodedBuilder(int size)1176 private CodedBuilder(int size) { 1177 buffer = new byte[size]; 1178 output = CodedOutputStream.newInstance(buffer); 1179 } 1180 build()1181 public ByteString build() { 1182 output.checkNoSpaceLeft(); 1183 1184 // We can be confident that the CodedOutputStream will not modify the 1185 // underlying bytes anymore because it already wrote all of them. So, 1186 // no need to make a copy. 1187 return new LiteralByteString(buffer); 1188 } 1189 getCodedOutput()1190 public CodedOutputStream getCodedOutput() { 1191 return output; 1192 } 1193 } 1194 1195 // ================================================================= 1196 // Methods {@link RopeByteString} needs on instances, which aren't part of the 1197 // public API. 1198 1199 /** 1200 * Return the depth of the tree representing this {@code ByteString}, if any, whose root is this 1201 * node. If this is a leaf node, return 0. 1202 * 1203 * @return tree depth or zero 1204 */ getTreeDepth()1205 protected abstract int getTreeDepth(); 1206 1207 /** 1208 * Return {@code true} if this ByteString is literal (a leaf node) or a flat-enough tree in the 1209 * sense of {@link RopeByteString}. 1210 * 1211 * @return true if the tree is flat enough 1212 */ isBalanced()1213 protected abstract boolean isBalanced(); 1214 1215 /** 1216 * Return the cached hash code if available. 1217 * 1218 * @return value of cached hash code or 0 if not computed yet 1219 */ peekCachedHashCode()1220 protected final int peekCachedHashCode() { 1221 return hash; 1222 } 1223 1224 /** 1225 * Compute the hash across the value bytes starting with the given hash, and return the result. 1226 * This is used to compute the hash across strings represented as a set of pieces by allowing the 1227 * hash computation to be continued from piece to piece. 1228 * 1229 * @param h starting hash value 1230 * @param offset offset into this value to start looking at data values 1231 * @param length number of data values to include in the hash computation 1232 * @return ending hash value 1233 */ partialHash(int h, int offset, int length)1234 protected abstract int partialHash(int h, int offset, int length); 1235 1236 /** 1237 * Checks that the given index falls within the specified array size. 1238 * 1239 * @param index the index position to be tested 1240 * @param size the length of the array 1241 * @throws IndexOutOfBoundsException if the index does not fall within the array. 1242 */ checkIndex(int index, int size)1243 static void checkIndex(int index, int size) { 1244 if ((index | (size - (index + 1))) < 0) { 1245 if (index < 0) { 1246 throw new ArrayIndexOutOfBoundsException("Index < 0: " + index); 1247 } 1248 throw new ArrayIndexOutOfBoundsException("Index > length: " + index + ", " + size); 1249 } 1250 } 1251 1252 /** 1253 * Checks that the given range falls within the bounds of an array 1254 * 1255 * @param startIndex the start index of the range (inclusive) 1256 * @param endIndex the end index of the range (exclusive) 1257 * @param size the size of the array. 1258 * @return the length of the range. 1259 * @throws IndexOutOfBoundsException some or all of the range falls outside of the array. 1260 */ checkRange(int startIndex, int endIndex, int size)1261 static int checkRange(int startIndex, int endIndex, int size) { 1262 final int length = endIndex - startIndex; 1263 if ((startIndex | endIndex | length | (size - endIndex)) < 0) { 1264 if (startIndex < 0) { 1265 throw new IndexOutOfBoundsException("Beginning index: " + startIndex + " < 0"); 1266 } 1267 if (endIndex < startIndex) { 1268 throw new IndexOutOfBoundsException( 1269 "Beginning index larger than ending index: " + startIndex + ", " + endIndex); 1270 } 1271 // endIndex >= size 1272 throw new IndexOutOfBoundsException("End index: " + endIndex + " >= " + size); 1273 } 1274 return length; 1275 } 1276 1277 @Override toString()1278 public final String toString() { 1279 return String.format( 1280 Locale.ROOT, 1281 "<ByteString@%s size=%d contents=\"%s\">", 1282 toHexString(identityHashCode(this)), 1283 size(), 1284 truncateAndEscapeForDisplay()); 1285 } 1286 truncateAndEscapeForDisplay()1287 private String truncateAndEscapeForDisplay() { 1288 final int limit = 50; 1289 1290 return size() <= limit ? escapeBytes(this) : escapeBytes(substring(0, limit - 3)) + "..."; 1291 } 1292 1293 /** 1294 * This class implements a {@link com.google.protobuf.ByteString} backed by a single array of 1295 * bytes, contiguous in memory. It supports substring by pointing to only a sub-range of the 1296 * underlying byte array, meaning that a substring will reference the full byte-array of the 1297 * string it's made from, exactly as with {@link String}. 1298 * 1299 * @author carlanton@google.com (Carl Haverl) 1300 */ 1301 // Keep this class private to avoid deadlocks in classloading across threads as ByteString's 1302 // static initializer loads LiteralByteString and another thread loads LiteralByteString. 1303 private static class LiteralByteString extends ByteString.LeafByteString { 1304 private static final long serialVersionUID = 1L; 1305 1306 protected final byte[] bytes; 1307 1308 /** 1309 * Creates a {@code LiteralByteString} backed by the given array, without copying. 1310 * 1311 * @param bytes array to wrap 1312 */ LiteralByteString(byte[] bytes)1313 LiteralByteString(byte[] bytes) { 1314 if (bytes == null) { 1315 throw new NullPointerException(); 1316 } 1317 this.bytes = bytes; 1318 } 1319 1320 @Override byteAt(int index)1321 public byte byteAt(int index) { 1322 // Unlike most methods in this class, this one is a direct implementation 1323 // ignoring the potential offset because we need to do range-checking in the 1324 // substring case anyway. 1325 return bytes[index]; 1326 } 1327 1328 @Override internalByteAt(int index)1329 byte internalByteAt(int index) { 1330 return bytes[index]; 1331 } 1332 1333 @Override size()1334 public int size() { 1335 return bytes.length; 1336 } 1337 1338 // ================================================================= 1339 // ByteString -> substring 1340 1341 @Override substring(int beginIndex, int endIndex)1342 public final ByteString substring(int beginIndex, int endIndex) { 1343 final int length = checkRange(beginIndex, endIndex, size()); 1344 1345 if (length == 0) { 1346 return ByteString.EMPTY; 1347 } 1348 1349 return new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex, length); 1350 } 1351 1352 // ================================================================= 1353 // ByteString -> byte[] 1354 1355 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1356 protected void copyToInternal( 1357 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 1358 // Optimized form, not for subclasses, since we don't call 1359 // getOffsetIntoBytes() or check the 'numberToCopy' parameter. 1360 // TODO(nathanmittler): Is not calling getOffsetIntoBytes really saving that much? 1361 System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy); 1362 } 1363 1364 @Override copyTo(ByteBuffer target)1365 public final void copyTo(ByteBuffer target) { 1366 target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes 1367 } 1368 1369 @Override asReadOnlyByteBuffer()1370 public final ByteBuffer asReadOnlyByteBuffer() { 1371 return ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size()).asReadOnlyBuffer(); 1372 } 1373 1374 @Override asReadOnlyByteBufferList()1375 public final List<ByteBuffer> asReadOnlyByteBufferList() { 1376 return Collections.singletonList(asReadOnlyByteBuffer()); 1377 } 1378 1379 @Override writeTo(OutputStream outputStream)1380 public final void writeTo(OutputStream outputStream) throws IOException { 1381 outputStream.write(toByteArray()); 1382 } 1383 1384 @Override writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)1385 final void writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite) 1386 throws IOException { 1387 outputStream.write(bytes, getOffsetIntoBytes() + sourceOffset, numberToWrite); 1388 } 1389 1390 @Override writeTo(ByteOutput output)1391 final void writeTo(ByteOutput output) throws IOException { 1392 output.writeLazy(bytes, getOffsetIntoBytes(), size()); 1393 } 1394 1395 @Override toStringInternal(Charset charset)1396 protected final String toStringInternal(Charset charset) { 1397 return new String(bytes, getOffsetIntoBytes(), size(), charset); 1398 } 1399 1400 // ================================================================= 1401 // UTF-8 decoding 1402 1403 @Override isValidUtf8()1404 public final boolean isValidUtf8() { 1405 int offset = getOffsetIntoBytes(); 1406 return Utf8.isValidUtf8(bytes, offset, offset + size()); 1407 } 1408 1409 @Override partialIsValidUtf8(int state, int offset, int length)1410 protected final int partialIsValidUtf8(int state, int offset, int length) { 1411 int index = getOffsetIntoBytes() + offset; 1412 return Utf8.partialIsValidUtf8(state, bytes, index, index + length); 1413 } 1414 1415 // ================================================================= 1416 // equals() and hashCode() 1417 1418 @Override equals(Object other)1419 public final boolean equals(Object other) { 1420 if (other == this) { 1421 return true; 1422 } 1423 if (!(other instanceof ByteString)) { 1424 return false; 1425 } 1426 1427 if (size() != ((ByteString) other).size()) { 1428 return false; 1429 } 1430 if (size() == 0) { 1431 return true; 1432 } 1433 1434 if (other instanceof LiteralByteString) { 1435 LiteralByteString otherAsLiteral = (LiteralByteString) other; 1436 // If we know the hash codes and they are not equal, we know the byte 1437 // strings are not equal. 1438 int thisHash = peekCachedHashCode(); 1439 int thatHash = otherAsLiteral.peekCachedHashCode(); 1440 if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) { 1441 return false; 1442 } 1443 1444 return equalsRange((LiteralByteString) other, 0, size()); 1445 } else { 1446 // RopeByteString and NioByteString. 1447 return other.equals(this); 1448 } 1449 } 1450 1451 /** 1452 * Check equality of the substring of given length of this object starting at zero with another 1453 * {@code LiteralByteString} substring starting at offset. 1454 * 1455 * @param other what to compare a substring in 1456 * @param offset offset into other 1457 * @param length number of bytes to compare 1458 * @return true for equality of substrings, else false. 1459 */ 1460 @Override equalsRange(ByteString other, int offset, int length)1461 final boolean equalsRange(ByteString other, int offset, int length) { 1462 if (length > other.size()) { 1463 throw new IllegalArgumentException("Length too large: " + length + size()); 1464 } 1465 if (offset + length > other.size()) { 1466 throw new IllegalArgumentException( 1467 "Ran off end of other: " + offset + ", " + length + ", " + other.size()); 1468 } 1469 1470 if (other instanceof LiteralByteString) { 1471 LiteralByteString lbsOther = (LiteralByteString) other; 1472 byte[] thisBytes = bytes; 1473 byte[] otherBytes = lbsOther.bytes; 1474 int thisLimit = getOffsetIntoBytes() + length; 1475 for (int thisIndex = getOffsetIntoBytes(), 1476 otherIndex = lbsOther.getOffsetIntoBytes() + offset; 1477 (thisIndex < thisLimit); 1478 ++thisIndex, ++otherIndex) { 1479 if (thisBytes[thisIndex] != otherBytes[otherIndex]) { 1480 return false; 1481 } 1482 } 1483 return true; 1484 } 1485 1486 return other.substring(offset, offset + length).equals(substring(0, length)); 1487 } 1488 1489 @Override partialHash(int h, int offset, int length)1490 protected final int partialHash(int h, int offset, int length) { 1491 return Internal.partialHash(h, bytes, getOffsetIntoBytes() + offset, length); 1492 } 1493 1494 // ================================================================= 1495 // Input stream 1496 1497 @Override newInput()1498 public final InputStream newInput() { 1499 return new ByteArrayInputStream(bytes, getOffsetIntoBytes(), size()); // No copy 1500 } 1501 1502 @Override newCodedInput()1503 public final CodedInputStream newCodedInput() { 1504 // We trust CodedInputStream not to modify the bytes, or to give anyone 1505 // else access to them. 1506 return CodedInputStream.newInstance( 1507 bytes, getOffsetIntoBytes(), size(), /* bufferIsImmutable= */ true); 1508 } 1509 1510 // ================================================================= 1511 // Internal methods 1512 1513 /** 1514 * Offset into {@code bytes[]} to use, non-zero for substrings. 1515 * 1516 * @return always 0 for this class 1517 */ getOffsetIntoBytes()1518 protected int getOffsetIntoBytes() { 1519 return 0; 1520 } 1521 } 1522 1523 /** 1524 * This class is used to represent the substring of a {@link ByteString} over a single byte array. 1525 * In terms of the public API of {@link ByteString}, you end up here by calling {@link 1526 * ByteString#copyFrom(byte[])} followed by {@link ByteString#substring(int, int)}. 1527 * 1528 * <p>This class contains most of the overhead involved in creating a substring from a {@link 1529 * LiteralByteString}. The overhead involves some range-checking and two extra fields. 1530 * 1531 * @author carlanton@google.com (Carl Haverl) 1532 */ 1533 // Keep this class private to avoid deadlocks in classloading across threads as ByteString's 1534 // static initializer loads LiteralByteString and another thread loads BoundedByteString. 1535 private static final class BoundedByteString extends LiteralByteString { 1536 1537 private final int bytesOffset; 1538 private final int bytesLength; 1539 1540 /** 1541 * Creates a {@code BoundedByteString} backed by the sub-range of given array, without copying. 1542 * 1543 * @param bytes array to wrap 1544 * @param offset index to first byte to use in bytes 1545 * @param length number of bytes to use from bytes 1546 * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0}, or if {@code 1547 * offset + length > bytes.length}. 1548 */ BoundedByteString(byte[] bytes, int offset, int length)1549 BoundedByteString(byte[] bytes, int offset, int length) { 1550 super(bytes); 1551 checkRange(offset, offset + length, bytes.length); 1552 1553 this.bytesOffset = offset; 1554 this.bytesLength = length; 1555 } 1556 1557 /** 1558 * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for 1559 * backwards-compatibility reasons although it would more properly be {@link 1560 * IndexOutOfBoundsException}. 1561 * 1562 * @param index index of byte 1563 * @return the value 1564 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 1565 */ 1566 @Override byteAt(int index)1567 public byte byteAt(int index) { 1568 // We must check the index ourselves as we cannot rely on Java array index 1569 // checking for substrings. 1570 checkIndex(index, size()); 1571 return bytes[bytesOffset + index]; 1572 } 1573 1574 @Override internalByteAt(int index)1575 byte internalByteAt(int index) { 1576 return bytes[bytesOffset + index]; 1577 } 1578 1579 @Override size()1580 public int size() { 1581 return bytesLength; 1582 } 1583 1584 @Override getOffsetIntoBytes()1585 protected int getOffsetIntoBytes() { 1586 return bytesOffset; 1587 } 1588 1589 // ================================================================= 1590 // ByteString -> byte[] 1591 1592 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1593 protected void copyToInternal( 1594 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 1595 System.arraycopy( 1596 bytes, getOffsetIntoBytes() + sourceOffset, target, targetOffset, numberToCopy); 1597 } 1598 1599 // ================================================================= 1600 // Serializable 1601 1602 private static final long serialVersionUID = 1L; 1603 writeReplace()1604 Object writeReplace() { 1605 return ByteString.wrap(toByteArray()); 1606 } 1607 readObject(@uppressWarnings"unused") ObjectInputStream in)1608 private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException { 1609 throw new InvalidObjectException( 1610 "BoundedByteStream instances are not to be serialized directly"); 1611 } 1612 } 1613 } 1614