1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import java.io.ByteArrayInputStream; 34 import java.io.ByteArrayOutputStream; 35 import java.io.IOException; 36 import java.io.InputStream; 37 import java.io.InvalidObjectException; 38 import java.io.ObjectInputStream; 39 import java.io.OutputStream; 40 import java.io.Serializable; 41 import java.io.UnsupportedEncodingException; 42 import java.nio.ByteBuffer; 43 import java.nio.charset.Charset; 44 import java.nio.charset.UnsupportedCharsetException; 45 import java.util.ArrayList; 46 import java.util.Arrays; 47 import java.util.Collection; 48 import java.util.Collections; 49 import java.util.Comparator; 50 import java.util.Iterator; 51 import java.util.List; 52 import java.util.NoSuchElementException; 53 54 /** 55 * Immutable sequence of bytes. Substring is supported by sharing the reference to the immutable 56 * underlying bytes. Concatenation is likewise supported without copying (long strings) by building 57 * a tree of pieces in {@link RopeByteString}. 58 * 59 * <p>Like {@link String}, the contents of a {@link ByteString} can never be observed to change, not 60 * even in the presence of a data race or incorrect API usage in the client code. 61 * 62 * @author crazybob@google.com Bob Lee 63 * @author kenton@google.com Kenton Varda 64 * @author carlanton@google.com Carl Haverl 65 * @author martinrb@google.com Martin Buchholz 66 */ 67 public abstract class ByteString implements Iterable<Byte>, Serializable { 68 69 /** 70 * When two strings to be concatenated have a combined length shorter than this, we just copy 71 * their bytes on {@link #concat(ByteString)}. The trade-off is copy size versus the overhead of 72 * creating tree nodes in {@link RopeByteString}. 73 */ 74 static final int CONCATENATE_BY_COPY_SIZE = 128; 75 76 /** 77 * When copying an InputStream into a ByteString with .readFrom(), the chunks in the underlying 78 * rope start at 256 bytes, but double each iteration up to 8192 bytes. 79 */ 80 static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b 81 82 static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k 83 84 /** Empty {@code ByteString}. */ 85 public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY); 86 87 /** 88 * An interface to efficiently copy {@code byte[]}. 89 * 90 * <p>One of the noticeable costs of copying a byte[] into a new array using {@code 91 * System.arraycopy} is nullification of a new buffer before the copy. It has been shown the 92 * Hotspot VM is capable to intrisicfy {@code Arrays.copyOfRange} operation to avoid this 93 * expensive nullification and provide substantial performance gain. Unfortunately this does not 94 * hold on Android runtimes and could make the copy slightly slower due to additional code in the 95 * {@code Arrays.copyOfRange}. Thus we provide two different implementation for array copier for 96 * Hotspot and Android runtimes. 97 */ 98 private interface ByteArrayCopier { 99 /** Copies the specified range of the specified array into a new array */ copyFrom(byte[] bytes, int offset, int size)100 byte[] copyFrom(byte[] bytes, int offset, int size); 101 } 102 103 /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */ 104 private static final class SystemByteArrayCopier implements ByteArrayCopier { 105 @Override copyFrom(byte[] bytes, int offset, int size)106 public byte[] copyFrom(byte[] bytes, int offset, int size) { 107 byte[] copy = new byte[size]; 108 System.arraycopy(bytes, offset, copy, 0, size); 109 return copy; 110 } 111 } 112 113 /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */ 114 private static final class ArraysByteArrayCopier implements ByteArrayCopier { 115 @Override copyFrom(byte[] bytes, int offset, int size)116 public byte[] copyFrom(byte[] bytes, int offset, int size) { 117 return Arrays.copyOfRange(bytes, offset, offset + size); 118 } 119 } 120 121 private static final ByteArrayCopier byteArrayCopier; 122 123 static { 124 byteArrayCopier = 125 Android.isOnAndroidDevice() ? new SystemByteArrayCopier() : new ArraysByteArrayCopier(); 126 } 127 128 /** 129 * Cached hash value. Intentionally accessed via a data race, which is safe because of the Java 130 * Memory Model's "no out-of-thin-air values" guarantees for ints. A value of 0 implies that the 131 * hash has not been set. 132 */ 133 private int hash = 0; 134 135 // This constructor is here to prevent subclassing outside of this package, ByteString()136 ByteString() {} 137 138 /** 139 * Gets the byte at the given index. This method should be used only for random access to 140 * individual bytes. To access bytes sequentially, use the {@link ByteIterator} returned by {@link 141 * #iterator()}, and call {@link #substring(int, int)} first if necessary. 142 * 143 * @param index index of byte 144 * @return the value 145 * @throws IndexOutOfBoundsException {@code index < 0 or index >= size} 146 */ byteAt(int index)147 public abstract byte byteAt(int index); 148 149 /** 150 * Gets the byte at the given index, assumes bounds checking has already been performed. 151 * 152 * @param index index of byte 153 * @return the value 154 * @throws IndexOutOfBoundsException {@code index < 0 or index >= size} 155 */ internalByteAt(int index)156 abstract byte internalByteAt(int index); 157 158 /** 159 * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. To avoid 160 * auto-boxing, you may get the iterator manually and call {@link ByteIterator#nextByte()}. 161 * 162 * @return the iterator 163 */ 164 @Override iterator()165 public ByteIterator iterator() { 166 return new AbstractByteIterator() { 167 private int position = 0; 168 private final int limit = size(); 169 170 @Override 171 public boolean hasNext() { 172 return position < limit; 173 } 174 175 @Override 176 public byte nextByte() { 177 int currentPos = position; 178 if (currentPos >= limit) { 179 throw new NoSuchElementException(); 180 } 181 position = currentPos + 1; 182 return internalByteAt(currentPos); 183 } 184 }; 185 } 186 187 /** 188 * This interface extends {@code Iterator<Byte>}, so that we can return an unboxed {@code byte}. 189 */ 190 public interface ByteIterator extends Iterator<Byte> { 191 /** 192 * An alternative to {@link Iterator#next()} that returns an unboxed primitive {@code byte}. 193 * 194 * @return the next {@code byte} in the iteration 195 * @throws NoSuchElementException if the iteration has no more elements 196 */ nextByte()197 byte nextByte(); 198 } 199 200 abstract static class AbstractByteIterator implements ByteIterator { 201 @Override next()202 public final Byte next() { 203 // Boxing calls Byte.valueOf(byte), which does not instantiate. 204 return nextByte(); 205 } 206 207 @Override remove()208 public final void remove() { 209 throw new UnsupportedOperationException(); 210 } 211 } 212 213 /** 214 * Gets the number of bytes. 215 * 216 * @return size in bytes 217 */ size()218 public abstract int size(); 219 220 /** 221 * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. 222 * 223 * @return true if this is zero bytes long 224 */ isEmpty()225 public final boolean isEmpty() { 226 return size() == 0; 227 } 228 229 // ================================================================= 230 // Comparison 231 232 private static final int UNSIGNED_BYTE_MASK = 0xFF; 233 234 /** 235 * Returns the value of the given byte as an integer, interpreting the byte as an unsigned value. 236 * That is, returns {@code value + 256} if {@code value} is negative; {@code value} itself 237 * otherwise. 238 * 239 * <p>Note: This code was copied from {@link com.google.common.primitives.UnsignedBytes#toInt}, as 240 * Guava libraries cannot be used in the {@code com.google.protobuf} package. 241 */ toInt(byte value)242 private static int toInt(byte value) { 243 return value & UNSIGNED_BYTE_MASK; 244 } 245 246 /** 247 * Compares two {@link ByteString}s lexicographically, treating their contents as unsigned byte 248 * values between 0 and 255 (inclusive). 249 * 250 * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it 251 * is interpreted as an unsigned value, {@code 255}. 252 */ 253 private static final Comparator<ByteString> UNSIGNED_LEXICOGRAPHICAL_COMPARATOR = 254 new Comparator<ByteString>() { 255 @Override 256 public int compare(ByteString former, ByteString latter) { 257 ByteIterator formerBytes = former.iterator(); 258 ByteIterator latterBytes = latter.iterator(); 259 260 while (formerBytes.hasNext() && latterBytes.hasNext()) { 261 // Note: This code was copied from com.google.common.primitives.UnsignedBytes#compare, 262 // as Guava libraries cannot be used in the {@code com.google.protobuf} package. 263 int result = 264 Integer.compare(toInt(formerBytes.nextByte()), toInt(latterBytes.nextByte())); 265 if (result != 0) { 266 return result; 267 } 268 } 269 270 return Integer.compare(former.size(), latter.size()); 271 } 272 }; 273 274 /** 275 * Returns a {@link Comparator} which compares {@link ByteString}-s lexicographically 276 * as sequences of unsigned bytes (i.e. values between 0 and 255, inclusive). 277 * 278 * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it 279 * is interpreted as an unsigned value, {@code 255}: 280 * 281 * <ul> 282 * <li>{@code `-1` -> 0b11111111 (two's complement) -> 255} 283 * <li>{@code `1` -> 0b00000001 -> 1} 284 * </ul> 285 */ unsignedLexicographicalComparator()286 public static Comparator<ByteString> unsignedLexicographicalComparator() { 287 return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR; 288 } 289 290 // ================================================================= 291 // ByteString -> substring 292 293 /** 294 * Return the substring from {@code beginIndex}, inclusive, to the end of the string. 295 * 296 * @param beginIndex start at this index 297 * @return substring sharing underlying data 298 * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}. 299 */ substring(int beginIndex)300 public final ByteString substring(int beginIndex) { 301 return substring(beginIndex, size()); 302 } 303 304 /** 305 * Return the substring from {@code beginIndex}, inclusive, to {@code endIndex}, exclusive. 306 * 307 * @param beginIndex start at this index 308 * @param endIndex the last character is the one before this index 309 * @return substring sharing underlying data 310 * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or 311 * {@code beginIndex > endIndex}. 312 */ substring(int beginIndex, int endIndex)313 public abstract ByteString substring(int beginIndex, int endIndex); 314 315 /** 316 * Tests if this bytestring starts with the specified prefix. Similar to {@link 317 * String#startsWith(String)} 318 * 319 * @param prefix the prefix. 320 * @return <code>true</code> if the byte sequence represented by the argument is a prefix of the 321 * byte sequence represented by this string; <code>false</code> otherwise. 322 */ startsWith(ByteString prefix)323 public final boolean startsWith(ByteString prefix) { 324 return size() >= prefix.size() && substring(0, prefix.size()).equals(prefix); 325 } 326 327 /** 328 * Tests if this bytestring ends with the specified suffix. Similar to {@link 329 * String#endsWith(String)} 330 * 331 * @param suffix the suffix. 332 * @return <code>true</code> if the byte sequence represented by the argument is a suffix of the 333 * byte sequence represented by this string; <code>false</code> otherwise. 334 */ endsWith(ByteString suffix)335 public final boolean endsWith(ByteString suffix) { 336 return size() >= suffix.size() && substring(size() - suffix.size()).equals(suffix); 337 } 338 339 // ================================================================= 340 // byte[] -> ByteString 341 342 /** 343 * Copies the given bytes into a {@code ByteString}. 344 * 345 * @param bytes source array 346 * @param offset offset in source array 347 * @param size number of bytes to copy 348 * @return new {@code ByteString} 349 * @throws IndexOutOfBoundsException if {@code offset} or {@code size} are out of bounds 350 */ copyFrom(byte[] bytes, int offset, int size)351 public static ByteString copyFrom(byte[] bytes, int offset, int size) { 352 checkRange(offset, offset + size, bytes.length); 353 return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size)); 354 } 355 356 /** 357 * Copies the given bytes into a {@code ByteString}. 358 * 359 * @param bytes to copy 360 * @return new {@code ByteString} 361 */ copyFrom(byte[] bytes)362 public static ByteString copyFrom(byte[] bytes) { 363 return copyFrom(bytes, 0, bytes.length); 364 } 365 366 /** Wraps the given bytes into a {@code ByteString}. Intended for internal only usage. */ wrap(ByteBuffer buffer)367 static ByteString wrap(ByteBuffer buffer) { 368 if (buffer.hasArray()) { 369 final int offset = buffer.arrayOffset(); 370 return ByteString.wrap(buffer.array(), offset + buffer.position(), buffer.remaining()); 371 } else { 372 return new NioByteString(buffer); 373 } 374 } 375 376 /** 377 * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a 378 * classload of ByteString before LiteralByteString. 379 */ wrap(byte[] bytes)380 static ByteString wrap(byte[] bytes) { 381 // TODO(dweis): Return EMPTY when bytes are empty to reduce allocations? 382 return new LiteralByteString(bytes); 383 } 384 385 /** 386 * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a 387 * classload of ByteString before BoundedByteString and LiteralByteString. 388 */ wrap(byte[] bytes, int offset, int length)389 static ByteString wrap(byte[] bytes, int offset, int length) { 390 return new BoundedByteString(bytes, offset, length); 391 } 392 393 /** 394 * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into a {@code 395 * ByteString}. 396 * 397 * @param bytes source buffer 398 * @param size number of bytes to copy 399 * @return new {@code ByteString} 400 * @throws IndexOutOfBoundsException if {@code size > bytes.remaining()} 401 */ copyFrom(ByteBuffer bytes, int size)402 public static ByteString copyFrom(ByteBuffer bytes, int size) { 403 checkRange(0, size, bytes.remaining()); 404 byte[] copy = new byte[size]; 405 bytes.get(copy); 406 return new LiteralByteString(copy); 407 } 408 409 /** 410 * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into a {@code ByteString}. 411 * 412 * @param bytes sourceBuffer 413 * @return new {@code ByteString} 414 */ copyFrom(ByteBuffer bytes)415 public static ByteString copyFrom(ByteBuffer bytes) { 416 return copyFrom(bytes, bytes.remaining()); 417 } 418 419 /** 420 * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as 421 * a {@code ByteString}. 422 * 423 * @param text source string 424 * @param charsetName encoding to use 425 * @return new {@code ByteString} 426 * @throws UnsupportedEncodingException if the encoding isn't found 427 */ copyFrom(String text, String charsetName)428 public static ByteString copyFrom(String text, String charsetName) 429 throws UnsupportedEncodingException { 430 return new LiteralByteString(text.getBytes(charsetName)); 431 } 432 433 /** 434 * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as 435 * a {@code ByteString}. 436 * 437 * @param text source string 438 * @param charset encode using this charset 439 * @return new {@code ByteString} 440 */ copyFrom(String text, Charset charset)441 public static ByteString copyFrom(String text, Charset charset) { 442 return new LiteralByteString(text.getBytes(charset)); 443 } 444 445 /** 446 * Encodes {@code text} into a sequence of UTF-8 bytes and returns the result as a {@code 447 * ByteString}. 448 * 449 * @param text source string 450 * @return new {@code ByteString} 451 */ copyFromUtf8(String text)452 public static ByteString copyFromUtf8(String text) { 453 return new LiteralByteString(text.getBytes(Internal.UTF_8)); 454 } 455 456 // ================================================================= 457 // InputStream -> ByteString 458 459 /** 460 * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary 461 * until all bytes are read through to the end of the stream. 462 * 463 * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte 464 * arrays ("chunks") of the stream data. The first chunk is small, with subsequent chunks each 465 * being double the size, up to 8K. 466 * 467 * <p>Each byte read from the input stream will be copied twice to ensure that the resulting 468 * ByteString is truly immutable. 469 * 470 * @param streamToDrain The source stream, which is read completely but not closed. 471 * @return A new {@code ByteString} which is made up of chunks of various sizes, depending on the 472 * behavior of the underlying stream. 473 * @throws IOException IOException is thrown if there is a problem reading the underlying stream. 474 */ readFrom(InputStream streamToDrain)475 public static ByteString readFrom(InputStream streamToDrain) throws IOException { 476 return readFrom(streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); 477 } 478 479 /** 480 * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary 481 * until all bytes are read through to the end of the stream. 482 * 483 * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte 484 * arrays ("chunks") of the stream data. The chunkSize parameter sets the size of these byte 485 * arrays. 486 * 487 * <p>Each byte read from the input stream will be copied twice to ensure that the resulting 488 * ByteString is truly immutable. 489 * 490 * @param streamToDrain The source stream, which is read completely but not closed. 491 * @param chunkSize The size of the chunks in which to read the stream. 492 * @return A new {@code ByteString} which is made up of chunks of the given size. 493 * @throws IOException IOException is thrown if there is a problem reading the underlying stream. 494 */ readFrom(InputStream streamToDrain, int chunkSize)495 public static ByteString readFrom(InputStream streamToDrain, int chunkSize) throws IOException { 496 return readFrom(streamToDrain, chunkSize, chunkSize); 497 } 498 499 // Helper method that takes the chunk size range as a parameter. readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)500 public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize) 501 throws IOException { 502 Collection<ByteString> results = new ArrayList<ByteString>(); 503 504 // copy the inbound bytes into a list of chunks; the chunk size 505 // grows exponentially to support both short and long streams. 506 int chunkSize = minChunkSize; 507 while (true) { 508 ByteString chunk = readChunk(streamToDrain, chunkSize); 509 if (chunk == null) { 510 break; 511 } 512 results.add(chunk); 513 chunkSize = Math.min(chunkSize * 2, maxChunkSize); 514 } 515 516 return ByteString.copyFrom(results); 517 } 518 519 /** 520 * Blocks until a chunk of the given size can be made from the stream, or EOF is reached. Calls 521 * read() repeatedly in case the given stream implementation doesn't completely fill the given 522 * buffer in one read() call. 523 * 524 * @return A chunk of the desired size, or else a chunk as large as was available when end of 525 * stream was reached. Returns null if the given stream had no more data in it. 526 */ readChunk(InputStream in, final int chunkSize)527 private static ByteString readChunk(InputStream in, final int chunkSize) throws IOException { 528 final byte[] buf = new byte[chunkSize]; 529 int bytesRead = 0; 530 while (bytesRead < chunkSize) { 531 final int count = in.read(buf, bytesRead, chunkSize - bytesRead); 532 if (count == -1) { 533 break; 534 } 535 bytesRead += count; 536 } 537 538 if (bytesRead == 0) { 539 return null; 540 } 541 542 // Always make a copy since InputStream could steal a reference to buf. 543 return ByteString.copyFrom(buf, 0, bytesRead); 544 } 545 546 // ================================================================= 547 // Multiple ByteStrings -> One ByteString 548 549 /** 550 * Concatenate the given {@code ByteString} to this one. Short concatenations, of total size 551 * smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are produced by copying the 552 * underlying bytes (as per Rope.java, <a 553 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 554 * BAP95 </a>. In general, the concatenate involves no copying. 555 * 556 * @param other string to concatenate 557 * @return a new {@code ByteString} instance 558 */ concat(ByteString other)559 public final ByteString concat(ByteString other) { 560 if (Integer.MAX_VALUE - size() < other.size()) { 561 throw new IllegalArgumentException( 562 "ByteString would be too long: " + size() + "+" + other.size()); 563 } 564 565 return RopeByteString.concatenate(this, other); 566 } 567 568 /** 569 * Concatenates all byte strings in the iterable and returns the result. This is designed to run 570 * in O(list size), not O(total bytes). 571 * 572 * <p>The returned {@code ByteString} is not necessarily a unique object. If the list is empty, 573 * the returned object is the singleton empty {@code ByteString}. If the list has only one 574 * element, that {@code ByteString} will be returned without copying. 575 * 576 * @param byteStrings strings to be concatenated 577 * @return new {@code ByteString} 578 */ copyFrom(Iterable<ByteString> byteStrings)579 public static ByteString copyFrom(Iterable<ByteString> byteStrings) { 580 // Determine the size; 581 final int size; 582 if (!(byteStrings instanceof Collection)) { 583 int tempSize = 0; 584 for (Iterator<ByteString> iter = byteStrings.iterator(); 585 iter.hasNext(); 586 iter.next(), ++tempSize) {} 587 size = tempSize; 588 } else { 589 size = ((Collection<ByteString>) byteStrings).size(); 590 } 591 592 if (size == 0) { 593 return EMPTY; 594 } 595 596 return balancedConcat(byteStrings.iterator(), size); 597 } 598 599 // Internal function used by copyFrom(Iterable<ByteString>). 600 // Create a balanced concatenation of the next "length" elements from the 601 // iterable. balancedConcat(Iterator<ByteString> iterator, int length)602 private static ByteString balancedConcat(Iterator<ByteString> iterator, int length) { 603 if (length < 1) { 604 throw new IllegalArgumentException(String.format("length (%s) must be >= 1", length)); 605 } 606 ByteString result; 607 if (length == 1) { 608 result = iterator.next(); 609 } else { 610 int halfLength = length >>> 1; 611 ByteString left = balancedConcat(iterator, halfLength); 612 ByteString right = balancedConcat(iterator, length - halfLength); 613 result = left.concat(right); 614 } 615 return result; 616 } 617 618 // ================================================================= 619 // ByteString -> byte[] 620 621 /** 622 * Copies bytes into a buffer at the given offset. 623 * 624 * <p>To copy a subset of bytes, you call this method on the return value of {@link 625 * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target, offset)} 626 * 627 * @param target buffer to copy into 628 * @param offset in the target buffer 629 * @throws IndexOutOfBoundsException if the offset is negative or too large 630 */ copyTo(byte[] target, int offset)631 public void copyTo(byte[] target, int offset) { 632 copyTo(target, 0, offset, size()); 633 } 634 635 /** 636 * Copies bytes into a buffer. 637 * 638 * @param target buffer to copy into 639 * @param sourceOffset offset within these bytes 640 * @param targetOffset offset within the target buffer 641 * @param numberToCopy number of bytes to copy 642 * @throws IndexOutOfBoundsException if an offset or size is negative or too large 643 * @deprecated Instead, call {@code byteString.substring(sourceOffset, sourceOffset + 644 * numberToCopy).copyTo(target, targetOffset)} 645 */ 646 @Deprecated copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)647 public final void copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 648 checkRange(sourceOffset, sourceOffset + numberToCopy, size()); 649 checkRange(targetOffset, targetOffset + numberToCopy, target.length); 650 if (numberToCopy > 0) { 651 copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 652 } 653 } 654 655 /** 656 * Internal (package private) implementation of {@link #copyTo(byte[],int,int,int)}. It assumes 657 * that all error checking has already been performed and that {@code numberToCopy > 0}. 658 */ copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)659 protected abstract void copyToInternal( 660 byte[] target, int sourceOffset, int targetOffset, int numberToCopy); 661 662 /** 663 * Copies bytes into a ByteBuffer. 664 * 665 * <p>To copy a subset of bytes, you call this method on the return value of {@link 666 * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target)} 667 * 668 * @param target ByteBuffer to copy into. 669 * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only 670 * @throws java.nio.BufferOverflowException if the {@code target}'s remaining() space is not large 671 * enough to hold the data. 672 */ copyTo(ByteBuffer target)673 public abstract void copyTo(ByteBuffer target); 674 675 /** 676 * Copies bytes to a {@code byte[]}. 677 * 678 * @return copied bytes 679 */ toByteArray()680 public final byte[] toByteArray() { 681 final int size = size(); 682 if (size == 0) { 683 return Internal.EMPTY_BYTE_ARRAY; 684 } 685 byte[] result = new byte[size]; 686 copyToInternal(result, 0, 0, size); 687 return result; 688 } 689 690 /** 691 * Writes a copy of the contents of this byte string to the specified output stream argument. 692 * 693 * @param out the output stream to which to write the data. 694 * @throws IOException if an I/O error occurs. 695 */ writeTo(OutputStream out)696 public abstract void writeTo(OutputStream out) throws IOException; 697 698 /** 699 * Writes a specified part of this byte string to an output stream. 700 * 701 * @param out the output stream to which to write the data. 702 * @param sourceOffset offset within these bytes 703 * @param numberToWrite number of bytes to write 704 * @throws IOException if an I/O error occurs. 705 * @throws IndexOutOfBoundsException if an offset or size is negative or too large 706 */ writeTo(OutputStream out, int sourceOffset, int numberToWrite)707 final void writeTo(OutputStream out, int sourceOffset, int numberToWrite) throws IOException { 708 checkRange(sourceOffset, sourceOffset + numberToWrite, size()); 709 if (numberToWrite > 0) { 710 writeToInternal(out, sourceOffset, numberToWrite); 711 } 712 } 713 714 /** 715 * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes all error checking has 716 * already been done. 717 */ writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)718 abstract void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) 719 throws IOException; 720 721 /** 722 * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling this method may 723 * result in multiple operations on the target {@link ByteOutput}. 724 * 725 * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link 726 * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious 727 * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution! 728 * 729 * @param byteOutput the output target to receive the bytes 730 * @throws IOException if an I/O error occurs 731 * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput) 732 */ writeTo(ByteOutput byteOutput)733 abstract void writeTo(ByteOutput byteOutput) throws IOException; 734 735 /** 736 * This method behaves exactly the same as {@link #writeTo(ByteOutput)} unless the {@link 737 * ByteString} is a rope. For ropes, the leaf nodes are written in reverse order to the {@code 738 * byteOutput}. 739 * 740 * @param byteOutput the output target to receive the bytes 741 * @throws IOException if an I/O error occurs 742 * @see UnsafeByteOperations#unsafeWriteToReverse(ByteString, ByteOutput) 743 */ writeToReverse(ByteOutput byteOutput)744 abstract void writeToReverse(ByteOutput byteOutput) throws IOException; 745 746 /** 747 * Constructs a read-only {@code java.nio.ByteBuffer} whose content is equal to the contents of 748 * this byte string. The result uses the same backing array as the byte string, if possible. 749 * 750 * @return wrapped bytes 751 */ asReadOnlyByteBuffer()752 public abstract ByteBuffer asReadOnlyByteBuffer(); 753 754 /** 755 * Constructs a list of read-only {@code java.nio.ByteBuffer} objects such that the concatenation 756 * of their contents is equal to the contents of this byte string. The result uses the same 757 * backing arrays as the byte string. 758 * 759 * <p>By returning a list, implementations of this method may be able to avoid copying even when 760 * there are multiple backing arrays. 761 * 762 * @return a list of wrapped bytes 763 */ asReadOnlyByteBufferList()764 public abstract List<ByteBuffer> asReadOnlyByteBufferList(); 765 766 /** 767 * Constructs a new {@code String} by decoding the bytes using the specified charset. 768 * 769 * @param charsetName encode using this charset 770 * @return new string 771 * @throws UnsupportedEncodingException if charset isn't recognized 772 */ toString(String charsetName)773 public final String toString(String charsetName) throws UnsupportedEncodingException { 774 try { 775 return toString(Charset.forName(charsetName)); 776 } catch (UnsupportedCharsetException e) { 777 UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName); 778 exception.initCause(e); 779 throw exception; 780 } 781 } 782 783 /** 784 * Constructs a new {@code String} by decoding the bytes using the specified charset. Returns the 785 * same empty String if empty. 786 * 787 * @param charset encode using this charset 788 * @return new string 789 */ toString(Charset charset)790 public final String toString(Charset charset) { 791 return size() == 0 ? "" : toStringInternal(charset); 792 } 793 794 /** 795 * Constructs a new {@code String} by decoding the bytes using the specified charset. 796 * 797 * @param charset encode using this charset 798 * @return new string 799 */ toStringInternal(Charset charset)800 protected abstract String toStringInternal(Charset charset); 801 802 // ================================================================= 803 // UTF-8 decoding 804 805 /** 806 * Constructs a new {@code String} by decoding the bytes as UTF-8. 807 * 808 * @return new string using UTF-8 encoding 809 */ toStringUtf8()810 public final String toStringUtf8() { 811 return toString(Internal.UTF_8); 812 } 813 814 /** 815 * Tells whether this {@code ByteString} represents a well-formed UTF-8 byte sequence, such that 816 * the original bytes can be converted to a String object and then round tripped back to bytes 817 * without loss. 818 * 819 * <p>More precisely, returns {@code true} whenever: 820 * 821 * <pre>{@code 822 * Arrays.equals(byteString.toByteArray(), 823 * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) 824 * }</pre> 825 * 826 * <p>This method returns {@code false} for "overlong" byte sequences, as well as for 3-byte 827 * sequences that would map to a surrogate character, in accordance with the restricted definition 828 * of UTF-8 introduced in Unicode 3.1. Note that the UTF-8 decoder included in Oracle's JDK has 829 * been modified to also reject "overlong" byte sequences, but (as of 2011) still accepts 3-byte 830 * surrogate character byte sequences. 831 * 832 * <p>See the Unicode Standard,<br> 833 * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br> 834 * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. 835 * 836 * @return whether the bytes in this {@code ByteString} are a well-formed UTF-8 byte sequence 837 */ isValidUtf8()838 public abstract boolean isValidUtf8(); 839 840 /** 841 * Tells whether the given byte sequence is a well-formed, malformed, or incomplete UTF-8 byte 842 * sequence. This method accepts and returns a partial state result, allowing the bytes for a 843 * complete UTF-8 byte sequence to be composed from multiple {@code ByteString} segments. 844 * 845 * @param state either {@code 0} (if this is the initial decoding operation) or the value returned 846 * from a call to a partial decoding method for the previous bytes 847 * @param offset offset of the first byte to check 848 * @param length number of bytes to check 849 * @return {@code -1} if the partial byte sequence is definitely malformed, {@code 0} if it is 850 * well-formed (no additional input needed), or, if the byte sequence is "incomplete", i.e. 851 * apparently terminated in the middle of a character, an opaque integer "state" value 852 * containing enough information to decode the character when passed to a subsequent 853 * invocation of a partial decoding method. 854 */ partialIsValidUtf8(int state, int offset, int length)855 protected abstract int partialIsValidUtf8(int state, int offset, int length); 856 857 // ================================================================= 858 // equals() and hashCode() 859 860 @Override 861 public abstract boolean equals(Object o); 862 863 /** Base class for leaf {@link ByteString}s (i.e. non-ropes). */ 864 abstract static class LeafByteString extends ByteString { 865 @Override getTreeDepth()866 protected final int getTreeDepth() { 867 return 0; 868 } 869 870 @Override isBalanced()871 protected final boolean isBalanced() { 872 return true; 873 } 874 875 @Override writeToReverse(ByteOutput byteOutput)876 void writeToReverse(ByteOutput byteOutput) throws IOException { 877 writeTo(byteOutput); 878 } 879 880 /** 881 * Check equality of the substring of given length of this object starting at zero with another 882 * {@code ByteString} substring starting at offset. 883 * 884 * @param other what to compare a substring in 885 * @param offset offset into other 886 * @param length number of bytes to compare 887 * @return true for equality of substrings, else false. 888 */ equalsRange(ByteString other, int offset, int length)889 abstract boolean equalsRange(ByteString other, int offset, int length); 890 } 891 892 /** 893 * Compute the hashCode using the traditional algorithm from {@link ByteString}. 894 * 895 * @return hashCode value 896 */ 897 @Override hashCode()898 public final int hashCode() { 899 int h = hash; 900 901 if (h == 0) { 902 int size = size(); 903 h = partialHash(size, 0, size); 904 if (h == 0) { 905 h = 1; 906 } 907 hash = h; 908 } 909 return h; 910 } 911 912 // ================================================================= 913 // Input stream 914 915 /** 916 * Creates an {@code InputStream} which can be used to read the bytes. 917 * 918 * <p>The {@link InputStream} returned by this method is guaranteed to be completely non-blocking. 919 * The method {@link InputStream#available()} returns the number of bytes remaining in the stream. 920 * The methods {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)} and 921 * {@link InputStream#skip(long)} will read/skip as many bytes as are available. The method {@link 922 * InputStream#markSupported()} returns {@code true}. 923 * 924 * <p>The methods in the returned {@link InputStream} might <b>not</b> be thread safe. 925 * 926 * @return an input stream that returns the bytes of this byte string. 927 */ newInput()928 public abstract InputStream newInput(); 929 930 /** 931 * Creates a {@link CodedInputStream} which can be used to read the bytes. Using this is often 932 * more efficient than creating a {@link CodedInputStream} that wraps the result of {@link 933 * #newInput()}. 934 * 935 * @return stream based on wrapped data 936 */ newCodedInput()937 public abstract CodedInputStream newCodedInput(); 938 939 // ================================================================= 940 // Output stream 941 942 /** 943 * Creates a new {@link Output} with the given initial capacity. Call {@link 944 * Output#toByteString()} to create the {@code ByteString} instance. 945 * 946 * <p>A {@link ByteString.Output} offers the same functionality as a {@link 947 * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte} 948 * array. 949 * 950 * @param initialCapacity estimate of number of bytes to be written 951 * @return {@code OutputStream} for building a {@code ByteString} 952 */ newOutput(int initialCapacity)953 public static Output newOutput(int initialCapacity) { 954 return new Output(initialCapacity); 955 } 956 957 /** 958 * Creates a new {@link Output}. Call {@link Output#toByteString()} to create the {@code 959 * ByteString} instance. 960 * 961 * <p>A {@link ByteString.Output} offers the same functionality as a {@link 962 * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte 963 * array}. 964 * 965 * @return {@code OutputStream} for building a {@code ByteString} 966 */ newOutput()967 public static Output newOutput() { 968 return new Output(CONCATENATE_BY_COPY_SIZE); 969 } 970 971 /** 972 * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to create the {@code 973 * ByteString} instance. 974 */ 975 public static final class Output extends OutputStream { 976 // Implementation note. 977 // The public methods of this class must be synchronized. ByteStrings 978 // are guaranteed to be immutable. Without some sort of locking, it could 979 // be possible for one thread to call toByteSring(), while another thread 980 // is still modifying the underlying byte array. 981 982 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; 983 // argument passed by user, indicating initial capacity. 984 private final int initialCapacity; 985 // ByteStrings to be concatenated to create the result 986 private final ArrayList<ByteString> flushedBuffers; 987 // Total number of bytes in the ByteStrings of flushedBuffers 988 private int flushedBuffersTotalBytes; 989 // Current buffer to which we are writing 990 private byte[] buffer; 991 // Location in buffer[] to which we write the next byte. 992 private int bufferPos; 993 994 /** 995 * Creates a new ByteString output stream with the specified initial capacity. 996 * 997 * @param initialCapacity the initial capacity of the output stream. 998 */ Output(int initialCapacity)999 Output(int initialCapacity) { 1000 if (initialCapacity < 0) { 1001 throw new IllegalArgumentException("Buffer size < 0"); 1002 } 1003 this.initialCapacity = initialCapacity; 1004 this.flushedBuffers = new ArrayList<ByteString>(); 1005 this.buffer = new byte[initialCapacity]; 1006 } 1007 1008 @Override write(int b)1009 public synchronized void write(int b) { 1010 if (bufferPos == buffer.length) { 1011 flushFullBuffer(1); 1012 } 1013 buffer[bufferPos++] = (byte) b; 1014 } 1015 1016 @Override write(byte[] b, int offset, int length)1017 public synchronized void write(byte[] b, int offset, int length) { 1018 if (length <= buffer.length - bufferPos) { 1019 // The bytes can fit into the current buffer. 1020 System.arraycopy(b, offset, buffer, bufferPos, length); 1021 bufferPos += length; 1022 } else { 1023 // Use up the current buffer 1024 int copySize = buffer.length - bufferPos; 1025 System.arraycopy(b, offset, buffer, bufferPos, copySize); 1026 offset += copySize; 1027 length -= copySize; 1028 // Flush the buffer, and get a new buffer at least big enough to cover 1029 // what we still need to output 1030 flushFullBuffer(length); 1031 System.arraycopy(b, offset, buffer, /* count= */ 0, length); 1032 bufferPos = length; 1033 } 1034 } 1035 1036 /** 1037 * Creates a byte string. Its size is the current size of this output stream and its output has 1038 * been copied to it. 1039 * 1040 * @return the current contents of this output stream, as a byte string. 1041 */ toByteString()1042 public synchronized ByteString toByteString() { 1043 flushLastBuffer(); 1044 return ByteString.copyFrom(flushedBuffers); 1045 } 1046 1047 /** Implement java.util.Arrays.copyOf() for jdk 1.5. */ copyArray(byte[] buffer, int length)1048 private byte[] copyArray(byte[] buffer, int length) { 1049 byte[] result = new byte[length]; 1050 System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length)); 1051 return result; 1052 } 1053 1054 /** 1055 * Writes the complete contents of this byte array output stream to the specified output stream 1056 * argument. 1057 * 1058 * @param out the output stream to which to write the data. 1059 * @throws IOException if an I/O error occurs. 1060 */ writeTo(OutputStream out)1061 public void writeTo(OutputStream out) throws IOException { 1062 ByteString[] cachedFlushBuffers; 1063 byte[] cachedBuffer; 1064 int cachedBufferPos; 1065 synchronized (this) { 1066 // Copy the information we need into local variables so as to hold 1067 // the lock for as short a time as possible. 1068 cachedFlushBuffers = flushedBuffers.toArray(new ByteString[flushedBuffers.size()]); 1069 cachedBuffer = buffer; 1070 cachedBufferPos = bufferPos; 1071 } 1072 for (ByteString byteString : cachedFlushBuffers) { 1073 byteString.writeTo(out); 1074 } 1075 1076 out.write(copyArray(cachedBuffer, cachedBufferPos)); 1077 } 1078 1079 /** 1080 * Returns the current size of the output stream. 1081 * 1082 * @return the current size of the output stream 1083 */ size()1084 public synchronized int size() { 1085 return flushedBuffersTotalBytes + bufferPos; 1086 } 1087 1088 /** 1089 * Resets this stream, so that all currently accumulated output in the output stream is 1090 * discarded. The output stream can be used again, reusing the already allocated buffer space. 1091 */ reset()1092 public synchronized void reset() { 1093 flushedBuffers.clear(); 1094 flushedBuffersTotalBytes = 0; 1095 bufferPos = 0; 1096 } 1097 1098 @Override toString()1099 public String toString() { 1100 return String.format( 1101 "<ByteString.Output@%s size=%d>", 1102 Integer.toHexString(System.identityHashCode(this)), size()); 1103 } 1104 1105 /** 1106 * Internal function used by writers. The current buffer is full, and the writer needs a new 1107 * buffer whose size is at least the specified minimum size. 1108 */ flushFullBuffer(int minSize)1109 private void flushFullBuffer(int minSize) { 1110 flushedBuffers.add(new LiteralByteString(buffer)); 1111 flushedBuffersTotalBytes += buffer.length; 1112 // We want to increase our total capacity by 50%, but as a minimum, 1113 // the new buffer should also at least be >= minSize and 1114 // >= initial Capacity. 1115 int newSize = Math.max(initialCapacity, Math.max(minSize, flushedBuffersTotalBytes >>> 1)); 1116 buffer = new byte[newSize]; 1117 bufferPos = 0; 1118 } 1119 1120 /** 1121 * Internal function used by {@link #toByteString()}. The current buffer may or may not be full, 1122 * but it needs to be flushed. 1123 */ flushLastBuffer()1124 private void flushLastBuffer() { 1125 if (bufferPos < buffer.length) { 1126 if (bufferPos > 0) { 1127 byte[] bufferCopy = copyArray(buffer, bufferPos); 1128 flushedBuffers.add(new LiteralByteString(bufferCopy)); 1129 } 1130 // We reuse this buffer for further writes. 1131 } else { 1132 // Buffer is completely full. Huzzah. 1133 flushedBuffers.add(new LiteralByteString(buffer)); 1134 // 99% of the time, we're not going to use this OutputStream again. 1135 // We set buffer to an empty byte stream so that we're handling this 1136 // case without wasting space. In the rare case that more writes 1137 // *do* occur, this empty buffer will be flushed and an appropriately 1138 // sized new buffer will be created. 1139 buffer = EMPTY_BYTE_ARRAY; 1140 } 1141 flushedBuffersTotalBytes += bufferPos; 1142 bufferPos = 0; 1143 } 1144 } 1145 1146 /** 1147 * Constructs a new {@code ByteString} builder, which allows you to efficiently construct a {@code 1148 * ByteString} by writing to a {@link CodedOutputStream}. Using this is much more efficient than 1149 * calling {@code newOutput()} and wrapping that in a {@code CodedOutputStream}. 1150 * 1151 * <p>This is package-private because it's a somewhat confusing interface. Users can call {@link 1152 * Message#toByteString()} instead of calling this directly. 1153 * 1154 * @param size The target byte size of the {@code ByteString}. You must write exactly this many 1155 * bytes before building the result. 1156 * @return the builder 1157 */ newCodedBuilder(int size)1158 static CodedBuilder newCodedBuilder(int size) { 1159 return new CodedBuilder(size); 1160 } 1161 1162 /** See {@link ByteString#newCodedBuilder(int)}. */ 1163 static final class CodedBuilder { 1164 private final CodedOutputStream output; 1165 private final byte[] buffer; 1166 CodedBuilder(int size)1167 private CodedBuilder(int size) { 1168 buffer = new byte[size]; 1169 output = CodedOutputStream.newInstance(buffer); 1170 } 1171 build()1172 public ByteString build() { 1173 output.checkNoSpaceLeft(); 1174 1175 // We can be confident that the CodedOutputStream will not modify the 1176 // underlying bytes anymore because it already wrote all of them. So, 1177 // no need to make a copy. 1178 return new LiteralByteString(buffer); 1179 } 1180 getCodedOutput()1181 public CodedOutputStream getCodedOutput() { 1182 return output; 1183 } 1184 } 1185 1186 // ================================================================= 1187 // Methods {@link RopeByteString} needs on instances, which aren't part of the 1188 // public API. 1189 1190 /** 1191 * Return the depth of the tree representing this {@code ByteString}, if any, whose root is this 1192 * node. If this is a leaf node, return 0. 1193 * 1194 * @return tree depth or zero 1195 */ getTreeDepth()1196 protected abstract int getTreeDepth(); 1197 1198 /** 1199 * Return {@code true} if this ByteString is literal (a leaf node) or a flat-enough tree in the 1200 * sense of {@link RopeByteString}. 1201 * 1202 * @return true if the tree is flat enough 1203 */ isBalanced()1204 protected abstract boolean isBalanced(); 1205 1206 /** 1207 * Return the cached hash code if available. 1208 * 1209 * @return value of cached hash code or 0 if not computed yet 1210 */ peekCachedHashCode()1211 protected final int peekCachedHashCode() { 1212 return hash; 1213 } 1214 1215 /** 1216 * Compute the hash across the value bytes starting with the given hash, and return the result. 1217 * This is used to compute the hash across strings represented as a set of pieces by allowing the 1218 * hash computation to be continued from piece to piece. 1219 * 1220 * @param h starting hash value 1221 * @param offset offset into this value to start looking at data values 1222 * @param length number of data values to include in the hash computation 1223 * @return ending hash value 1224 */ partialHash(int h, int offset, int length)1225 protected abstract int partialHash(int h, int offset, int length); 1226 1227 /** 1228 * Checks that the given index falls within the specified array size. 1229 * 1230 * @param index the index position to be tested 1231 * @param size the length of the array 1232 * @throws IndexOutOfBoundsException if the index does not fall within the array. 1233 */ checkIndex(int index, int size)1234 static void checkIndex(int index, int size) { 1235 if ((index | (size - (index + 1))) < 0) { 1236 if (index < 0) { 1237 throw new ArrayIndexOutOfBoundsException("Index < 0: " + index); 1238 } 1239 throw new ArrayIndexOutOfBoundsException("Index > length: " + index + ", " + size); 1240 } 1241 } 1242 1243 /** 1244 * Checks that the given range falls within the bounds of an array 1245 * 1246 * @param startIndex the start index of the range (inclusive) 1247 * @param endIndex the end index of the range (exclusive) 1248 * @param size the size of the array. 1249 * @return the length of the range. 1250 * @throws IndexOutOfBoundsException some or all of the range falls outside of the array. 1251 */ checkRange(int startIndex, int endIndex, int size)1252 static int checkRange(int startIndex, int endIndex, int size) { 1253 final int length = endIndex - startIndex; 1254 if ((startIndex | endIndex | length | (size - endIndex)) < 0) { 1255 if (startIndex < 0) { 1256 throw new IndexOutOfBoundsException("Beginning index: " + startIndex + " < 0"); 1257 } 1258 if (endIndex < startIndex) { 1259 throw new IndexOutOfBoundsException( 1260 "Beginning index larger than ending index: " + startIndex + ", " + endIndex); 1261 } 1262 // endIndex >= size 1263 throw new IndexOutOfBoundsException("End index: " + endIndex + " >= " + size); 1264 } 1265 return length; 1266 } 1267 1268 @Override toString()1269 public final String toString() { 1270 return String.format( 1271 "<ByteString@%s size=%d>", Integer.toHexString(System.identityHashCode(this)), size()); 1272 } 1273 1274 /** 1275 * This class implements a {@link com.google.protobuf.ByteString} backed by a single array of 1276 * bytes, contiguous in memory. It supports substring by pointing to only a sub-range of the 1277 * underlying byte array, meaning that a substring will reference the full byte-array of the 1278 * string it's made from, exactly as with {@link String}. 1279 * 1280 * @author carlanton@google.com (Carl Haverl) 1281 */ 1282 // Keep this class private to avoid deadlocks in classloading across threads as ByteString's 1283 // static initializer loads LiteralByteString and another thread loads LiteralByteString. 1284 private static class LiteralByteString extends ByteString.LeafByteString { 1285 private static final long serialVersionUID = 1L; 1286 1287 protected final byte[] bytes; 1288 1289 /** 1290 * Creates a {@code LiteralByteString} backed by the given array, without copying. 1291 * 1292 * @param bytes array to wrap 1293 */ LiteralByteString(byte[] bytes)1294 LiteralByteString(byte[] bytes) { 1295 if (bytes == null) { 1296 throw new NullPointerException(); 1297 } 1298 this.bytes = bytes; 1299 } 1300 1301 @Override byteAt(int index)1302 public byte byteAt(int index) { 1303 // Unlike most methods in this class, this one is a direct implementation 1304 // ignoring the potential offset because we need to do range-checking in the 1305 // substring case anyway. 1306 return bytes[index]; 1307 } 1308 1309 @Override internalByteAt(int index)1310 byte internalByteAt(int index) { 1311 return bytes[index]; 1312 } 1313 1314 @Override size()1315 public int size() { 1316 return bytes.length; 1317 } 1318 1319 // ================================================================= 1320 // ByteString -> substring 1321 1322 @Override substring(int beginIndex, int endIndex)1323 public final ByteString substring(int beginIndex, int endIndex) { 1324 final int length = checkRange(beginIndex, endIndex, size()); 1325 1326 if (length == 0) { 1327 return ByteString.EMPTY; 1328 } 1329 1330 return new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex, length); 1331 } 1332 1333 // ================================================================= 1334 // ByteString -> byte[] 1335 1336 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1337 protected void copyToInternal( 1338 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 1339 // Optimized form, not for subclasses, since we don't call 1340 // getOffsetIntoBytes() or check the 'numberToCopy' parameter. 1341 // TODO(nathanmittler): Is not calling getOffsetIntoBytes really saving that much? 1342 System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy); 1343 } 1344 1345 @Override copyTo(ByteBuffer target)1346 public final void copyTo(ByteBuffer target) { 1347 target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes 1348 } 1349 1350 @Override asReadOnlyByteBuffer()1351 public final ByteBuffer asReadOnlyByteBuffer() { 1352 return ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size()).asReadOnlyBuffer(); 1353 } 1354 1355 @Override asReadOnlyByteBufferList()1356 public final List<ByteBuffer> asReadOnlyByteBufferList() { 1357 return Collections.singletonList(asReadOnlyByteBuffer()); 1358 } 1359 1360 @Override writeTo(OutputStream outputStream)1361 public final void writeTo(OutputStream outputStream) throws IOException { 1362 outputStream.write(toByteArray()); 1363 } 1364 1365 @Override writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)1366 final void writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite) 1367 throws IOException { 1368 outputStream.write(bytes, getOffsetIntoBytes() + sourceOffset, numberToWrite); 1369 } 1370 1371 @Override writeTo(ByteOutput output)1372 final void writeTo(ByteOutput output) throws IOException { 1373 output.writeLazy(bytes, getOffsetIntoBytes(), size()); 1374 } 1375 1376 @Override toStringInternal(Charset charset)1377 protected final String toStringInternal(Charset charset) { 1378 return new String(bytes, getOffsetIntoBytes(), size(), charset); 1379 } 1380 1381 // ================================================================= 1382 // UTF-8 decoding 1383 1384 @Override isValidUtf8()1385 public final boolean isValidUtf8() { 1386 int offset = getOffsetIntoBytes(); 1387 return Utf8.isValidUtf8(bytes, offset, offset + size()); 1388 } 1389 1390 @Override partialIsValidUtf8(int state, int offset, int length)1391 protected final int partialIsValidUtf8(int state, int offset, int length) { 1392 int index = getOffsetIntoBytes() + offset; 1393 return Utf8.partialIsValidUtf8(state, bytes, index, index + length); 1394 } 1395 1396 // ================================================================= 1397 // equals() and hashCode() 1398 1399 @Override equals(Object other)1400 public final boolean equals(Object other) { 1401 if (other == this) { 1402 return true; 1403 } 1404 if (!(other instanceof ByteString)) { 1405 return false; 1406 } 1407 1408 if (size() != ((ByteString) other).size()) { 1409 return false; 1410 } 1411 if (size() == 0) { 1412 return true; 1413 } 1414 1415 if (other instanceof LiteralByteString) { 1416 LiteralByteString otherAsLiteral = (LiteralByteString) other; 1417 // If we know the hash codes and they are not equal, we know the byte 1418 // strings are not equal. 1419 int thisHash = peekCachedHashCode(); 1420 int thatHash = otherAsLiteral.peekCachedHashCode(); 1421 if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) { 1422 return false; 1423 } 1424 1425 return equalsRange((LiteralByteString) other, 0, size()); 1426 } else { 1427 // RopeByteString and NioByteString. 1428 return other.equals(this); 1429 } 1430 } 1431 1432 /** 1433 * Check equality of the substring of given length of this object starting at zero with another 1434 * {@code LiteralByteString} substring starting at offset. 1435 * 1436 * @param other what to compare a substring in 1437 * @param offset offset into other 1438 * @param length number of bytes to compare 1439 * @return true for equality of substrings, else false. 1440 */ 1441 @Override equalsRange(ByteString other, int offset, int length)1442 final boolean equalsRange(ByteString other, int offset, int length) { 1443 if (length > other.size()) { 1444 throw new IllegalArgumentException("Length too large: " + length + size()); 1445 } 1446 if (offset + length > other.size()) { 1447 throw new IllegalArgumentException( 1448 "Ran off end of other: " + offset + ", " + length + ", " + other.size()); 1449 } 1450 1451 if (other instanceof LiteralByteString) { 1452 LiteralByteString lbsOther = (LiteralByteString) other; 1453 byte[] thisBytes = bytes; 1454 byte[] otherBytes = lbsOther.bytes; 1455 int thisLimit = getOffsetIntoBytes() + length; 1456 for (int thisIndex = getOffsetIntoBytes(), 1457 otherIndex = lbsOther.getOffsetIntoBytes() + offset; 1458 (thisIndex < thisLimit); 1459 ++thisIndex, ++otherIndex) { 1460 if (thisBytes[thisIndex] != otherBytes[otherIndex]) { 1461 return false; 1462 } 1463 } 1464 return true; 1465 } 1466 1467 return other.substring(offset, offset + length).equals(substring(0, length)); 1468 } 1469 1470 @Override partialHash(int h, int offset, int length)1471 protected final int partialHash(int h, int offset, int length) { 1472 return Internal.partialHash(h, bytes, getOffsetIntoBytes() + offset, length); 1473 } 1474 1475 // ================================================================= 1476 // Input stream 1477 1478 @Override newInput()1479 public final InputStream newInput() { 1480 return new ByteArrayInputStream(bytes, getOffsetIntoBytes(), size()); // No copy 1481 } 1482 1483 @Override newCodedInput()1484 public final CodedInputStream newCodedInput() { 1485 // We trust CodedInputStream not to modify the bytes, or to give anyone 1486 // else access to them. 1487 return CodedInputStream.newInstance( 1488 bytes, getOffsetIntoBytes(), size(), /* bufferIsImmutable= */ true); 1489 } 1490 1491 // ================================================================= 1492 // Internal methods 1493 1494 /** 1495 * Offset into {@code bytes[]} to use, non-zero for substrings. 1496 * 1497 * @return always 0 for this class 1498 */ getOffsetIntoBytes()1499 protected int getOffsetIntoBytes() { 1500 return 0; 1501 } 1502 } 1503 1504 /** 1505 * This class is used to represent the substring of a {@link ByteString} over a single byte array. 1506 * In terms of the public API of {@link ByteString}, you end up here by calling {@link 1507 * ByteString#copyFrom(byte[])} followed by {@link ByteString#substring(int, int)}. 1508 * 1509 * <p>This class contains most of the overhead involved in creating a substring from a {@link 1510 * LiteralByteString}. The overhead involves some range-checking and two extra fields. 1511 * 1512 * @author carlanton@google.com (Carl Haverl) 1513 */ 1514 // Keep this class private to avoid deadlocks in classloading across threads as ByteString's 1515 // static initializer loads LiteralByteString and another thread loads BoundedByteString. 1516 private static final class BoundedByteString extends LiteralByteString { 1517 1518 private final int bytesOffset; 1519 private final int bytesLength; 1520 1521 /** 1522 * Creates a {@code BoundedByteString} backed by the sub-range of given array, without copying. 1523 * 1524 * @param bytes array to wrap 1525 * @param offset index to first byte to use in bytes 1526 * @param length number of bytes to use from bytes 1527 * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0}, or if {@code 1528 * offset + length > bytes.length}. 1529 */ BoundedByteString(byte[] bytes, int offset, int length)1530 BoundedByteString(byte[] bytes, int offset, int length) { 1531 super(bytes); 1532 checkRange(offset, offset + length, bytes.length); 1533 1534 this.bytesOffset = offset; 1535 this.bytesLength = length; 1536 } 1537 1538 /** 1539 * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for 1540 * backwards-compatibility reasons although it would more properly be {@link 1541 * IndexOutOfBoundsException}. 1542 * 1543 * @param index index of byte 1544 * @return the value 1545 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 1546 */ 1547 @Override byteAt(int index)1548 public byte byteAt(int index) { 1549 // We must check the index ourselves as we cannot rely on Java array index 1550 // checking for substrings. 1551 checkIndex(index, size()); 1552 return bytes[bytesOffset + index]; 1553 } 1554 1555 @Override internalByteAt(int index)1556 byte internalByteAt(int index) { 1557 return bytes[bytesOffset + index]; 1558 } 1559 1560 @Override size()1561 public int size() { 1562 return bytesLength; 1563 } 1564 1565 @Override getOffsetIntoBytes()1566 protected int getOffsetIntoBytes() { 1567 return bytesOffset; 1568 } 1569 1570 // ================================================================= 1571 // ByteString -> byte[] 1572 1573 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1574 protected void copyToInternal( 1575 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 1576 System.arraycopy( 1577 bytes, getOffsetIntoBytes() + sourceOffset, target, targetOffset, numberToCopy); 1578 } 1579 1580 // ================================================================= 1581 // Serializable 1582 1583 private static final long serialVersionUID = 1L; 1584 writeReplace()1585 Object writeReplace() { 1586 return ByteString.wrap(toByteArray()); 1587 } 1588 readObject(@uppressWarnings"unused") ObjectInputStream in)1589 private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException { 1590 throw new InvalidObjectException( 1591 "BoundedByteStream instances are not to be serialized directly"); 1592 } 1593 } 1594 } 1595