1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import java.io.ByteArrayOutputStream; 34 import java.io.IOException; 35 import java.io.InputStream; 36 import java.io.OutputStream; 37 import java.io.UnsupportedEncodingException; 38 import java.nio.ByteBuffer; 39 import java.util.ArrayList; 40 import java.util.Collection; 41 import java.util.Iterator; 42 import java.util.List; 43 import java.util.NoSuchElementException; 44 45 /** 46 * Immutable sequence of bytes. Substring is supported by sharing the reference 47 * to the immutable underlying bytes, as with {@link String}. Concatenation is 48 * likewise supported without copying (long strings) by building a tree of 49 * pieces in {@link RopeByteString}. 50 * <p> 51 * Like {@link String}, the contents of a {@link ByteString} can never be 52 * observed to change, not even in the presence of a data race or incorrect 53 * API usage in the client code. 54 * 55 * @author crazybob@google.com Bob Lee 56 * @author kenton@google.com Kenton Varda 57 * @author carlanton@google.com Carl Haverl 58 * @author martinrb@google.com Martin Buchholz 59 */ 60 public abstract class ByteString implements Iterable<Byte> { 61 62 /** 63 * When two strings to be concatenated have a combined length shorter than 64 * this, we just copy their bytes on {@link #concat(ByteString)}. 65 * The trade-off is copy size versus the overhead of creating tree nodes 66 * in {@link RopeByteString}. 67 */ 68 static final int CONCATENATE_BY_COPY_SIZE = 128; 69 70 /** 71 * When copying an InputStream into a ByteString with .readFrom(), 72 * the chunks in the underlying rope start at 256 bytes, but double 73 * each iteration up to 8192 bytes. 74 */ 75 static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b 76 static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k 77 78 /** 79 * Empty {@code ByteString}. 80 */ 81 public static final ByteString EMPTY = new LiteralByteString(new byte[0]); 82 83 // This constructor is here to prevent subclassing outside of this package, ByteString()84 ByteString() {} 85 86 /** 87 * Gets the byte at the given index. This method should be used only for 88 * random access to individual bytes. To access bytes sequentially, use the 89 * {@link ByteIterator} returned by {@link #iterator()}, and call {@link 90 * #substring(int, int)} first if necessary. 91 * 92 * @param index index of byte 93 * @return the value 94 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 95 */ byteAt(int index)96 public abstract byte byteAt(int index); 97 98 /** 99 * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. 100 * To avoid auto-boxing, you may get the iterator manually and call 101 * {@link ByteIterator#nextByte()}. 102 * 103 * @return the iterator 104 */ iterator()105 public abstract ByteIterator iterator(); 106 107 /** 108 * This interface extends {@code Iterator<Byte>}, so that we can return an 109 * unboxed {@code byte}. 110 */ 111 public interface ByteIterator extends Iterator<Byte> { 112 /** 113 * An alternative to {@link Iterator#next()} that returns an 114 * unboxed primitive {@code byte}. 115 * 116 * @return the next {@code byte} in the iteration 117 * @throws NoSuchElementException if the iteration has no more elements 118 */ nextByte()119 byte nextByte(); 120 } 121 122 /** 123 * Gets the number of bytes. 124 * 125 * @return size in bytes 126 */ size()127 public abstract int size(); 128 129 /** 130 * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. 131 * 132 * @return true if this is zero bytes long 133 */ isEmpty()134 public boolean isEmpty() { 135 return size() == 0; 136 } 137 138 // ================================================================= 139 // ByteString -> substring 140 141 /** 142 * Return the substring from {@code beginIndex}, inclusive, to the end of the 143 * string. 144 * 145 * @param beginIndex start at this index 146 * @return substring sharing underlying data 147 * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or 148 * {@code beginIndex > size()}. 149 */ substring(int beginIndex)150 public ByteString substring(int beginIndex) { 151 return substring(beginIndex, size()); 152 } 153 154 /** 155 * Return the substring from {@code beginIndex}, inclusive, to {@code 156 * endIndex}, exclusive. 157 * 158 * @param beginIndex start at this index 159 * @param endIndex the last character is the one before this index 160 * @return substring sharing underlying data 161 * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, 162 * {@code endIndex > size()}, or {@code beginIndex > endIndex}. 163 */ substring(int beginIndex, int endIndex)164 public abstract ByteString substring(int beginIndex, int endIndex); 165 166 /** 167 * Tests if this bytestring starts with the specified prefix. 168 * Similar to {@link String#startsWith(String)} 169 * 170 * @param prefix the prefix. 171 * @return <code>true</code> if the byte sequence represented by the 172 * argument is a prefix of the byte sequence represented by 173 * this string; <code>false</code> otherwise. 174 */ startsWith(ByteString prefix)175 public boolean startsWith(ByteString prefix) { 176 return size() >= prefix.size() && 177 substring(0, prefix.size()).equals(prefix); 178 } 179 180 /** 181 * Tests if this bytestring ends with the specified suffix. 182 * Similar to {@link String#endsWith(String)} 183 * 184 * @param suffix the suffix. 185 * @return <code>true</code> if the byte sequence represented by the 186 * argument is a suffix of the byte sequence represented by 187 * this string; <code>false</code> otherwise. 188 */ endsWith(ByteString suffix)189 public boolean endsWith(ByteString suffix) { 190 return size() >= suffix.size() && 191 substring(size() - suffix.size()).equals(suffix); 192 } 193 194 // ================================================================= 195 // byte[] -> ByteString 196 197 /** 198 * Copies the given bytes into a {@code ByteString}. 199 * 200 * @param bytes source array 201 * @param offset offset in source array 202 * @param size number of bytes to copy 203 * @return new {@code ByteString} 204 */ copyFrom(byte[] bytes, int offset, int size)205 public static ByteString copyFrom(byte[] bytes, int offset, int size) { 206 byte[] copy = new byte[size]; 207 System.arraycopy(bytes, offset, copy, 0, size); 208 return new LiteralByteString(copy); 209 } 210 211 /** 212 * Copies the given bytes into a {@code ByteString}. 213 * 214 * @param bytes to copy 215 * @return new {@code ByteString} 216 */ copyFrom(byte[] bytes)217 public static ByteString copyFrom(byte[] bytes) { 218 return copyFrom(bytes, 0, bytes.length); 219 } 220 221 /** 222 * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into 223 * a {@code ByteString}. 224 * 225 * @param bytes source buffer 226 * @param size number of bytes to copy 227 * @return new {@code ByteString} 228 */ copyFrom(ByteBuffer bytes, int size)229 public static ByteString copyFrom(ByteBuffer bytes, int size) { 230 byte[] copy = new byte[size]; 231 bytes.get(copy); 232 return new LiteralByteString(copy); 233 } 234 235 /** 236 * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into 237 * a {@code ByteString}. 238 * 239 * @param bytes sourceBuffer 240 * @return new {@code ByteString} 241 */ copyFrom(ByteBuffer bytes)242 public static ByteString copyFrom(ByteBuffer bytes) { 243 return copyFrom(bytes, bytes.remaining()); 244 } 245 246 /** 247 * Encodes {@code text} into a sequence of bytes using the named charset 248 * and returns the result as a {@code ByteString}. 249 * 250 * @param text source string 251 * @param charsetName encoding to use 252 * @return new {@code ByteString} 253 * @throws UnsupportedEncodingException if the encoding isn't found 254 */ copyFrom(String text, String charsetName)255 public static ByteString copyFrom(String text, String charsetName) 256 throws UnsupportedEncodingException { 257 return new LiteralByteString(text.getBytes(charsetName)); 258 } 259 260 /** 261 * Encodes {@code text} into a sequence of UTF-8 bytes and returns the 262 * result as a {@code ByteString}. 263 * 264 * @param text source string 265 * @return new {@code ByteString} 266 */ copyFromUtf8(String text)267 public static ByteString copyFromUtf8(String text) { 268 try { 269 return new LiteralByteString(text.getBytes("UTF-8")); 270 } catch (UnsupportedEncodingException e) { 271 throw new RuntimeException("UTF-8 not supported?", e); 272 } 273 } 274 275 // ================================================================= 276 // InputStream -> ByteString 277 278 /** 279 * Completely reads the given stream's bytes into a 280 * {@code ByteString}, blocking if necessary until all bytes are 281 * read through to the end of the stream. 282 * 283 * <b>Performance notes:</b> The returned {@code ByteString} is an 284 * immutable tree of byte arrays ("chunks") of the stream data. The 285 * first chunk is small, with subsequent chunks each being double 286 * the size, up to 8K. If the caller knows the precise length of 287 * the stream and wishes to avoid all unnecessary copies and 288 * allocations, consider using the two-argument version of this 289 * method, below. 290 * 291 * @param streamToDrain The source stream, which is read completely 292 * but not closed. 293 * @return A new {@code ByteString} which is made up of chunks of 294 * various sizes, depending on the behavior of the underlying 295 * stream. 296 * @throws IOException IOException is thrown if there is a problem 297 * reading the underlying stream. 298 */ readFrom(InputStream streamToDrain)299 public static ByteString readFrom(InputStream streamToDrain) 300 throws IOException { 301 return readFrom( 302 streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); 303 } 304 305 /** 306 * Completely reads the given stream's bytes into a 307 * {@code ByteString}, blocking if necessary until all bytes are 308 * read through to the end of the stream. 309 * 310 * <b>Performance notes:</b> The returned {@code ByteString} is an 311 * immutable tree of byte arrays ("chunks") of the stream data. The 312 * chunkSize parameter sets the size of these byte arrays. In 313 * particular, if the chunkSize is precisely the same as the length 314 * of the stream, unnecessary allocations and copies will be 315 * avoided. Otherwise, the chunks will be of the given size, except 316 * for the last chunk, which will be resized (via a reallocation and 317 * copy) to contain the remainder of the stream. 318 * 319 * @param streamToDrain The source stream, which is read completely 320 * but not closed. 321 * @param chunkSize The size of the chunks in which to read the 322 * stream. 323 * @return A new {@code ByteString} which is made up of chunks of 324 * the given size. 325 * @throws IOException IOException is thrown if there is a problem 326 * reading the underlying stream. 327 */ readFrom(InputStream streamToDrain, int chunkSize)328 public static ByteString readFrom(InputStream streamToDrain, int chunkSize) 329 throws IOException { 330 return readFrom(streamToDrain, chunkSize, chunkSize); 331 } 332 333 // Helper method that takes the chunk size range as a parameter. readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)334 public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, 335 int maxChunkSize) throws IOException { 336 Collection<ByteString> results = new ArrayList<ByteString>(); 337 338 // copy the inbound bytes into a list of chunks; the chunk size 339 // grows exponentially to support both short and long streams. 340 int chunkSize = minChunkSize; 341 while (true) { 342 ByteString chunk = readChunk(streamToDrain, chunkSize); 343 if (chunk == null) { 344 break; 345 } 346 results.add(chunk); 347 chunkSize = Math.min(chunkSize * 2, maxChunkSize); 348 } 349 350 return ByteString.copyFrom(results); 351 } 352 353 /** 354 * Blocks until a chunk of the given size can be made from the 355 * stream, or EOF is reached. Calls read() repeatedly in case the 356 * given stream implementation doesn't completely fill the given 357 * buffer in one read() call. 358 * 359 * @return A chunk of the desired size, or else a chunk as large as 360 * was available when end of stream was reached. Returns null if the 361 * given stream had no more data in it. 362 */ readChunk(InputStream in, final int chunkSize)363 private static ByteString readChunk(InputStream in, final int chunkSize) 364 throws IOException { 365 final byte[] buf = new byte[chunkSize]; 366 int bytesRead = 0; 367 while (bytesRead < chunkSize) { 368 final int count = in.read(buf, bytesRead, chunkSize - bytesRead); 369 if (count == -1) { 370 break; 371 } 372 bytesRead += count; 373 } 374 375 if (bytesRead == 0) { 376 return null; 377 } else { 378 return ByteString.copyFrom(buf, 0, bytesRead); 379 } 380 } 381 382 // ================================================================= 383 // Multiple ByteStrings -> One ByteString 384 385 /** 386 * Concatenate the given {@code ByteString} to this one. Short concatenations, 387 * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are 388 * produced by copying the underlying bytes (as per Rope.java, <a 389 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 390 * BAP95 </a>. In general, the concatenate involves no copying. 391 * 392 * @param other string to concatenate 393 * @return a new {@code ByteString} instance 394 */ concat(ByteString other)395 public ByteString concat(ByteString other) { 396 int thisSize = size(); 397 int otherSize = other.size(); 398 if ((long) thisSize + otherSize >= Integer.MAX_VALUE) { 399 throw new IllegalArgumentException("ByteString would be too long: " + 400 thisSize + "+" + otherSize); 401 } 402 403 return RopeByteString.concatenate(this, other); 404 } 405 406 /** 407 * Concatenates all byte strings in the iterable and returns the result. 408 * This is designed to run in O(list size), not O(total bytes). 409 * 410 * <p>The returned {@code ByteString} is not necessarily a unique object. 411 * If the list is empty, the returned object is the singleton empty 412 * {@code ByteString}. If the list has only one element, that 413 * {@code ByteString} will be returned without copying. 414 * 415 * @param byteStrings strings to be concatenated 416 * @return new {@code ByteString} 417 */ copyFrom(Iterable<ByteString> byteStrings)418 public static ByteString copyFrom(Iterable<ByteString> byteStrings) { 419 Collection<ByteString> collection; 420 if (!(byteStrings instanceof Collection)) { 421 collection = new ArrayList<ByteString>(); 422 for (ByteString byteString : byteStrings) { 423 collection.add(byteString); 424 } 425 } else { 426 collection = (Collection<ByteString>) byteStrings; 427 } 428 ByteString result; 429 if (collection.isEmpty()) { 430 result = EMPTY; 431 } else { 432 result = balancedConcat(collection.iterator(), collection.size()); 433 } 434 return result; 435 } 436 437 // Internal function used by copyFrom(Iterable<ByteString>). 438 // Create a balanced concatenation of the next "length" elements from the 439 // iterable. balancedConcat(Iterator<ByteString> iterator, int length)440 private static ByteString balancedConcat(Iterator<ByteString> iterator, 441 int length) { 442 assert length >= 1; 443 ByteString result; 444 if (length == 1) { 445 result = iterator.next(); 446 } else { 447 int halfLength = length >>> 1; 448 ByteString left = balancedConcat(iterator, halfLength); 449 ByteString right = balancedConcat(iterator, length - halfLength); 450 result = left.concat(right); 451 } 452 return result; 453 } 454 455 // ================================================================= 456 // ByteString -> byte[] 457 458 /** 459 * Copies bytes into a buffer at the given offset. 460 * 461 * @param target buffer to copy into 462 * @param offset in the target buffer 463 * @throws IndexOutOfBoundsException if the offset is negative or too large 464 */ copyTo(byte[] target, int offset)465 public void copyTo(byte[] target, int offset) { 466 copyTo(target, 0, offset, size()); 467 } 468 469 /** 470 * Copies bytes into a buffer. 471 * 472 * @param target buffer to copy into 473 * @param sourceOffset offset within these bytes 474 * @param targetOffset offset within the target buffer 475 * @param numberToCopy number of bytes to copy 476 * @throws IndexOutOfBoundsException if an offset or size is negative or too 477 * large 478 */ copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)479 public void copyTo(byte[] target, int sourceOffset, int targetOffset, 480 int numberToCopy) { 481 if (sourceOffset < 0) { 482 throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset); 483 } 484 if (targetOffset < 0) { 485 throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset); 486 } 487 if (numberToCopy < 0) { 488 throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy); 489 } 490 if (sourceOffset + numberToCopy > size()) { 491 throw new IndexOutOfBoundsException( 492 "Source end offset < 0: " + (sourceOffset + numberToCopy)); 493 } 494 if (targetOffset + numberToCopy > target.length) { 495 throw new IndexOutOfBoundsException( 496 "Target end offset < 0: " + (targetOffset + numberToCopy)); 497 } 498 if (numberToCopy > 0) { 499 copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 500 } 501 } 502 503 /** 504 * Internal (package private) implementation of 505 * @link{#copyTo(byte[],int,int,int}. 506 * It assumes that all error checking has already been performed and that 507 * @code{numberToCopy > 0}. 508 */ copyToInternal(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)509 protected abstract void copyToInternal(byte[] target, int sourceOffset, 510 int targetOffset, int numberToCopy); 511 512 /** 513 * Copies bytes into a ByteBuffer. 514 * 515 * @param target ByteBuffer to copy into. 516 * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only 517 * @throws java.nio.BufferOverflowException if the {@code target}'s 518 * remaining() space is not large enough to hold the data. 519 */ copyTo(ByteBuffer target)520 public abstract void copyTo(ByteBuffer target); 521 522 /** 523 * Copies bytes to a {@code byte[]}. 524 * 525 * @return copied bytes 526 */ toByteArray()527 public byte[] toByteArray() { 528 int size = size(); 529 if (size == 0) { 530 return Internal.EMPTY_BYTE_ARRAY; 531 } 532 byte[] result = new byte[size]; 533 copyToInternal(result, 0, 0, size); 534 return result; 535 } 536 537 /** 538 * Writes the complete contents of this byte string to 539 * the specified output stream argument. 540 * 541 * @param out the output stream to which to write the data. 542 * @throws IOException if an I/O error occurs. 543 */ writeTo(OutputStream out)544 public abstract void writeTo(OutputStream out) throws IOException; 545 546 /** 547 * Writes a specified part of this byte string to an output stream. 548 * 549 * @param out the output stream to which to write the data. 550 * @param sourceOffset offset within these bytes 551 * @param numberToWrite number of bytes to write 552 * @throws IOException if an I/O error occurs. 553 * @throws IndexOutOfBoundsException if an offset or size is negative or too 554 * large 555 */ writeTo(OutputStream out, int sourceOffset, int numberToWrite)556 void writeTo(OutputStream out, int sourceOffset, int numberToWrite) 557 throws IOException { 558 if (sourceOffset < 0) { 559 throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset); 560 } 561 if (numberToWrite < 0) { 562 throw new IndexOutOfBoundsException("Length < 0: " + numberToWrite); 563 } 564 if (sourceOffset + numberToWrite > size()) { 565 throw new IndexOutOfBoundsException( 566 "Source end offset exceeded: " + (sourceOffset + numberToWrite)); 567 } 568 if (numberToWrite > 0) { 569 writeToInternal(out, sourceOffset, numberToWrite); 570 } 571 572 } 573 574 /** 575 * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes 576 * all error checking has already been done. 577 */ writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)578 abstract void writeToInternal(OutputStream out, int sourceOffset, 579 int numberToWrite) throws IOException; 580 581 /** 582 * Constructs a read-only {@code java.nio.ByteBuffer} whose content 583 * is equal to the contents of this byte string. 584 * The result uses the same backing array as the byte string, if possible. 585 * 586 * @return wrapped bytes 587 */ asReadOnlyByteBuffer()588 public abstract ByteBuffer asReadOnlyByteBuffer(); 589 590 /** 591 * Constructs a list of read-only {@code java.nio.ByteBuffer} objects 592 * such that the concatenation of their contents is equal to the contents 593 * of this byte string. The result uses the same backing arrays as the 594 * byte string. 595 * <p> 596 * By returning a list, implementations of this method may be able to avoid 597 * copying even when there are multiple backing arrays. 598 * 599 * @return a list of wrapped bytes 600 */ asReadOnlyByteBufferList()601 public abstract List<ByteBuffer> asReadOnlyByteBufferList(); 602 603 /** 604 * Constructs a new {@code String} by decoding the bytes using the 605 * specified charset. 606 * 607 * @param charsetName encode using this charset 608 * @return new string 609 * @throws UnsupportedEncodingException if charset isn't recognized 610 */ toString(String charsetName)611 public abstract String toString(String charsetName) 612 throws UnsupportedEncodingException; 613 614 // ================================================================= 615 // UTF-8 decoding 616 617 /** 618 * Constructs a new {@code String} by decoding the bytes as UTF-8. 619 * 620 * @return new string using UTF-8 encoding 621 */ toStringUtf8()622 public String toStringUtf8() { 623 try { 624 return toString("UTF-8"); 625 } catch (UnsupportedEncodingException e) { 626 throw new RuntimeException("UTF-8 not supported?", e); 627 } 628 } 629 630 /** 631 * Tells whether this {@code ByteString} represents a well-formed UTF-8 632 * byte sequence, such that the original bytes can be converted to a 633 * String object and then round tripped back to bytes without loss. 634 * 635 * <p>More precisely, returns {@code true} whenever: <pre> {@code 636 * Arrays.equals(byteString.toByteArray(), 637 * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) 638 * }</pre> 639 * 640 * <p>This method returns {@code false} for "overlong" byte sequences, 641 * as well as for 3-byte sequences that would map to a surrogate 642 * character, in accordance with the restricted definition of UTF-8 643 * introduced in Unicode 3.1. Note that the UTF-8 decoder included in 644 * Oracle's JDK has been modified to also reject "overlong" byte 645 * sequences, but (as of 2011) still accepts 3-byte surrogate 646 * character byte sequences. 647 * 648 * <p>See the Unicode Standard,</br> 649 * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br> 650 * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. 651 * 652 * @return whether the bytes in this {@code ByteString} are a 653 * well-formed UTF-8 byte sequence 654 */ isValidUtf8()655 public abstract boolean isValidUtf8(); 656 657 /** 658 * Tells whether the given byte sequence is a well-formed, malformed, or 659 * incomplete UTF-8 byte sequence. This method accepts and returns a partial 660 * state result, allowing the bytes for a complete UTF-8 byte sequence to be 661 * composed from multiple {@code ByteString} segments. 662 * 663 * @param state either {@code 0} (if this is the initial decoding operation) 664 * or the value returned from a call to a partial decoding method for the 665 * previous bytes 666 * @param offset offset of the first byte to check 667 * @param length number of bytes to check 668 * 669 * @return {@code -1} if the partial byte sequence is definitely malformed, 670 * {@code 0} if it is well-formed (no additional input needed), or, if the 671 * byte sequence is "incomplete", i.e. apparently terminated in the middle of 672 * a character, an opaque integer "state" value containing enough information 673 * to decode the character when passed to a subsequent invocation of a 674 * partial decoding method. 675 */ partialIsValidUtf8(int state, int offset, int length)676 protected abstract int partialIsValidUtf8(int state, int offset, int length); 677 678 // ================================================================= 679 // equals() and hashCode() 680 681 @Override equals(Object o)682 public abstract boolean equals(Object o); 683 684 /** 685 * Return a non-zero hashCode depending only on the sequence of bytes 686 * in this ByteString. 687 * 688 * @return hashCode value for this object 689 */ 690 @Override hashCode()691 public abstract int hashCode(); 692 693 // ================================================================= 694 // Input stream 695 696 /** 697 * Creates an {@code InputStream} which can be used to read the bytes. 698 * <p> 699 * The {@link InputStream} returned by this method is guaranteed to be 700 * completely non-blocking. The method {@link InputStream#available()} 701 * returns the number of bytes remaining in the stream. The methods 702 * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)} 703 * and {@link InputStream#skip(long)} will read/skip as many bytes as are 704 * available. 705 * <p> 706 * The methods in the returned {@link InputStream} might <b>not</b> be 707 * thread safe. 708 * 709 * @return an input stream that returns the bytes of this byte string. 710 */ newInput()711 public abstract InputStream newInput(); 712 713 /** 714 * Creates a {@link CodedInputStream} which can be used to read the bytes. 715 * Using this is often more efficient than creating a {@link CodedInputStream} 716 * that wraps the result of {@link #newInput()}. 717 * 718 * @return stream based on wrapped data 719 */ newCodedInput()720 public abstract CodedInputStream newCodedInput(); 721 722 // ================================================================= 723 // Output stream 724 725 /** 726 * Creates a new {@link Output} with the given initial capacity. Call {@link 727 * Output#toByteString()} to create the {@code ByteString} instance. 728 * <p> 729 * A {@link ByteString.Output} offers the same functionality as a 730 * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} 731 * rather than a {@code byte} array. 732 * 733 * @param initialCapacity estimate of number of bytes to be written 734 * @return {@code OutputStream} for building a {@code ByteString} 735 */ newOutput(int initialCapacity)736 public static Output newOutput(int initialCapacity) { 737 return new Output(initialCapacity); 738 } 739 740 /** 741 * Creates a new {@link Output}. Call {@link Output#toByteString()} to create 742 * the {@code ByteString} instance. 743 * <p> 744 * A {@link ByteString.Output} offers the same functionality as a 745 * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} 746 * rather than a {@code byte array}. 747 * 748 * @return {@code OutputStream} for building a {@code ByteString} 749 */ newOutput()750 public static Output newOutput() { 751 return new Output(CONCATENATE_BY_COPY_SIZE); 752 } 753 754 /** 755 * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to 756 * create the {@code ByteString} instance. 757 */ 758 public static final class Output extends OutputStream { 759 // Implementation note. 760 // The public methods of this class must be synchronized. ByteStrings 761 // are guaranteed to be immutable. Without some sort of locking, it could 762 // be possible for one thread to call toByteSring(), while another thread 763 // is still modifying the underlying byte array. 764 765 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; 766 // argument passed by user, indicating initial capacity. 767 private final int initialCapacity; 768 // ByteStrings to be concatenated to create the result 769 private final ArrayList<ByteString> flushedBuffers; 770 // Total number of bytes in the ByteStrings of flushedBuffers 771 private int flushedBuffersTotalBytes; 772 // Current buffer to which we are writing 773 private byte[] buffer; 774 // Location in buffer[] to which we write the next byte. 775 private int bufferPos; 776 777 /** 778 * Creates a new ByteString output stream with the specified 779 * initial capacity. 780 * 781 * @param initialCapacity the initial capacity of the output stream. 782 */ Output(int initialCapacity)783 Output(int initialCapacity) { 784 if (initialCapacity < 0) { 785 throw new IllegalArgumentException("Buffer size < 0"); 786 } 787 this.initialCapacity = initialCapacity; 788 this.flushedBuffers = new ArrayList<ByteString>(); 789 this.buffer = new byte[initialCapacity]; 790 } 791 792 @Override write(int b)793 public synchronized void write(int b) { 794 if (bufferPos == buffer.length) { 795 flushFullBuffer(1); 796 } 797 buffer[bufferPos++] = (byte)b; 798 } 799 800 @Override write(byte[] b, int offset, int length)801 public synchronized void write(byte[] b, int offset, int length) { 802 if (length <= buffer.length - bufferPos) { 803 // The bytes can fit into the current buffer. 804 System.arraycopy(b, offset, buffer, bufferPos, length); 805 bufferPos += length; 806 } else { 807 // Use up the current buffer 808 int copySize = buffer.length - bufferPos; 809 System.arraycopy(b, offset, buffer, bufferPos, copySize); 810 offset += copySize; 811 length -= copySize; 812 // Flush the buffer, and get a new buffer at least big enough to cover 813 // what we still need to output 814 flushFullBuffer(length); 815 System.arraycopy(b, offset, buffer, 0 /* count */, length); 816 bufferPos = length; 817 } 818 } 819 820 /** 821 * Creates a byte string. Its size is the current size of this output 822 * stream and its output has been copied to it. 823 * 824 * @return the current contents of this output stream, as a byte string. 825 */ toByteString()826 public synchronized ByteString toByteString() { 827 flushLastBuffer(); 828 return ByteString.copyFrom(flushedBuffers); 829 } 830 831 /** 832 * Implement java.util.Arrays.copyOf() for jdk 1.5. 833 */ copyArray(byte[] buffer, int length)834 private byte[] copyArray(byte[] buffer, int length) { 835 byte[] result = new byte[length]; 836 System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length)); 837 return result; 838 } 839 840 /** 841 * Writes the complete contents of this byte array output stream to 842 * the specified output stream argument. 843 * 844 * @param out the output stream to which to write the data. 845 * @throws IOException if an I/O error occurs. 846 */ writeTo(OutputStream out)847 public void writeTo(OutputStream out) throws IOException { 848 ByteString[] cachedFlushBuffers; 849 byte[] cachedBuffer; 850 int cachedBufferPos; 851 synchronized (this) { 852 // Copy the information we need into local variables so as to hold 853 // the lock for as short a time as possible. 854 cachedFlushBuffers = 855 flushedBuffers.toArray(new ByteString[flushedBuffers.size()]); 856 cachedBuffer = buffer; 857 cachedBufferPos = bufferPos; 858 } 859 for (ByteString byteString : cachedFlushBuffers) { 860 byteString.writeTo(out); 861 } 862 863 out.write(copyArray(cachedBuffer, cachedBufferPos)); 864 } 865 866 /** 867 * Returns the current size of the output stream. 868 * 869 * @return the current size of the output stream 870 */ size()871 public synchronized int size() { 872 return flushedBuffersTotalBytes + bufferPos; 873 } 874 875 /** 876 * Resets this stream, so that all currently accumulated output in the 877 * output stream is discarded. The output stream can be used again, 878 * reusing the already allocated buffer space. 879 */ reset()880 public synchronized void reset() { 881 flushedBuffers.clear(); 882 flushedBuffersTotalBytes = 0; 883 bufferPos = 0; 884 } 885 886 @Override toString()887 public String toString() { 888 return String.format("<ByteString.Output@%s size=%d>", 889 Integer.toHexString(System.identityHashCode(this)), size()); 890 } 891 892 /** 893 * Internal function used by writers. The current buffer is full, and the 894 * writer needs a new buffer whose size is at least the specified minimum 895 * size. 896 */ flushFullBuffer(int minSize)897 private void flushFullBuffer(int minSize) { 898 flushedBuffers.add(new LiteralByteString(buffer)); 899 flushedBuffersTotalBytes += buffer.length; 900 // We want to increase our total capacity by 50%, but as a minimum, 901 // the new buffer should also at least be >= minSize and 902 // >= initial Capacity. 903 int newSize = Math.max(initialCapacity, 904 Math.max(minSize, flushedBuffersTotalBytes >>> 1)); 905 buffer = new byte[newSize]; 906 bufferPos = 0; 907 } 908 909 /** 910 * Internal function used by {@link #toByteString()}. The current buffer may 911 * or may not be full, but it needs to be flushed. 912 */ flushLastBuffer()913 private void flushLastBuffer() { 914 if (bufferPos < buffer.length) { 915 if (bufferPos > 0) { 916 byte[] bufferCopy = copyArray(buffer, bufferPos); 917 flushedBuffers.add(new LiteralByteString(bufferCopy)); 918 } 919 // We reuse this buffer for further writes. 920 } else { 921 // Buffer is completely full. Huzzah. 922 flushedBuffers.add(new LiteralByteString(buffer)); 923 // 99% of the time, we're not going to use this OutputStream again. 924 // We set buffer to an empty byte stream so that we're handling this 925 // case without wasting space. In the rare case that more writes 926 // *do* occur, this empty buffer will be flushed and an appropriately 927 // sized new buffer will be created. 928 buffer = EMPTY_BYTE_ARRAY; 929 } 930 flushedBuffersTotalBytes += bufferPos; 931 bufferPos = 0; 932 } 933 } 934 935 /** 936 * Constructs a new {@code ByteString} builder, which allows you to 937 * efficiently construct a {@code ByteString} by writing to a {@link 938 * CodedOutputStream}. Using this is much more efficient than calling {@code 939 * newOutput()} and wrapping that in a {@code CodedOutputStream}. 940 * 941 * <p>This is package-private because it's a somewhat confusing interface. 942 * Users can call {@link Message#toByteString()} instead of calling this 943 * directly. 944 * 945 * @param size The target byte size of the {@code ByteString}. You must write 946 * exactly this many bytes before building the result. 947 * @return the builder 948 */ newCodedBuilder(int size)949 static CodedBuilder newCodedBuilder(int size) { 950 return new CodedBuilder(size); 951 } 952 953 /** See {@link ByteString#newCodedBuilder(int)}. */ 954 static final class CodedBuilder { 955 private final CodedOutputStream output; 956 private final byte[] buffer; 957 CodedBuilder(int size)958 private CodedBuilder(int size) { 959 buffer = new byte[size]; 960 output = CodedOutputStream.newInstance(buffer); 961 } 962 build()963 public ByteString build() { 964 output.checkNoSpaceLeft(); 965 966 // We can be confident that the CodedOutputStream will not modify the 967 // underlying bytes anymore because it already wrote all of them. So, 968 // no need to make a copy. 969 return new LiteralByteString(buffer); 970 } 971 getCodedOutput()972 public CodedOutputStream getCodedOutput() { 973 return output; 974 } 975 } 976 977 // ================================================================= 978 // Methods {@link RopeByteString} needs on instances, which aren't part of the 979 // public API. 980 981 /** 982 * Return the depth of the tree representing this {@code ByteString}, if any, 983 * whose root is this node. If this is a leaf node, return 0. 984 * 985 * @return tree depth or zero 986 */ getTreeDepth()987 protected abstract int getTreeDepth(); 988 989 /** 990 * Return {@code true} if this ByteString is literal (a leaf node) or a 991 * flat-enough tree in the sense of {@link RopeByteString}. 992 * 993 * @return true if the tree is flat enough 994 */ isBalanced()995 protected abstract boolean isBalanced(); 996 997 /** 998 * Return the cached hash code if available. 999 * 1000 * @return value of cached hash code or 0 if not computed yet 1001 */ peekCachedHashCode()1002 protected abstract int peekCachedHashCode(); 1003 1004 /** 1005 * Compute the hash across the value bytes starting with the given hash, and 1006 * return the result. This is used to compute the hash across strings 1007 * represented as a set of pieces by allowing the hash computation to be 1008 * continued from piece to piece. 1009 * 1010 * @param h starting hash value 1011 * @param offset offset into this value to start looking at data values 1012 * @param length number of data values to include in the hash computation 1013 * @return ending hash value 1014 */ partialHash(int h, int offset, int length)1015 protected abstract int partialHash(int h, int offset, int length); 1016 1017 @Override toString()1018 public String toString() { 1019 return String.format("<ByteString@%s size=%d>", 1020 Integer.toHexString(System.identityHashCode(this)), size()); 1021 } 1022 } 1023