1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import java.io.ByteArrayInputStream; 34 import java.io.IOException; 35 import java.io.InputStream; 36 import java.io.InvalidObjectException; 37 import java.io.ObjectInputStream; 38 import java.io.OutputStream; 39 import java.nio.ByteBuffer; 40 import java.nio.charset.Charset; 41 import java.util.ArrayDeque; 42 import java.util.ArrayList; 43 import java.util.Arrays; 44 import java.util.Iterator; 45 import java.util.List; 46 import java.util.NoSuchElementException; 47 48 /** 49 * Class to represent {@code ByteStrings} formed by concatenation of other ByteStrings, without 50 * copying the data in the pieces. The concatenation is represented as a tree whose leaf nodes are 51 * each a {@link com.google.protobuf.ByteString.LeafByteString}. 52 * 53 * <p>Most of the operation here is inspired by the now-famous paper <a 54 * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 55 * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and michael plass 56 * 57 * <p>The algorithms described in the paper have been implemented for character strings in {@code 58 * com.google.common.string.Rope} and in the c++ class {@code cord.cc}. 59 * 60 * <p>Fundamentally the Rope algorithm represents the collection of pieces as a binary tree. BAP95 61 * uses a Fibonacci bound relating depth to a minimum sequence length, sequences that are too short 62 * relative to their depth cause a tree rebalance. More precisely, a tree of depth d is "balanced" 63 * in the terminology of BAP95 if its length is at least F(d+2), where F(n) is the n-th Fibonacci 64 * number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum lengths 1, 2, 3, 5, 8, 13,... 65 * 66 * @author carlanton@google.com (Carl Haverl) 67 */ 68 final class RopeByteString extends ByteString { 69 70 /** 71 * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of depth n is "balanced", 72 * i.e flat enough, if its length is at least Fn+2, e.g. a "balanced" {@link RopeByteString} of 73 * depth 1 must have length at least 2, of depth 4 must have length >= 8, etc. 74 * 75 * <p>There's nothing special about using the Fibonacci numbers for this, but they are a 76 * reasonable sequence for encapsulating the idea that we are OK with longer strings being encoded 77 * in deeper binary trees. 78 * 79 * <p>For 32-bit integers, this array has length 46. 80 * 81 * <p>The correctness of this constant array is validated in tests. 82 */ 83 static final int[] minLengthByDepth = { 84 1, 85 1, 86 2, 87 3, 88 5, 89 8, 90 13, 91 21, 92 34, 93 55, 94 89, 95 144, 96 233, 97 377, 98 610, 99 987, 100 1597, 101 2584, 102 4181, 103 6765, 104 10946, 105 17711, 106 28657, 107 46368, 108 75025, 109 121393, 110 196418, 111 317811, 112 514229, 113 832040, 114 1346269, 115 2178309, 116 3524578, 117 5702887, 118 9227465, 119 14930352, 120 24157817, 121 39088169, 122 63245986, 123 102334155, 124 165580141, 125 267914296, 126 433494437, 127 701408733, 128 1134903170, 129 1836311903, 130 Integer.MAX_VALUE 131 }; 132 133 private final int totalLength; 134 private final ByteString left; 135 private final ByteString right; 136 private final int leftLength; 137 private final int treeDepth; 138 139 /** 140 * Create a new RopeByteString, which can be thought of as a new tree node, by recording 141 * references to the two given strings. 142 * 143 * @param left string on the left of this node, should have {@code size() > 0} 144 * @param right string on the right of this node, should have {@code size() > 0} 145 */ RopeByteString(ByteString left, ByteString right)146 private RopeByteString(ByteString left, ByteString right) { 147 this.left = left; 148 this.right = right; 149 leftLength = left.size(); 150 totalLength = leftLength + right.size(); 151 treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; 152 } 153 154 /** 155 * Concatenate the given strings while performing various optimizations to slow the growth rate of 156 * tree depth and tree node count. The result is either a {@link 157 * com.google.protobuf.ByteString.LeafByteString} or a {@link RopeByteString} depending on which 158 * optimizations, if any, were applied. 159 * 160 * <p>Small pieces of length less than {@link ByteString#CONCATENATE_BY_COPY_SIZE} may be copied 161 * by value here, as in BAP95. Large pieces are referenced without copy. 162 * 163 * @param left string on the left 164 * @param right string on the right 165 * @return concatenation representing the same sequence as the given strings 166 */ concatenate(ByteString left, ByteString right)167 static ByteString concatenate(ByteString left, ByteString right) { 168 if (right.size() == 0) { 169 return left; 170 } 171 172 if (left.size() == 0) { 173 return right; 174 } 175 176 final int newLength = left.size() + right.size(); 177 if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) { 178 // Optimization from BAP95: For short (leaves in paper, but just short 179 // here) total length, do a copy of data to a new leaf. 180 return concatenateBytes(left, right); 181 } 182 183 if (left instanceof RopeByteString) { 184 final RopeByteString leftRope = (RopeByteString) left; 185 if (leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) { 186 // Optimization from BAP95: As an optimization of the case where the 187 // ByteString is constructed by repeated concatenate, recognize the case 188 // where a short string is concatenated to a left-hand node whose 189 // right-hand branch is short. In the paper this applies to leaves, but 190 // we just look at the length here. This has the advantage of shedding 191 // references to unneeded data when substrings have been taken. 192 // 193 // When we recognize this case, we do a copy of the data and create a 194 // new parent node so that the depth of the result is the same as the 195 // given left tree. 196 ByteString newRight = concatenateBytes(leftRope.right, right); 197 return new RopeByteString(leftRope.left, newRight); 198 } 199 200 if (leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth() 201 && leftRope.getTreeDepth() > right.getTreeDepth()) { 202 // Typically for concatenate-built strings the left-side is deeper than 203 // the right. This is our final attempt to concatenate without 204 // increasing the tree depth. We'll redo the node on the RHS. This 205 // is yet another optimization for building the string by repeatedly 206 // concatenating on the right. 207 ByteString newRight = new RopeByteString(leftRope.right, right); 208 return new RopeByteString(leftRope.left, newRight); 209 } 210 } 211 212 // Fine, we'll add a node and increase the tree depth--unless we rebalance ;^) 213 int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; 214 if (newLength >= minLength(newDepth)) { 215 // The tree is shallow enough, so don't rebalance 216 return new RopeByteString(left, right); 217 } 218 219 return new Balancer().balance(left, right); 220 } 221 222 /** 223 * Concatenates two strings by copying data values. This is called in a few cases in order to 224 * reduce the growth of the number of tree nodes. 225 * 226 * @param left string on the left 227 * @param right string on the right 228 * @return string formed by copying data bytes 229 */ concatenateBytes(ByteString left, ByteString right)230 private static ByteString concatenateBytes(ByteString left, ByteString right) { 231 int leftSize = left.size(); 232 int rightSize = right.size(); 233 byte[] bytes = new byte[leftSize + rightSize]; 234 left.copyTo(bytes, 0, 0, leftSize); 235 right.copyTo(bytes, 0, leftSize, rightSize); 236 return ByteString.wrap(bytes); // Constructor wraps bytes 237 } 238 239 /** 240 * Create a new RopeByteString for testing only while bypassing all the defenses of {@link 241 * #concatenate(ByteString, ByteString)}. This allows testing trees of specific structure. We are 242 * also able to insert empty leaves, though these are dis-allowed, so that we can make sure the 243 * implementation can withstand their presence. 244 * 245 * @param left string on the left of this node 246 * @param right string on the right of this node 247 * @return an unsafe instance for testing only 248 */ newInstanceForTest(ByteString left, ByteString right)249 static RopeByteString newInstanceForTest(ByteString left, ByteString right) { 250 return new RopeByteString(left, right); 251 } 252 253 /** 254 * Returns the minimum length for which a tree of the given depth is considered balanced according 255 * to BAP95, which means the tree is flat-enough with respect to the bounds. Defaults to {@code 256 * Integer.MAX_VALUE} if {@code depth >= minLengthByDepth.length} in order to avoid an {@code 257 * ArrayIndexOutOfBoundsException}. 258 * 259 * @param depth tree depth 260 * @return minimum balanced length 261 */ minLength(int depth)262 static int minLength(int depth) { 263 if (depth >= minLengthByDepth.length) { 264 return Integer.MAX_VALUE; 265 } 266 return minLengthByDepth[depth]; 267 } 268 269 /** 270 * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for 271 * backwards-compatibility reasons although it would more properly be {@link 272 * IndexOutOfBoundsException}. 273 * 274 * @param index index of byte 275 * @return the value 276 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 277 */ 278 @Override byteAt(int index)279 public byte byteAt(int index) { 280 checkIndex(index, totalLength); 281 return internalByteAt(index); 282 } 283 284 @Override internalByteAt(int index)285 byte internalByteAt(int index) { 286 // Find the relevant piece by recursive descent 287 if (index < leftLength) { 288 return left.internalByteAt(index); 289 } 290 291 return right.internalByteAt(index - leftLength); 292 } 293 294 @Override size()295 public int size() { 296 return totalLength; 297 } 298 299 @Override iterator()300 public ByteIterator iterator() { 301 return new AbstractByteIterator() { 302 final PieceIterator pieces = new PieceIterator(RopeByteString.this); 303 ByteIterator current = nextPiece(); 304 305 private ByteIterator nextPiece() { 306 // NOTE: PieceIterator is guaranteed to return non-empty pieces, so this method will always 307 // return non-empty iterators (or null) 308 return pieces.hasNext() ? pieces.next().iterator() : null; 309 } 310 311 @Override 312 public boolean hasNext() { 313 return current != null; 314 } 315 316 @Override 317 public byte nextByte() { 318 if (current == null) { 319 throw new NoSuchElementException(); 320 } 321 byte b = current.nextByte(); 322 if (!current.hasNext()) { 323 current = nextPiece(); 324 } 325 return b; 326 } 327 }; 328 } 329 330 // ================================================================= 331 // Pieces 332 333 @Override getTreeDepth()334 protected int getTreeDepth() { 335 return treeDepth; 336 } 337 338 /** 339 * Determines if the tree is balanced according to BAP95, which means the tree is flat-enough with 340 * respect to the bounds. Note that this definition of balanced is one where sub-trees of balanced 341 * trees are not necessarily balanced. 342 * 343 * @return true if the tree is balanced 344 */ 345 @Override isBalanced()346 protected boolean isBalanced() { 347 return totalLength >= minLength(treeDepth); 348 } 349 350 /** 351 * Takes a substring of this one. This involves recursive descent along the left and right edges 352 * of the substring, and referencing any wholly contained segments in between. Any leaf nodes 353 * entirely uninvolved in the substring will not be referenced by the substring. 354 * 355 * <p>Substrings of {@code length < 2} should result in at most a single recursive call chain, 356 * terminating at a leaf node. Thus the result will be a {@link 357 * com.google.protobuf.ByteString.LeafByteString}. 358 * 359 * @param beginIndex start at this index 360 * @param endIndex the last character is the one before this index 361 * @return substring leaf node or tree 362 */ 363 @Override substring(int beginIndex, int endIndex)364 public ByteString substring(int beginIndex, int endIndex) { 365 final int length = checkRange(beginIndex, endIndex, totalLength); 366 367 if (length == 0) { 368 // Empty substring 369 return ByteString.EMPTY; 370 } 371 372 if (length == totalLength) { 373 // The whole string 374 return this; 375 } 376 377 // Proper substring 378 if (endIndex <= leftLength) { 379 // Substring on the left 380 return left.substring(beginIndex, endIndex); 381 } 382 383 if (beginIndex >= leftLength) { 384 // Substring on the right 385 return right.substring(beginIndex - leftLength, endIndex - leftLength); 386 } 387 388 // Split substring 389 ByteString leftSub = left.substring(beginIndex); 390 ByteString rightSub = right.substring(0, endIndex - leftLength); 391 // Intentionally not rebalancing, since in many cases these two 392 // substrings will already be less deep than the top-level 393 // RopeByteString we're taking a substring of. 394 return new RopeByteString(leftSub, rightSub); 395 } 396 397 // ================================================================= 398 // ByteString -> byte[] 399 400 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)401 protected void copyToInternal( 402 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 403 if (sourceOffset + numberToCopy <= leftLength) { 404 left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 405 } else if (sourceOffset >= leftLength) { 406 right.copyToInternal(target, sourceOffset - leftLength, targetOffset, numberToCopy); 407 } else { 408 int leftLength = this.leftLength - sourceOffset; 409 left.copyToInternal(target, sourceOffset, targetOffset, leftLength); 410 right.copyToInternal(target, 0, targetOffset + leftLength, numberToCopy - leftLength); 411 } 412 } 413 414 @Override copyTo(ByteBuffer target)415 public void copyTo(ByteBuffer target) { 416 left.copyTo(target); 417 right.copyTo(target); 418 } 419 420 @Override asReadOnlyByteBuffer()421 public ByteBuffer asReadOnlyByteBuffer() { 422 ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray()); 423 return byteBuffer.asReadOnlyBuffer(); 424 } 425 426 @Override asReadOnlyByteBufferList()427 public List<ByteBuffer> asReadOnlyByteBufferList() { 428 // Walk through the list of LeafByteString's that make up this 429 // rope, and add each one as a read-only ByteBuffer. 430 List<ByteBuffer> result = new ArrayList<ByteBuffer>(); 431 PieceIterator pieces = new PieceIterator(this); 432 while (pieces.hasNext()) { 433 LeafByteString byteString = pieces.next(); 434 result.add(byteString.asReadOnlyByteBuffer()); 435 } 436 return result; 437 } 438 439 @Override writeTo(OutputStream outputStream)440 public void writeTo(OutputStream outputStream) throws IOException { 441 left.writeTo(outputStream); 442 right.writeTo(outputStream); 443 } 444 445 @Override writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)446 void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException { 447 if (sourceOffset + numberToWrite <= leftLength) { 448 left.writeToInternal(out, sourceOffset, numberToWrite); 449 } else if (sourceOffset >= leftLength) { 450 right.writeToInternal(out, sourceOffset - leftLength, numberToWrite); 451 } else { 452 int numberToWriteInLeft = leftLength - sourceOffset; 453 left.writeToInternal(out, sourceOffset, numberToWriteInLeft); 454 right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft); 455 } 456 } 457 458 @Override writeTo(ByteOutput output)459 void writeTo(ByteOutput output) throws IOException { 460 left.writeTo(output); 461 right.writeTo(output); 462 } 463 464 @Override writeToReverse(ByteOutput output)465 void writeToReverse(ByteOutput output) throws IOException { 466 right.writeToReverse(output); 467 left.writeToReverse(output); 468 } 469 470 @Override toStringInternal(Charset charset)471 protected String toStringInternal(Charset charset) { 472 return new String(toByteArray(), charset); 473 } 474 475 // ================================================================= 476 // UTF-8 decoding 477 478 @Override isValidUtf8()479 public boolean isValidUtf8() { 480 int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength); 481 int state = right.partialIsValidUtf8(leftPartial, 0, right.size()); 482 return state == Utf8.COMPLETE; 483 } 484 485 @Override partialIsValidUtf8(int state, int offset, int length)486 protected int partialIsValidUtf8(int state, int offset, int length) { 487 int toIndex = offset + length; 488 if (toIndex <= leftLength) { 489 return left.partialIsValidUtf8(state, offset, length); 490 } else if (offset >= leftLength) { 491 return right.partialIsValidUtf8(state, offset - leftLength, length); 492 } else { 493 int leftLength = this.leftLength - offset; 494 int leftPartial = left.partialIsValidUtf8(state, offset, leftLength); 495 return right.partialIsValidUtf8(leftPartial, 0, length - leftLength); 496 } 497 } 498 499 // ================================================================= 500 // equals() and hashCode() 501 502 @Override equals(Object other)503 public boolean equals(Object other) { 504 if (other == this) { 505 return true; 506 } 507 if (!(other instanceof ByteString)) { 508 return false; 509 } 510 511 ByteString otherByteString = (ByteString) other; 512 if (totalLength != otherByteString.size()) { 513 return false; 514 } 515 if (totalLength == 0) { 516 return true; 517 } 518 519 // You don't really want to be calling equals on long strings, but since 520 // we cache the hashCode, we effectively cache inequality. We use the cached 521 // hashCode if it's already computed. It's arguable we should compute the 522 // hashCode here, and if we're going to be testing a bunch of byteStrings, 523 // it might even make sense. 524 int thisHash = peekCachedHashCode(); 525 int thatHash = otherByteString.peekCachedHashCode(); 526 if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) { 527 return false; 528 } 529 530 return equalsFragments(otherByteString); 531 } 532 533 /** 534 * Determines if this string is equal to another of the same length by iterating over the leaf 535 * nodes. On each step of the iteration, the overlapping segments of the leaves are compared. 536 * 537 * @param other string of the same length as this one 538 * @return true if the values of this string equals the value of the given one 539 */ equalsFragments(ByteString other)540 private boolean equalsFragments(ByteString other) { 541 int thisOffset = 0; 542 Iterator<LeafByteString> thisIter = new PieceIterator(this); 543 LeafByteString thisString = thisIter.next(); 544 545 int thatOffset = 0; 546 Iterator<LeafByteString> thatIter = new PieceIterator(other); 547 LeafByteString thatString = thatIter.next(); 548 549 int pos = 0; 550 while (true) { 551 int thisRemaining = thisString.size() - thisOffset; 552 int thatRemaining = thatString.size() - thatOffset; 553 int bytesToCompare = Math.min(thisRemaining, thatRemaining); 554 555 // At least one of the offsets will be zero 556 boolean stillEqual = 557 (thisOffset == 0) 558 ? thisString.equalsRange(thatString, thatOffset, bytesToCompare) 559 : thatString.equalsRange(thisString, thisOffset, bytesToCompare); 560 if (!stillEqual) { 561 return false; 562 } 563 564 pos += bytesToCompare; 565 if (pos >= totalLength) { 566 if (pos == totalLength) { 567 return true; 568 } 569 throw new IllegalStateException(); 570 } 571 // We always get to the end of at least one of the pieces 572 if (bytesToCompare == thisRemaining) { // If reached end of this 573 thisOffset = 0; 574 thisString = thisIter.next(); 575 } else { 576 thisOffset += bytesToCompare; 577 } 578 if (bytesToCompare == thatRemaining) { // If reached end of that 579 thatOffset = 0; 580 thatString = thatIter.next(); 581 } else { 582 thatOffset += bytesToCompare; 583 } 584 } 585 } 586 587 @Override partialHash(int h, int offset, int length)588 protected int partialHash(int h, int offset, int length) { 589 int toIndex = offset + length; 590 if (toIndex <= leftLength) { 591 return left.partialHash(h, offset, length); 592 } else if (offset >= leftLength) { 593 return right.partialHash(h, offset - leftLength, length); 594 } else { 595 int leftLength = this.leftLength - offset; 596 int leftPartial = left.partialHash(h, offset, leftLength); 597 return right.partialHash(leftPartial, 0, length - leftLength); 598 } 599 } 600 601 // ================================================================= 602 // Input stream 603 604 @Override newCodedInput()605 public CodedInputStream newCodedInput() { 606 return CodedInputStream.newInstance(new RopeInputStream()); 607 } 608 609 @Override newInput()610 public InputStream newInput() { 611 return new RopeInputStream(); 612 } 613 614 /** 615 * This class implements the balancing algorithm of BAP95. In the paper the authors use an array 616 * to keep track of pieces, while here we use a stack. The tree is balanced by traversing subtrees 617 * in left to right order, and the stack always contains the part of the string we've traversed so 618 * far. 619 * 620 * <p>One surprising aspect of the algorithm is the result of balancing is not necessarily 621 * balanced, though it is nearly balanced. For details, see BAP95. 622 */ 623 private static class Balancer { 624 // Stack containing the part of the string, starting from the left, that 625 // we've already traversed. The final string should be the equivalent of 626 // concatenating the strings on the stack from bottom to top. 627 private final ArrayDeque<ByteString> prefixesStack = new ArrayDeque<>(); 628 balance(ByteString left, ByteString right)629 private ByteString balance(ByteString left, ByteString right) { 630 doBalance(left); 631 doBalance(right); 632 633 // Sweep stack to gather the result 634 ByteString partialString = prefixesStack.pop(); 635 while (!prefixesStack.isEmpty()) { 636 ByteString newLeft = prefixesStack.pop(); 637 partialString = new RopeByteString(newLeft, partialString); 638 } 639 // We should end up with a RopeByteString since at a minimum we will 640 // create one from concatenating left and right 641 return partialString; 642 } 643 doBalance(ByteString root)644 private void doBalance(ByteString root) { 645 // BAP95: Insert balanced subtrees whole. This means the result might not 646 // be balanced, leading to repeated rebalancings on concatenate. However, 647 // these rebalancings are shallow due to ignoring balanced subtrees, and 648 // relatively few calls to insert() result. 649 if (root.isBalanced()) { 650 insert(root); 651 } else if (root instanceof RopeByteString) { 652 RopeByteString rbs = (RopeByteString) root; 653 doBalance(rbs.left); 654 doBalance(rbs.right); 655 } else { 656 throw new IllegalArgumentException( 657 "Has a new type of ByteString been created? Found " + root.getClass()); 658 } 659 } 660 661 /** 662 * Push a string on the balance stack (BAP95). BAP95 uses an array and calls the elements in the 663 * array 'bins'. We instead use a stack, so the 'bins' of lengths are represented by differences 664 * between the elements of minLengthByDepth. 665 * 666 * <p>If the length bin for our string, and all shorter length bins, are empty, we just push it 667 * on the stack. Otherwise, we need to start concatenating, putting the given string in the 668 * "middle" and continuing until we land in an empty length bin that matches the length of our 669 * concatenation. 670 * 671 * @param byteString string to place on the balance stack 672 */ insert(ByteString byteString)673 private void insert(ByteString byteString) { 674 int depthBin = getDepthBinForLength(byteString.size()); 675 int binEnd = minLength(depthBin + 1); 676 677 // BAP95: Concatenate all trees occupying bins representing the length of 678 // our new piece or of shorter pieces, to the extent that is possible. 679 // The goal is to clear the bin which our piece belongs in, but that may 680 // not be entirely possible if there aren't enough longer bins occupied. 681 if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) { 682 prefixesStack.push(byteString); 683 } else { 684 int binStart = minLength(depthBin); 685 686 // Concatenate the subtrees of shorter length 687 ByteString newTree = prefixesStack.pop(); 688 while (!prefixesStack.isEmpty() && prefixesStack.peek().size() < binStart) { 689 ByteString left = prefixesStack.pop(); 690 newTree = new RopeByteString(left, newTree); 691 } 692 693 // Concatenate the given string 694 newTree = new RopeByteString(newTree, byteString); 695 696 // Continue concatenating until we land in an empty bin 697 while (!prefixesStack.isEmpty()) { 698 depthBin = getDepthBinForLength(newTree.size()); 699 binEnd = minLength(depthBin + 1); 700 if (prefixesStack.peek().size() < binEnd) { 701 ByteString left = prefixesStack.pop(); 702 newTree = new RopeByteString(left, newTree); 703 } else { 704 break; 705 } 706 } 707 prefixesStack.push(newTree); 708 } 709 } 710 getDepthBinForLength(int length)711 private int getDepthBinForLength(int length) { 712 int depth = Arrays.binarySearch(minLengthByDepth, length); 713 if (depth < 0) { 714 // It wasn't an exact match, so convert to the index of the containing 715 // fragment, which is one less even than the insertion point. 716 int insertionPoint = -(depth + 1); 717 depth = insertionPoint - 1; 718 } 719 720 return depth; 721 } 722 } 723 724 /** 725 * This class is a continuable tree traversal, which keeps the state information which would exist 726 * on the stack in a recursive traversal instead on a stack of "Bread Crumbs". The maximum depth 727 * of the stack in this iterator is the same as the depth of the tree being traversed. 728 * 729 * <p>This iterator is used to implement {@link RopeByteString#equalsFragments(ByteString)}. 730 */ 731 private static final class PieceIterator implements Iterator<LeafByteString> { 732 private final ArrayDeque<RopeByteString> breadCrumbs; 733 private LeafByteString next; 734 PieceIterator(ByteString root)735 private PieceIterator(ByteString root) { 736 if (root instanceof RopeByteString) { 737 RopeByteString rbs = (RopeByteString) root; 738 breadCrumbs = new ArrayDeque<>(rbs.getTreeDepth()); 739 breadCrumbs.push(rbs); 740 next = getLeafByLeft(rbs.left); 741 } else { 742 breadCrumbs = null; 743 next = (LeafByteString) root; 744 } 745 } 746 getLeafByLeft(ByteString root)747 private LeafByteString getLeafByLeft(ByteString root) { 748 ByteString pos = root; 749 while (pos instanceof RopeByteString) { 750 RopeByteString rbs = (RopeByteString) pos; 751 breadCrumbs.push(rbs); 752 pos = rbs.left; 753 } 754 return (LeafByteString) pos; 755 } 756 getNextNonEmptyLeaf()757 private LeafByteString getNextNonEmptyLeaf() { 758 while (true) { 759 // Almost always, we go through this loop exactly once. However, if 760 // we discover an empty string in the rope, we toss it and try again. 761 if (breadCrumbs == null || breadCrumbs.isEmpty()) { 762 return null; 763 } else { 764 LeafByteString result = getLeafByLeft(breadCrumbs.pop().right); 765 if (!result.isEmpty()) { 766 return result; 767 } 768 } 769 } 770 } 771 772 @Override hasNext()773 public boolean hasNext() { 774 return next != null; 775 } 776 777 /** 778 * Returns the next item and advances one {@link com.google.protobuf.ByteString.LeafByteString}. 779 * 780 * @return next non-empty LeafByteString or {@code null} 781 */ 782 @Override next()783 public LeafByteString next() { 784 if (next == null) { 785 throw new NoSuchElementException(); 786 } 787 LeafByteString result = next; 788 next = getNextNonEmptyLeaf(); 789 return result; 790 } 791 792 @Override remove()793 public void remove() { 794 throw new UnsupportedOperationException(); 795 } 796 } 797 798 // ================================================================= 799 // Serializable 800 801 private static final long serialVersionUID = 1L; 802 writeReplace()803 Object writeReplace() { 804 return ByteString.wrap(toByteArray()); 805 } 806 readObject(@uppressWarnings"unused") ObjectInputStream in)807 private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException { 808 throw new InvalidObjectException("RopeByteStream instances are not to be serialized directly"); 809 } 810 811 /** This class is the {@link RopeByteString} equivalent for {@link ByteArrayInputStream}. */ 812 private class RopeInputStream extends InputStream { 813 // Iterates through the pieces of the rope 814 private PieceIterator pieceIterator; 815 // The current piece 816 private LeafByteString currentPiece; 817 // The size of the current piece 818 private int currentPieceSize; 819 // The index of the next byte to read in the current piece 820 private int currentPieceIndex; 821 // The offset of the start of the current piece in the rope byte string 822 private int currentPieceOffsetInRope; 823 // Offset in the buffer at which user called mark(); 824 private int mark; 825 RopeInputStream()826 public RopeInputStream() { 827 initialize(); 828 } 829 830 /** 831 * Reads up to {@code len} bytes of data into array {@code b}. 832 * 833 * <p>Note that {@link InputStream#read(byte[], int, int)} and {@link 834 * ByteArrayInputStream#read(byte[], int, int)} behave inconsistently when reading 0 bytes at 835 * EOF; the interface defines the return value to be 0 and the latter returns -1. We use the 836 * latter behavior so that all ByteString streams are consistent. 837 * 838 * @return -1 if at EOF, otherwise the actual number of bytes read. 839 */ 840 @Override read(byte[] b, int offset, int length)841 public int read(byte[] b, int offset, int length) { 842 if (b == null) { 843 throw new NullPointerException(); 844 } else if (offset < 0 || length < 0 || length > b.length - offset) { 845 throw new IndexOutOfBoundsException(); 846 } 847 int bytesRead = readSkipInternal(b, offset, length); 848 if (bytesRead == 0) { 849 return -1; 850 } else { 851 return bytesRead; 852 } 853 } 854 855 @Override skip(long length)856 public long skip(long length) { 857 if (length < 0) { 858 throw new IndexOutOfBoundsException(); 859 } else if (length > Integer.MAX_VALUE) { 860 length = Integer.MAX_VALUE; 861 } 862 return readSkipInternal(null, 0, (int) length); 863 } 864 865 /** 866 * Internal implementation of read and skip. If b != null, then read the next {@code length} 867 * bytes into the buffer {@code b} at offset {@code offset}. If b == null, then skip the next 868 * {@code length} bytes. 869 * 870 * <p>This method assumes that all error checking has already happened. 871 * 872 * <p>Returns the actual number of bytes read or skipped. 873 */ readSkipInternal(byte[] b, int offset, int length)874 private int readSkipInternal(byte[] b, int offset, int length) { 875 int bytesRemaining = length; 876 while (bytesRemaining > 0) { 877 advanceIfCurrentPieceFullyRead(); 878 if (currentPiece == null) { 879 break; 880 } else { 881 // Copy the bytes from this piece. 882 int currentPieceRemaining = currentPieceSize - currentPieceIndex; 883 int count = Math.min(currentPieceRemaining, bytesRemaining); 884 if (b != null) { 885 currentPiece.copyTo(b, currentPieceIndex, offset, count); 886 offset += count; 887 } 888 currentPieceIndex += count; 889 bytesRemaining -= count; 890 } 891 } 892 // Return the number of bytes read. 893 return length - bytesRemaining; 894 } 895 896 @Override read()897 public int read() throws IOException { 898 advanceIfCurrentPieceFullyRead(); 899 if (currentPiece == null) { 900 return -1; 901 } else { 902 return currentPiece.byteAt(currentPieceIndex++) & 0xFF; 903 } 904 } 905 906 @Override available()907 public int available() throws IOException { 908 int bytesRead = currentPieceOffsetInRope + currentPieceIndex; 909 return RopeByteString.this.size() - bytesRead; 910 } 911 912 @Override markSupported()913 public boolean markSupported() { 914 return true; 915 } 916 917 @Override mark(int readAheadLimit)918 public void mark(int readAheadLimit) { 919 // Set the mark to our position in the byte string 920 mark = currentPieceOffsetInRope + currentPieceIndex; 921 } 922 923 @Override reset()924 public synchronized void reset() { 925 // Just reinitialize and skip the specified number of bytes. 926 initialize(); 927 readSkipInternal(null, 0, mark); 928 } 929 930 /** Common initialization code used by both the constructor and reset() */ initialize()931 private void initialize() { 932 pieceIterator = new PieceIterator(RopeByteString.this); 933 currentPiece = pieceIterator.next(); 934 currentPieceSize = currentPiece.size(); 935 currentPieceIndex = 0; 936 currentPieceOffsetInRope = 0; 937 } 938 939 /** 940 * Skips to the next piece if we have read all the data in the current piece. Sets currentPiece 941 * to null if we have reached the end of the input. 942 */ advanceIfCurrentPieceFullyRead()943 private void advanceIfCurrentPieceFullyRead() { 944 if (currentPiece != null && currentPieceIndex == currentPieceSize) { 945 // Generally, we can only go through this loop at most once, since 946 // empty strings can't end up in a rope. But better to test. 947 currentPieceOffsetInRope += currentPieceSize; 948 currentPieceIndex = 0; 949 if (pieceIterator.hasNext()) { 950 currentPiece = pieceIterator.next(); 951 currentPieceSize = currentPiece.size(); 952 } else { 953 currentPiece = null; 954 currentPieceSize = 0; 955 } 956 } 957 } 958 } 959 } 960