1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 package com.google.protobuf; 9 10 import java.io.ByteArrayInputStream; 11 import java.io.IOException; 12 import java.io.InputStream; 13 import java.io.InvalidObjectException; 14 import java.io.ObjectInputStream; 15 import java.io.OutputStream; 16 import java.nio.ByteBuffer; 17 import java.nio.charset.Charset; 18 import java.util.ArrayDeque; 19 import java.util.ArrayList; 20 import java.util.Arrays; 21 import java.util.Iterator; 22 import java.util.List; 23 import java.util.NoSuchElementException; 24 25 /** 26 * Class to represent {@code ByteStrings} formed by concatenation of other ByteStrings, without 27 * copying the data in the pieces. The concatenation is represented as a tree whose leaf nodes are 28 * each a {@link com.google.protobuf.ByteString.LeafByteString}. 29 * 30 * <p>Most of the operation here is inspired by the now-famous paper <a 31 * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 32 * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and michael plass 33 * 34 * <p>The algorithms described in the paper have been implemented for character strings in {@code 35 * com.google.common.string.Rope} and in the c++ class {@code cord.cc}. 36 * 37 * <p>Fundamentally the Rope algorithm represents the collection of pieces as a binary tree. BAP95 38 * uses a Fibonacci bound relating depth to a minimum sequence length, sequences that are too short 39 * relative to their depth cause a tree rebalance. More precisely, a tree of depth d is "balanced" 40 * in the terminology of BAP95 if its length is at least F(d+2), where F(n) is the n-th Fibonacci 41 * number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum lengths 1, 2, 3, 5, 8, 13,... 42 * 43 * @author carlanton@google.com (Carl Haverl) 44 */ 45 final class RopeByteString extends ByteString { 46 47 /** 48 * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of depth n is "balanced", 49 * i.e flat enough, if its length is at least Fn+2, e.g. a "balanced" {@link RopeByteString} of 50 * depth 1 must have length at least 2, of depth 4 must have length >= 8, etc. 51 * 52 * <p>There's nothing special about using the Fibonacci numbers for this, but they are a 53 * reasonable sequence for encapsulating the idea that we are OK with longer strings being encoded 54 * in deeper binary trees. 55 * 56 * <p>For 32-bit integers, this array has length 46. 57 * 58 * <p>The correctness of this constant array is validated in tests. 59 */ 60 static final int[] minLengthByDepth = { 61 1, 62 1, 63 2, 64 3, 65 5, 66 8, 67 13, 68 21, 69 34, 70 55, 71 89, 72 144, 73 233, 74 377, 75 610, 76 987, 77 1597, 78 2584, 79 4181, 80 6765, 81 10946, 82 17711, 83 28657, 84 46368, 85 75025, 86 121393, 87 196418, 88 317811, 89 514229, 90 832040, 91 1346269, 92 2178309, 93 3524578, 94 5702887, 95 9227465, 96 14930352, 97 24157817, 98 39088169, 99 63245986, 100 102334155, 101 165580141, 102 267914296, 103 433494437, 104 701408733, 105 1134903170, 106 1836311903, 107 Integer.MAX_VALUE 108 }; 109 110 private final int totalLength; 111 private final ByteString left; 112 private final ByteString right; 113 private final int leftLength; 114 private final int treeDepth; 115 116 /** 117 * Create a new RopeByteString, which can be thought of as a new tree node, by recording 118 * references to the two given strings. 119 * 120 * @param left string on the left of this node, should have {@code size() > 0} 121 * @param right string on the right of this node, should have {@code size() > 0} 122 */ RopeByteString(ByteString left, ByteString right)123 private RopeByteString(ByteString left, ByteString right) { 124 this.left = left; 125 this.right = right; 126 leftLength = left.size(); 127 totalLength = leftLength + right.size(); 128 treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; 129 } 130 131 /** 132 * Concatenate the given strings while performing various optimizations to slow the growth rate of 133 * tree depth and tree node count. The result is either a {@link 134 * com.google.protobuf.ByteString.LeafByteString} or a {@link RopeByteString} depending on which 135 * optimizations, if any, were applied. 136 * 137 * <p>Small pieces of length less than {@link ByteString#CONCATENATE_BY_COPY_SIZE} may be copied 138 * by value here, as in BAP95. Large pieces are referenced without copy. 139 * 140 * @param left string on the left 141 * @param right string on the right 142 * @return concatenation representing the same sequence as the given strings 143 */ concatenate(ByteString left, ByteString right)144 static ByteString concatenate(ByteString left, ByteString right) { 145 if (right.size() == 0) { 146 return left; 147 } 148 149 if (left.size() == 0) { 150 return right; 151 } 152 153 final int newLength = left.size() + right.size(); 154 if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) { 155 // Optimization from BAP95: For short (leaves in paper, but just short 156 // here) total length, do a copy of data to a new leaf. 157 return concatenateBytes(left, right); 158 } 159 160 if (left instanceof RopeByteString) { 161 final RopeByteString leftRope = (RopeByteString) left; 162 if (leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) { 163 // Optimization from BAP95: As an optimization of the case where the 164 // ByteString is constructed by repeated concatenate, recognize the case 165 // where a short string is concatenated to a left-hand node whose 166 // right-hand branch is short. In the paper this applies to leaves, but 167 // we just look at the length here. This has the advantage of shedding 168 // references to unneeded data when substrings have been taken. 169 // 170 // When we recognize this case, we do a copy of the data and create a 171 // new parent node so that the depth of the result is the same as the 172 // given left tree. 173 ByteString newRight = concatenateBytes(leftRope.right, right); 174 return new RopeByteString(leftRope.left, newRight); 175 } 176 177 if (leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth() 178 && leftRope.getTreeDepth() > right.getTreeDepth()) { 179 // Typically for concatenate-built strings the left-side is deeper than 180 // the right. This is our final attempt to concatenate without 181 // increasing the tree depth. We'll redo the node on the RHS. This 182 // is yet another optimization for building the string by repeatedly 183 // concatenating on the right. 184 ByteString newRight = new RopeByteString(leftRope.right, right); 185 return new RopeByteString(leftRope.left, newRight); 186 } 187 } 188 189 // Fine, we'll add a node and increase the tree depth--unless we rebalance ;^) 190 int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; 191 if (newLength >= minLength(newDepth)) { 192 // The tree is shallow enough, so don't rebalance 193 return new RopeByteString(left, right); 194 } 195 196 return new Balancer().balance(left, right); 197 } 198 199 /** 200 * Concatenates two strings by copying data values. This is called in a few cases in order to 201 * reduce the growth of the number of tree nodes. 202 * 203 * @param left string on the left 204 * @param right string on the right 205 * @return string formed by copying data bytes 206 */ concatenateBytes(ByteString left, ByteString right)207 private static ByteString concatenateBytes(ByteString left, ByteString right) { 208 int leftSize = left.size(); 209 int rightSize = right.size(); 210 byte[] bytes = new byte[leftSize + rightSize]; 211 left.copyTo(bytes, 0, 0, leftSize); 212 right.copyTo(bytes, 0, leftSize, rightSize); 213 return ByteString.wrap(bytes); // Constructor wraps bytes 214 } 215 216 /** 217 * Create a new RopeByteString for testing only while bypassing all the defenses of {@link 218 * #concatenate(ByteString, ByteString)}. This allows testing trees of specific structure. We are 219 * also able to insert empty leaves, though these are dis-allowed, so that we can make sure the 220 * implementation can withstand their presence. 221 * 222 * @param left string on the left of this node 223 * @param right string on the right of this node 224 * @return an unsafe instance for testing only 225 */ newInstanceForTest(ByteString left, ByteString right)226 static RopeByteString newInstanceForTest(ByteString left, ByteString right) { 227 return new RopeByteString(left, right); 228 } 229 230 /** 231 * Returns the minimum length for which a tree of the given depth is considered balanced according 232 * to BAP95, which means the tree is flat-enough with respect to the bounds. Defaults to {@code 233 * Integer.MAX_VALUE} if {@code depth >= minLengthByDepth.length} in order to avoid an {@code 234 * ArrayIndexOutOfBoundsException}. 235 * 236 * @param depth tree depth 237 * @return minimum balanced length 238 */ minLength(int depth)239 static int minLength(int depth) { 240 if (depth >= minLengthByDepth.length) { 241 return Integer.MAX_VALUE; 242 } 243 return minLengthByDepth[depth]; 244 } 245 246 /** 247 * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for 248 * backwards-compatibility reasons although it would more properly be {@link 249 * IndexOutOfBoundsException}. 250 * 251 * @param index index of byte 252 * @return the value 253 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 254 */ 255 @Override byteAt(int index)256 public byte byteAt(int index) { 257 checkIndex(index, totalLength); 258 return internalByteAt(index); 259 } 260 261 @Override internalByteAt(int index)262 byte internalByteAt(int index) { 263 // Find the relevant piece by recursive descent 264 if (index < leftLength) { 265 return left.internalByteAt(index); 266 } 267 268 return right.internalByteAt(index - leftLength); 269 } 270 271 @Override size()272 public int size() { 273 return totalLength; 274 } 275 276 @Override iterator()277 public ByteIterator iterator() { 278 return new AbstractByteIterator() { 279 final PieceIterator pieces = new PieceIterator(RopeByteString.this); 280 ByteIterator current = nextPiece(); 281 282 private ByteIterator nextPiece() { 283 // NOTE: PieceIterator is guaranteed to return non-empty pieces, so this method will always 284 // return non-empty iterators (or null) 285 return pieces.hasNext() ? pieces.next().iterator() : null; 286 } 287 288 @Override 289 public boolean hasNext() { 290 return current != null; 291 } 292 293 @Override 294 public byte nextByte() { 295 if (current == null) { 296 throw new NoSuchElementException(); 297 } 298 byte b = current.nextByte(); 299 if (!current.hasNext()) { 300 current = nextPiece(); 301 } 302 return b; 303 } 304 }; 305 } 306 307 // ================================================================= 308 // Pieces 309 310 @Override getTreeDepth()311 protected int getTreeDepth() { 312 return treeDepth; 313 } 314 315 /** 316 * Determines if the tree is balanced according to BAP95, which means the tree is flat-enough with 317 * respect to the bounds. Note that this definition of balanced is one where sub-trees of balanced 318 * trees are not necessarily balanced. 319 * 320 * @return true if the tree is balanced 321 */ 322 @Override isBalanced()323 protected boolean isBalanced() { 324 return totalLength >= minLength(treeDepth); 325 } 326 327 /** 328 * Takes a substring of this one. This involves recursive descent along the left and right edges 329 * of the substring, and referencing any wholly contained segments in between. Any leaf nodes 330 * entirely uninvolved in the substring will not be referenced by the substring. 331 * 332 * <p>Substrings of {@code length < 2} should result in at most a single recursive call chain, 333 * terminating at a leaf node. Thus the result will be a {@link 334 * com.google.protobuf.ByteString.LeafByteString}. 335 * 336 * @param beginIndex start at this index 337 * @param endIndex the last character is the one before this index 338 * @return substring leaf node or tree 339 */ 340 @Override substring(int beginIndex, int endIndex)341 public ByteString substring(int beginIndex, int endIndex) { 342 final int length = checkRange(beginIndex, endIndex, totalLength); 343 344 if (length == 0) { 345 // Empty substring 346 return ByteString.EMPTY; 347 } 348 349 if (length == totalLength) { 350 // The whole string 351 return this; 352 } 353 354 // Proper substring 355 if (endIndex <= leftLength) { 356 // Substring on the left 357 return left.substring(beginIndex, endIndex); 358 } 359 360 if (beginIndex >= leftLength) { 361 // Substring on the right 362 return right.substring(beginIndex - leftLength, endIndex - leftLength); 363 } 364 365 // Split substring 366 ByteString leftSub = left.substring(beginIndex); 367 ByteString rightSub = right.substring(0, endIndex - leftLength); 368 // Intentionally not rebalancing, since in many cases these two 369 // substrings will already be less deep than the top-level 370 // RopeByteString we're taking a substring of. 371 return new RopeByteString(leftSub, rightSub); 372 } 373 374 // ================================================================= 375 // ByteString -> byte[] 376 377 @Override copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)378 protected void copyToInternal( 379 byte[] target, int sourceOffset, int targetOffset, int numberToCopy) { 380 if (sourceOffset + numberToCopy <= leftLength) { 381 left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 382 } else if (sourceOffset >= leftLength) { 383 right.copyToInternal(target, sourceOffset - leftLength, targetOffset, numberToCopy); 384 } else { 385 int leftLength = this.leftLength - sourceOffset; 386 left.copyToInternal(target, sourceOffset, targetOffset, leftLength); 387 right.copyToInternal(target, 0, targetOffset + leftLength, numberToCopy - leftLength); 388 } 389 } 390 391 @Override copyTo(ByteBuffer target)392 public void copyTo(ByteBuffer target) { 393 left.copyTo(target); 394 right.copyTo(target); 395 } 396 397 @Override asReadOnlyByteBuffer()398 public ByteBuffer asReadOnlyByteBuffer() { 399 ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray()); 400 return byteBuffer.asReadOnlyBuffer(); 401 } 402 403 @Override asReadOnlyByteBufferList()404 public List<ByteBuffer> asReadOnlyByteBufferList() { 405 // Walk through the list of LeafByteString's that make up this 406 // rope, and add each one as a read-only ByteBuffer. 407 List<ByteBuffer> result = new ArrayList<ByteBuffer>(); 408 PieceIterator pieces = new PieceIterator(this); 409 while (pieces.hasNext()) { 410 LeafByteString byteString = pieces.next(); 411 result.add(byteString.asReadOnlyByteBuffer()); 412 } 413 return result; 414 } 415 416 @Override writeTo(OutputStream outputStream)417 public void writeTo(OutputStream outputStream) throws IOException { 418 left.writeTo(outputStream); 419 right.writeTo(outputStream); 420 } 421 422 @Override writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)423 void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException { 424 if (sourceOffset + numberToWrite <= leftLength) { 425 left.writeToInternal(out, sourceOffset, numberToWrite); 426 } else if (sourceOffset >= leftLength) { 427 right.writeToInternal(out, sourceOffset - leftLength, numberToWrite); 428 } else { 429 int numberToWriteInLeft = leftLength - sourceOffset; 430 left.writeToInternal(out, sourceOffset, numberToWriteInLeft); 431 right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft); 432 } 433 } 434 435 @Override writeTo(ByteOutput output)436 void writeTo(ByteOutput output) throws IOException { 437 left.writeTo(output); 438 right.writeTo(output); 439 } 440 441 @Override writeToReverse(ByteOutput output)442 void writeToReverse(ByteOutput output) throws IOException { 443 right.writeToReverse(output); 444 left.writeToReverse(output); 445 } 446 447 @Override toStringInternal(Charset charset)448 protected String toStringInternal(Charset charset) { 449 return new String(toByteArray(), charset); 450 } 451 452 // ================================================================= 453 // UTF-8 decoding 454 455 @Override isValidUtf8()456 public boolean isValidUtf8() { 457 int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength); 458 int state = right.partialIsValidUtf8(leftPartial, 0, right.size()); 459 return state == Utf8.COMPLETE; 460 } 461 462 @Override partialIsValidUtf8(int state, int offset, int length)463 protected int partialIsValidUtf8(int state, int offset, int length) { 464 int toIndex = offset + length; 465 if (toIndex <= leftLength) { 466 return left.partialIsValidUtf8(state, offset, length); 467 } else if (offset >= leftLength) { 468 return right.partialIsValidUtf8(state, offset - leftLength, length); 469 } else { 470 int leftLength = this.leftLength - offset; 471 int leftPartial = left.partialIsValidUtf8(state, offset, leftLength); 472 return right.partialIsValidUtf8(leftPartial, 0, length - leftLength); 473 } 474 } 475 476 // ================================================================= 477 // equals() and hashCode() 478 479 @Override equals(Object other)480 public boolean equals(Object other) { 481 if (other == this) { 482 return true; 483 } 484 if (!(other instanceof ByteString)) { 485 return false; 486 } 487 488 ByteString otherByteString = (ByteString) other; 489 if (totalLength != otherByteString.size()) { 490 return false; 491 } 492 if (totalLength == 0) { 493 return true; 494 } 495 496 // You don't really want to be calling equals on long strings, but since 497 // we cache the hashCode, we effectively cache inequality. We use the cached 498 // hashCode if it's already computed. It's arguable we should compute the 499 // hashCode here, and if we're going to be testing a bunch of byteStrings, 500 // it might even make sense. 501 int thisHash = peekCachedHashCode(); 502 int thatHash = otherByteString.peekCachedHashCode(); 503 if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) { 504 return false; 505 } 506 507 return equalsFragments(otherByteString); 508 } 509 510 /** 511 * Determines if this string is equal to another of the same length by iterating over the leaf 512 * nodes. On each step of the iteration, the overlapping segments of the leaves are compared. 513 * 514 * @param other string of the same length as this one 515 * @return true if the values of this string equals the value of the given one 516 */ equalsFragments(ByteString other)517 private boolean equalsFragments(ByteString other) { 518 int thisOffset = 0; 519 Iterator<LeafByteString> thisIter = new PieceIterator(this); 520 LeafByteString thisString = thisIter.next(); 521 522 int thatOffset = 0; 523 Iterator<LeafByteString> thatIter = new PieceIterator(other); 524 LeafByteString thatString = thatIter.next(); 525 526 int pos = 0; 527 while (true) { 528 int thisRemaining = thisString.size() - thisOffset; 529 int thatRemaining = thatString.size() - thatOffset; 530 int bytesToCompare = Math.min(thisRemaining, thatRemaining); 531 532 // At least one of the offsets will be zero 533 boolean stillEqual = 534 (thisOffset == 0) 535 ? thisString.equalsRange(thatString, thatOffset, bytesToCompare) 536 : thatString.equalsRange(thisString, thisOffset, bytesToCompare); 537 if (!stillEqual) { 538 return false; 539 } 540 541 pos += bytesToCompare; 542 if (pos >= totalLength) { 543 if (pos == totalLength) { 544 return true; 545 } 546 throw new IllegalStateException(); 547 } 548 // We always get to the end of at least one of the pieces 549 if (bytesToCompare == thisRemaining) { // If reached end of this 550 thisOffset = 0; 551 thisString = thisIter.next(); 552 } else { 553 thisOffset += bytesToCompare; 554 } 555 if (bytesToCompare == thatRemaining) { // If reached end of that 556 thatOffset = 0; 557 thatString = thatIter.next(); 558 } else { 559 thatOffset += bytesToCompare; 560 } 561 } 562 } 563 564 @Override partialHash(int h, int offset, int length)565 protected int partialHash(int h, int offset, int length) { 566 int toIndex = offset + length; 567 if (toIndex <= leftLength) { 568 return left.partialHash(h, offset, length); 569 } else if (offset >= leftLength) { 570 return right.partialHash(h, offset - leftLength, length); 571 } else { 572 int leftLength = this.leftLength - offset; 573 int leftPartial = left.partialHash(h, offset, leftLength); 574 return right.partialHash(leftPartial, 0, length - leftLength); 575 } 576 } 577 578 // ================================================================= 579 // Input stream 580 581 @Override newCodedInput()582 public CodedInputStream newCodedInput() { 583 // Passing along direct references to internal ByteBuffers can support more efficient parsing 584 // via aliasing in CodedInputStream for users who wish to use it. 585 // 586 // Otherwise we force data copies, both in copying as an input stream and in buffering in the 587 // CodedInputSteam. 588 return CodedInputStream.newInstance(asReadOnlyByteBufferList(), /* bufferIsImmutable= */ true); 589 } 590 591 @Override newInput()592 public InputStream newInput() { 593 return new RopeInputStream(); 594 } 595 596 /** 597 * This class implements the balancing algorithm of BAP95. In the paper the authors use an array 598 * to keep track of pieces, while here we use a stack. The tree is balanced by traversing subtrees 599 * in left to right order, and the stack always contains the part of the string we've traversed so 600 * far. 601 * 602 * <p>One surprising aspect of the algorithm is the result of balancing is not necessarily 603 * balanced, though it is nearly balanced. For details, see BAP95. 604 */ 605 private static class Balancer { 606 // Stack containing the part of the string, starting from the left, that 607 // we've already traversed. The final string should be the equivalent of 608 // concatenating the strings on the stack from bottom to top. 609 private final ArrayDeque<ByteString> prefixesStack = new ArrayDeque<>(); 610 balance(ByteString left, ByteString right)611 private ByteString balance(ByteString left, ByteString right) { 612 doBalance(left); 613 doBalance(right); 614 615 // Sweep stack to gather the result 616 ByteString partialString = prefixesStack.pop(); 617 while (!prefixesStack.isEmpty()) { 618 ByteString newLeft = prefixesStack.pop(); 619 partialString = new RopeByteString(newLeft, partialString); 620 } 621 // We should end up with a RopeByteString since at a minimum we will 622 // create one from concatenating left and right 623 return partialString; 624 } 625 doBalance(ByteString root)626 private void doBalance(ByteString root) { 627 // BAP95: Insert balanced subtrees whole. This means the result might not 628 // be balanced, leading to repeated rebalancings on concatenate. However, 629 // these rebalancings are shallow due to ignoring balanced subtrees, and 630 // relatively few calls to insert() result. 631 if (root.isBalanced()) { 632 insert(root); 633 } else if (root instanceof RopeByteString) { 634 RopeByteString rbs = (RopeByteString) root; 635 doBalance(rbs.left); 636 doBalance(rbs.right); 637 } else { 638 throw new IllegalArgumentException( 639 "Has a new type of ByteString been created? Found " + root.getClass()); 640 } 641 } 642 643 /** 644 * Push a string on the balance stack (BAP95). BAP95 uses an array and calls the elements in the 645 * array 'bins'. We instead use a stack, so the 'bins' of lengths are represented by differences 646 * between the elements of minLengthByDepth. 647 * 648 * <p>If the length bin for our string, and all shorter length bins, are empty, we just push it 649 * on the stack. Otherwise, we need to start concatenating, putting the given string in the 650 * "middle" and continuing until we land in an empty length bin that matches the length of our 651 * concatenation. 652 * 653 * @param byteString string to place on the balance stack 654 */ insert(ByteString byteString)655 private void insert(ByteString byteString) { 656 int depthBin = getDepthBinForLength(byteString.size()); 657 int binEnd = minLength(depthBin + 1); 658 659 // BAP95: Concatenate all trees occupying bins representing the length of 660 // our new piece or of shorter pieces, to the extent that is possible. 661 // The goal is to clear the bin which our piece belongs in, but that may 662 // not be entirely possible if there aren't enough longer bins occupied. 663 if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) { 664 prefixesStack.push(byteString); 665 } else { 666 int binStart = minLength(depthBin); 667 668 // Concatenate the subtrees of shorter length 669 ByteString newTree = prefixesStack.pop(); 670 while (!prefixesStack.isEmpty() && prefixesStack.peek().size() < binStart) { 671 ByteString left = prefixesStack.pop(); 672 newTree = new RopeByteString(left, newTree); 673 } 674 675 // Concatenate the given string 676 newTree = new RopeByteString(newTree, byteString); 677 678 // Continue concatenating until we land in an empty bin 679 while (!prefixesStack.isEmpty()) { 680 depthBin = getDepthBinForLength(newTree.size()); 681 binEnd = minLength(depthBin + 1); 682 if (prefixesStack.peek().size() < binEnd) { 683 ByteString left = prefixesStack.pop(); 684 newTree = new RopeByteString(left, newTree); 685 } else { 686 break; 687 } 688 } 689 prefixesStack.push(newTree); 690 } 691 } 692 getDepthBinForLength(int length)693 private int getDepthBinForLength(int length) { 694 int depth = Arrays.binarySearch(minLengthByDepth, length); 695 if (depth < 0) { 696 // It wasn't an exact match, so convert to the index of the containing 697 // fragment, which is one less even than the insertion point. 698 int insertionPoint = -(depth + 1); 699 depth = insertionPoint - 1; 700 } 701 702 return depth; 703 } 704 } 705 706 /** 707 * This class is a continuable tree traversal, which keeps the state information which would exist 708 * on the stack in a recursive traversal instead on a stack of "Bread Crumbs". The maximum depth 709 * of the stack in this iterator is the same as the depth of the tree being traversed. 710 * 711 * <p>This iterator is used to implement {@link RopeByteString#equalsFragments(ByteString)}. 712 */ 713 private static final class PieceIterator implements Iterator<LeafByteString> { 714 private final ArrayDeque<RopeByteString> breadCrumbs; 715 private LeafByteString next; 716 PieceIterator(ByteString root)717 private PieceIterator(ByteString root) { 718 if (root instanceof RopeByteString) { 719 RopeByteString rbs = (RopeByteString) root; 720 breadCrumbs = new ArrayDeque<>(rbs.getTreeDepth()); 721 breadCrumbs.push(rbs); 722 next = getLeafByLeft(rbs.left); 723 } else { 724 breadCrumbs = null; 725 next = (LeafByteString) root; 726 } 727 } 728 getLeafByLeft(ByteString root)729 private LeafByteString getLeafByLeft(ByteString root) { 730 ByteString pos = root; 731 while (pos instanceof RopeByteString) { 732 RopeByteString rbs = (RopeByteString) pos; 733 breadCrumbs.push(rbs); 734 pos = rbs.left; 735 } 736 return (LeafByteString) pos; 737 } 738 getNextNonEmptyLeaf()739 private LeafByteString getNextNonEmptyLeaf() { 740 while (true) { 741 // Almost always, we go through this loop exactly once. However, if 742 // we discover an empty string in the rope, we toss it and try again. 743 if (breadCrumbs == null || breadCrumbs.isEmpty()) { 744 return null; 745 } else { 746 LeafByteString result = getLeafByLeft(breadCrumbs.pop().right); 747 if (!result.isEmpty()) { 748 return result; 749 } 750 } 751 } 752 } 753 754 @Override hasNext()755 public boolean hasNext() { 756 return next != null; 757 } 758 759 /** 760 * Returns the next item and advances one {@link com.google.protobuf.ByteString.LeafByteString}. 761 * 762 * @return next non-empty LeafByteString or {@code null} 763 */ 764 @Override next()765 public LeafByteString next() { 766 if (next == null) { 767 throw new NoSuchElementException(); 768 } 769 LeafByteString result = next; 770 next = getNextNonEmptyLeaf(); 771 return result; 772 } 773 774 @Override remove()775 public void remove() { 776 throw new UnsupportedOperationException(); 777 } 778 } 779 780 // ================================================================= 781 // Serializable 782 783 private static final long serialVersionUID = 1L; 784 writeReplace()785 Object writeReplace() { 786 return ByteString.wrap(toByteArray()); 787 } 788 readObject(@uppressWarnings"unused") ObjectInputStream in)789 private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException { 790 throw new InvalidObjectException("RopeByteStream instances are not to be serialized directly"); 791 } 792 793 /** This class is the {@link RopeByteString} equivalent for {@link ByteArrayInputStream}. */ 794 private class RopeInputStream extends InputStream { 795 // Iterates through the pieces of the rope 796 private PieceIterator pieceIterator; 797 // The current piece 798 private LeafByteString currentPiece; 799 // The size of the current piece 800 private int currentPieceSize; 801 // The index of the next byte to read in the current piece 802 private int currentPieceIndex; 803 // The offset of the start of the current piece in the rope byte string 804 private int currentPieceOffsetInRope; 805 // Offset in the buffer at which user called mark(); 806 private int mark; 807 RopeInputStream()808 public RopeInputStream() { 809 initialize(); 810 } 811 812 /** 813 * Reads up to {@code len} bytes of data into array {@code b}. 814 * 815 * <p>Note that {@link InputStream#read(byte[], int, int)} and {@link 816 * ByteArrayInputStream#read(byte[], int, int)} behave inconsistently when reading 0 bytes at 817 * EOF; the interface defines the return value to be 0 and the latter returns -1. We use the 818 * latter behavior so that all ByteString streams are consistent. 819 * 820 * @return -1 if at EOF, otherwise the actual number of bytes read. 821 */ 822 @Override read(byte[] b, int offset, int length)823 public int read(byte[] b, int offset, int length) { 824 if (b == null) { 825 throw new NullPointerException(); 826 } else if (offset < 0 || length < 0 || length > b.length - offset) { 827 throw new IndexOutOfBoundsException(); 828 } 829 int bytesRead = readSkipInternal(b, offset, length); 830 if (bytesRead == 0 && (length > 0 || availableInternal() == 0)) { 831 // Modeling ByteArrayInputStream.read(byte[], int, int) behavior noted above: 832 // It's ok to read 0 bytes on purpose (length == 0) from a stream that isn't at EOF. 833 // It's not ok to try to read bytes (even 0 bytes) from a stream that is at EOF. 834 return -1; 835 } else { 836 return bytesRead; 837 } 838 } 839 840 @Override skip(long length)841 public long skip(long length) { 842 if (length < 0) { 843 throw new IndexOutOfBoundsException(); 844 } else if (length > Integer.MAX_VALUE) { 845 length = Integer.MAX_VALUE; 846 } 847 return readSkipInternal(null, 0, (int) length); 848 } 849 850 /** 851 * Internal implementation of read and skip. If b != null, then read the next {@code length} 852 * bytes into the buffer {@code b} at offset {@code offset}. If b == null, then skip the next 853 * {@code length} bytes. 854 * 855 * <p>This method assumes that all error checking has already happened. 856 * 857 * <p>Returns the actual number of bytes read or skipped. 858 */ readSkipInternal(byte[] b, int offset, int length)859 private int readSkipInternal(byte[] b, int offset, int length) { 860 int bytesRemaining = length; 861 while (bytesRemaining > 0) { 862 advanceIfCurrentPieceFullyRead(); 863 if (currentPiece == null) { 864 break; 865 } else { 866 // Copy the bytes from this piece. 867 int currentPieceRemaining = currentPieceSize - currentPieceIndex; 868 int count = Math.min(currentPieceRemaining, bytesRemaining); 869 if (b != null) { 870 currentPiece.copyTo(b, currentPieceIndex, offset, count); 871 offset += count; 872 } 873 currentPieceIndex += count; 874 bytesRemaining -= count; 875 } 876 } 877 // Return the number of bytes read. 878 return length - bytesRemaining; 879 } 880 881 @Override read()882 public int read() throws IOException { 883 advanceIfCurrentPieceFullyRead(); 884 if (currentPiece == null) { 885 return -1; 886 } else { 887 return currentPiece.byteAt(currentPieceIndex++) & 0xFF; 888 } 889 } 890 891 @Override available()892 public int available() throws IOException { 893 return availableInternal(); 894 } 895 896 @Override markSupported()897 public boolean markSupported() { 898 return true; 899 } 900 901 @Override mark(int readAheadLimit)902 public void mark(int readAheadLimit) { 903 // Set the mark to our position in the byte string 904 mark = currentPieceOffsetInRope + currentPieceIndex; 905 } 906 907 @Override reset()908 public synchronized void reset() { 909 // Just reinitialize and skip the specified number of bytes. 910 initialize(); 911 readSkipInternal(null, 0, mark); 912 } 913 914 /** Common initialization code used by both the constructor and reset() */ initialize()915 private void initialize() { 916 pieceIterator = new PieceIterator(RopeByteString.this); 917 currentPiece = pieceIterator.next(); 918 currentPieceSize = currentPiece.size(); 919 currentPieceIndex = 0; 920 currentPieceOffsetInRope = 0; 921 } 922 923 /** 924 * Skips to the next piece if we have read all the data in the current piece. Sets currentPiece 925 * to null if we have reached the end of the input. 926 */ advanceIfCurrentPieceFullyRead()927 private void advanceIfCurrentPieceFullyRead() { 928 if (currentPiece != null && currentPieceIndex == currentPieceSize) { 929 // Generally, we can only go through this loop at most once, since 930 // empty strings can't end up in a rope. But better to test. 931 currentPieceOffsetInRope += currentPieceSize; 932 currentPieceIndex = 0; 933 if (pieceIterator.hasNext()) { 934 currentPiece = pieceIterator.next(); 935 currentPieceSize = currentPiece.size(); 936 } else { 937 currentPiece = null; 938 currentPieceSize = 0; 939 } 940 } 941 } 942 943 /** Computes the number of bytes still available to read. */ availableInternal()944 private int availableInternal() { 945 int bytesRead = currentPieceOffsetInRope + currentPieceIndex; 946 return RopeByteString.this.size() - bytesRead; 947 } 948 } 949 } 950