• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 package com.google.protobuf;
9 
10 import java.io.ByteArrayInputStream;
11 import java.io.IOException;
12 import java.io.InputStream;
13 import java.io.InvalidObjectException;
14 import java.io.ObjectInputStream;
15 import java.io.OutputStream;
16 import java.nio.ByteBuffer;
17 import java.nio.charset.Charset;
18 import java.util.ArrayDeque;
19 import java.util.ArrayList;
20 import java.util.Arrays;
21 import java.util.Iterator;
22 import java.util.List;
23 import java.util.NoSuchElementException;
24 
25 /**
26  * Class to represent {@code ByteStrings} formed by concatenation of other ByteStrings, without
27  * copying the data in the pieces. The concatenation is represented as a tree whose leaf nodes are
28  * each a {@link com.google.protobuf.ByteString.LeafByteString}.
29  *
30  * <p>Most of the operation here is inspired by the now-famous paper <a
31  * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
32  * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and michael plass
33  *
34  * <p>The algorithms described in the paper have been implemented for character strings in {@code
35  * com.google.common.string.Rope} and in the c++ class {@code cord.cc}.
36  *
37  * <p>Fundamentally the Rope algorithm represents the collection of pieces as a binary tree. BAP95
38  * uses a Fibonacci bound relating depth to a minimum sequence length, sequences that are too short
39  * relative to their depth cause a tree rebalance. More precisely, a tree of depth d is "balanced"
40  * in the terminology of BAP95 if its length is at least F(d+2), where F(n) is the n-th Fibonacci
41  * number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum lengths 1, 2, 3, 5, 8, 13,...
42  *
43  * @author carlanton@google.com (Carl Haverl)
44  */
45 final class RopeByteString extends ByteString {
46 
47   /**
48    * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of depth n is "balanced",
49    * i.e flat enough, if its length is at least Fn+2, e.g. a "balanced" {@link RopeByteString} of
50    * depth 1 must have length at least 2, of depth 4 must have length >= 8, etc.
51    *
52    * <p>There's nothing special about using the Fibonacci numbers for this, but they are a
53    * reasonable sequence for encapsulating the idea that we are OK with longer strings being encoded
54    * in deeper binary trees.
55    *
56    * <p>For 32-bit integers, this array has length 46.
57    *
58    * <p>The correctness of this constant array is validated in tests.
59    */
60   static final int[] minLengthByDepth = {
61     1,
62     1,
63     2,
64     3,
65     5,
66     8,
67     13,
68     21,
69     34,
70     55,
71     89,
72     144,
73     233,
74     377,
75     610,
76     987,
77     1597,
78     2584,
79     4181,
80     6765,
81     10946,
82     17711,
83     28657,
84     46368,
85     75025,
86     121393,
87     196418,
88     317811,
89     514229,
90     832040,
91     1346269,
92     2178309,
93     3524578,
94     5702887,
95     9227465,
96     14930352,
97     24157817,
98     39088169,
99     63245986,
100     102334155,
101     165580141,
102     267914296,
103     433494437,
104     701408733,
105     1134903170,
106     1836311903,
107     Integer.MAX_VALUE
108   };
109 
110   private final int totalLength;
111   private final ByteString left;
112   private final ByteString right;
113   private final int leftLength;
114   private final int treeDepth;
115 
116   /**
117    * Create a new RopeByteString, which can be thought of as a new tree node, by recording
118    * references to the two given strings.
119    *
120    * @param left string on the left of this node, should have {@code size() > 0}
121    * @param right string on the right of this node, should have {@code size() > 0}
122    */
RopeByteString(ByteString left, ByteString right)123   private RopeByteString(ByteString left, ByteString right) {
124     this.left = left;
125     this.right = right;
126     leftLength = left.size();
127     totalLength = leftLength + right.size();
128     treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
129   }
130 
131   /**
132    * Concatenate the given strings while performing various optimizations to slow the growth rate of
133    * tree depth and tree node count. The result is either a {@link
134    * com.google.protobuf.ByteString.LeafByteString} or a {@link RopeByteString} depending on which
135    * optimizations, if any, were applied.
136    *
137    * <p>Small pieces of length less than {@link ByteString#CONCATENATE_BY_COPY_SIZE} may be copied
138    * by value here, as in BAP95. Large pieces are referenced without copy.
139    *
140    * @param left string on the left
141    * @param right string on the right
142    * @return concatenation representing the same sequence as the given strings
143    */
concatenate(ByteString left, ByteString right)144   static ByteString concatenate(ByteString left, ByteString right) {
145     if (right.size() == 0) {
146       return left;
147     }
148 
149     if (left.size() == 0) {
150       return right;
151     }
152 
153     final int newLength = left.size() + right.size();
154     if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
155       // Optimization from BAP95: For short (leaves in paper, but just short
156       // here) total length, do a copy of data to a new leaf.
157       return concatenateBytes(left, right);
158     }
159 
160     if (left instanceof RopeByteString) {
161       final RopeByteString leftRope = (RopeByteString) left;
162       if (leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
163         // Optimization from BAP95: As an optimization of the case where the
164         // ByteString is constructed by repeated concatenate, recognize the case
165         // where a short string is concatenated to a left-hand node whose
166         // right-hand branch is short.  In the paper this applies to leaves, but
167         // we just look at the length here. This has the advantage of shedding
168         // references to unneeded data when substrings have been taken.
169         //
170         // When we recognize this case, we do a copy of the data and create a
171         // new parent node so that the depth of the result is the same as the
172         // given left tree.
173         ByteString newRight = concatenateBytes(leftRope.right, right);
174         return new RopeByteString(leftRope.left, newRight);
175       }
176 
177       if (leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
178           && leftRope.getTreeDepth() > right.getTreeDepth()) {
179         // Typically for concatenate-built strings the left-side is deeper than
180         // the right.  This is our final attempt to concatenate without
181         // increasing the tree depth.  We'll redo the node on the RHS.  This
182         // is yet another optimization for building the string by repeatedly
183         // concatenating on the right.
184         ByteString newRight = new RopeByteString(leftRope.right, right);
185         return new RopeByteString(leftRope.left, newRight);
186       }
187     }
188 
189     // Fine, we'll add a node and increase the tree depth--unless we rebalance ;^)
190     int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
191     if (newLength >= minLength(newDepth)) {
192       // The tree is shallow enough, so don't rebalance
193       return new RopeByteString(left, right);
194     }
195 
196     return new Balancer().balance(left, right);
197   }
198 
199   /**
200    * Concatenates two strings by copying data values. This is called in a few cases in order to
201    * reduce the growth of the number of tree nodes.
202    *
203    * @param left string on the left
204    * @param right string on the right
205    * @return string formed by copying data bytes
206    */
concatenateBytes(ByteString left, ByteString right)207   private static ByteString concatenateBytes(ByteString left, ByteString right) {
208     int leftSize = left.size();
209     int rightSize = right.size();
210     byte[] bytes = new byte[leftSize + rightSize];
211     left.copyTo(bytes, 0, 0, leftSize);
212     right.copyTo(bytes, 0, leftSize, rightSize);
213     return ByteString.wrap(bytes); // Constructor wraps bytes
214   }
215 
216   /**
217    * Create a new RopeByteString for testing only while bypassing all the defenses of {@link
218    * #concatenate(ByteString, ByteString)}. This allows testing trees of specific structure. We are
219    * also able to insert empty leaves, though these are dis-allowed, so that we can make sure the
220    * implementation can withstand their presence.
221    *
222    * @param left string on the left of this node
223    * @param right string on the right of this node
224    * @return an unsafe instance for testing only
225    */
newInstanceForTest(ByteString left, ByteString right)226   static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
227     return new RopeByteString(left, right);
228   }
229 
230   /**
231    * Returns the minimum length for which a tree of the given depth is considered balanced according
232    * to BAP95, which means the tree is flat-enough with respect to the bounds. Defaults to {@code
233    * Integer.MAX_VALUE} if {@code depth >= minLengthByDepth.length} in order to avoid an {@code
234    * ArrayIndexOutOfBoundsException}.
235    *
236    * @param depth tree depth
237    * @return minimum balanced length
238    */
minLength(int depth)239   static int minLength(int depth) {
240     if (depth >= minLengthByDepth.length) {
241       return Integer.MAX_VALUE;
242     }
243     return minLengthByDepth[depth];
244   }
245 
246   /**
247    * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for
248    * backwards-compatibility reasons although it would more properly be {@link
249    * IndexOutOfBoundsException}.
250    *
251    * @param index index of byte
252    * @return the value
253    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
254    */
255   @Override
byteAt(int index)256   public byte byteAt(int index) {
257     checkIndex(index, totalLength);
258     return internalByteAt(index);
259   }
260 
261   @Override
internalByteAt(int index)262   byte internalByteAt(int index) {
263     // Find the relevant piece by recursive descent
264     if (index < leftLength) {
265       return left.internalByteAt(index);
266     }
267 
268     return right.internalByteAt(index - leftLength);
269   }
270 
271   @Override
size()272   public int size() {
273     return totalLength;
274   }
275 
276   @Override
iterator()277   public ByteIterator iterator() {
278     return new AbstractByteIterator() {
279       final PieceIterator pieces = new PieceIterator(RopeByteString.this);
280       ByteIterator current = nextPiece();
281 
282       private ByteIterator nextPiece() {
283         // NOTE: PieceIterator is guaranteed to return non-empty pieces, so this method will always
284         // return non-empty iterators (or null)
285         return pieces.hasNext() ? pieces.next().iterator() : null;
286       }
287 
288       @Override
289       public boolean hasNext() {
290         return current != null;
291       }
292 
293       @Override
294       public byte nextByte() {
295         if (current == null) {
296           throw new NoSuchElementException();
297         }
298         byte b = current.nextByte();
299         if (!current.hasNext()) {
300           current = nextPiece();
301         }
302         return b;
303       }
304     };
305   }
306 
307   // =================================================================
308   // Pieces
309 
310   @Override
getTreeDepth()311   protected int getTreeDepth() {
312     return treeDepth;
313   }
314 
315   /**
316    * Determines if the tree is balanced according to BAP95, which means the tree is flat-enough with
317    * respect to the bounds. Note that this definition of balanced is one where sub-trees of balanced
318    * trees are not necessarily balanced.
319    *
320    * @return true if the tree is balanced
321    */
322   @Override
isBalanced()323   protected boolean isBalanced() {
324     return totalLength >= minLength(treeDepth);
325   }
326 
327   /**
328    * Takes a substring of this one. This involves recursive descent along the left and right edges
329    * of the substring, and referencing any wholly contained segments in between. Any leaf nodes
330    * entirely uninvolved in the substring will not be referenced by the substring.
331    *
332    * <p>Substrings of {@code length < 2} should result in at most a single recursive call chain,
333    * terminating at a leaf node. Thus the result will be a {@link
334    * com.google.protobuf.ByteString.LeafByteString}.
335    *
336    * @param beginIndex start at this index
337    * @param endIndex the last character is the one before this index
338    * @return substring leaf node or tree
339    */
340   @Override
substring(int beginIndex, int endIndex)341   public ByteString substring(int beginIndex, int endIndex) {
342     final int length = checkRange(beginIndex, endIndex, totalLength);
343 
344     if (length == 0) {
345       // Empty substring
346       return ByteString.EMPTY;
347     }
348 
349     if (length == totalLength) {
350       // The whole string
351       return this;
352     }
353 
354     // Proper substring
355     if (endIndex <= leftLength) {
356       // Substring on the left
357       return left.substring(beginIndex, endIndex);
358     }
359 
360     if (beginIndex >= leftLength) {
361       // Substring on the right
362       return right.substring(beginIndex - leftLength, endIndex - leftLength);
363     }
364 
365     // Split substring
366     ByteString leftSub = left.substring(beginIndex);
367     ByteString rightSub = right.substring(0, endIndex - leftLength);
368     // Intentionally not rebalancing, since in many cases these two
369     // substrings will already be less deep than the top-level
370     // RopeByteString we're taking a substring of.
371     return new RopeByteString(leftSub, rightSub);
372   }
373 
374   // =================================================================
375   // ByteString -> byte[]
376 
377   @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)378   protected void copyToInternal(
379       byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
380     if (sourceOffset + numberToCopy <= leftLength) {
381       left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
382     } else if (sourceOffset >= leftLength) {
383       right.copyToInternal(target, sourceOffset - leftLength, targetOffset, numberToCopy);
384     } else {
385       int leftLength = this.leftLength - sourceOffset;
386       left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
387       right.copyToInternal(target, 0, targetOffset + leftLength, numberToCopy - leftLength);
388     }
389   }
390 
391   @Override
copyTo(ByteBuffer target)392   public void copyTo(ByteBuffer target) {
393     left.copyTo(target);
394     right.copyTo(target);
395   }
396 
397   @Override
asReadOnlyByteBuffer()398   public ByteBuffer asReadOnlyByteBuffer() {
399     ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
400     return byteBuffer.asReadOnlyBuffer();
401   }
402 
403   @Override
asReadOnlyByteBufferList()404   public List<ByteBuffer> asReadOnlyByteBufferList() {
405     // Walk through the list of LeafByteString's that make up this
406     // rope, and add each one as a read-only ByteBuffer.
407     List<ByteBuffer> result = new ArrayList<ByteBuffer>();
408     PieceIterator pieces = new PieceIterator(this);
409     while (pieces.hasNext()) {
410       LeafByteString byteString = pieces.next();
411       result.add(byteString.asReadOnlyByteBuffer());
412     }
413     return result;
414   }
415 
416   @Override
writeTo(OutputStream outputStream)417   public void writeTo(OutputStream outputStream) throws IOException {
418     left.writeTo(outputStream);
419     right.writeTo(outputStream);
420   }
421 
422   @Override
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)423   void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
424     if (sourceOffset + numberToWrite <= leftLength) {
425       left.writeToInternal(out, sourceOffset, numberToWrite);
426     } else if (sourceOffset >= leftLength) {
427       right.writeToInternal(out, sourceOffset - leftLength, numberToWrite);
428     } else {
429       int numberToWriteInLeft = leftLength - sourceOffset;
430       left.writeToInternal(out, sourceOffset, numberToWriteInLeft);
431       right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft);
432     }
433   }
434 
435   @Override
writeTo(ByteOutput output)436   void writeTo(ByteOutput output) throws IOException {
437     left.writeTo(output);
438     right.writeTo(output);
439   }
440 
441   @Override
writeToReverse(ByteOutput output)442   void writeToReverse(ByteOutput output) throws IOException {
443     right.writeToReverse(output);
444     left.writeToReverse(output);
445   }
446 
447   @Override
toStringInternal(Charset charset)448   protected String toStringInternal(Charset charset) {
449     return new String(toByteArray(), charset);
450   }
451 
452   // =================================================================
453   // UTF-8 decoding
454 
455   @Override
isValidUtf8()456   public boolean isValidUtf8() {
457     int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
458     int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
459     return state == Utf8.COMPLETE;
460   }
461 
462   @Override
partialIsValidUtf8(int state, int offset, int length)463   protected int partialIsValidUtf8(int state, int offset, int length) {
464     int toIndex = offset + length;
465     if (toIndex <= leftLength) {
466       return left.partialIsValidUtf8(state, offset, length);
467     } else if (offset >= leftLength) {
468       return right.partialIsValidUtf8(state, offset - leftLength, length);
469     } else {
470       int leftLength = this.leftLength - offset;
471       int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
472       return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
473     }
474   }
475 
476   // =================================================================
477   // equals() and hashCode()
478 
479   @Override
equals(Object other)480   public boolean equals(Object other) {
481     if (other == this) {
482       return true;
483     }
484     if (!(other instanceof ByteString)) {
485       return false;
486     }
487 
488     ByteString otherByteString = (ByteString) other;
489     if (totalLength != otherByteString.size()) {
490       return false;
491     }
492     if (totalLength == 0) {
493       return true;
494     }
495 
496     // You don't really want to be calling equals on long strings, but since
497     // we cache the hashCode, we effectively cache inequality. We use the cached
498     // hashCode if it's already computed.  It's arguable we should compute the
499     // hashCode here, and if we're going to be testing a bunch of byteStrings,
500     // it might even make sense.
501     int thisHash = peekCachedHashCode();
502     int thatHash = otherByteString.peekCachedHashCode();
503     if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) {
504       return false;
505     }
506 
507     return equalsFragments(otherByteString);
508   }
509 
510   /**
511    * Determines if this string is equal to another of the same length by iterating over the leaf
512    * nodes. On each step of the iteration, the overlapping segments of the leaves are compared.
513    *
514    * @param other string of the same length as this one
515    * @return true if the values of this string equals the value of the given one
516    */
equalsFragments(ByteString other)517   private boolean equalsFragments(ByteString other) {
518     int thisOffset = 0;
519     Iterator<LeafByteString> thisIter = new PieceIterator(this);
520     LeafByteString thisString = thisIter.next();
521 
522     int thatOffset = 0;
523     Iterator<LeafByteString> thatIter = new PieceIterator(other);
524     LeafByteString thatString = thatIter.next();
525 
526     int pos = 0;
527     while (true) {
528       int thisRemaining = thisString.size() - thisOffset;
529       int thatRemaining = thatString.size() - thatOffset;
530       int bytesToCompare = Math.min(thisRemaining, thatRemaining);
531 
532       // At least one of the offsets will be zero
533       boolean stillEqual =
534           (thisOffset == 0)
535               ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
536               : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
537       if (!stillEqual) {
538         return false;
539       }
540 
541       pos += bytesToCompare;
542       if (pos >= totalLength) {
543         if (pos == totalLength) {
544           return true;
545         }
546         throw new IllegalStateException();
547       }
548       // We always get to the end of at least one of the pieces
549       if (bytesToCompare == thisRemaining) { // If reached end of this
550         thisOffset = 0;
551         thisString = thisIter.next();
552       } else {
553         thisOffset += bytesToCompare;
554       }
555       if (bytesToCompare == thatRemaining) { // If reached end of that
556         thatOffset = 0;
557         thatString = thatIter.next();
558       } else {
559         thatOffset += bytesToCompare;
560       }
561     }
562   }
563 
564   @Override
partialHash(int h, int offset, int length)565   protected int partialHash(int h, int offset, int length) {
566     int toIndex = offset + length;
567     if (toIndex <= leftLength) {
568       return left.partialHash(h, offset, length);
569     } else if (offset >= leftLength) {
570       return right.partialHash(h, offset - leftLength, length);
571     } else {
572       int leftLength = this.leftLength - offset;
573       int leftPartial = left.partialHash(h, offset, leftLength);
574       return right.partialHash(leftPartial, 0, length - leftLength);
575     }
576   }
577 
578   // =================================================================
579   // Input stream
580 
581   @Override
newCodedInput()582   public CodedInputStream newCodedInput() {
583     // Passing along direct references to internal ByteBuffers can support more efficient parsing
584     // via aliasing in CodedInputStream for users who wish to use it.
585     //
586     // Otherwise we force data copies, both in copying as an input stream and in buffering in the
587     // CodedInputSteam.
588     return CodedInputStream.newInstance(asReadOnlyByteBufferList(), /* bufferIsImmutable= */ true);
589   }
590 
591   @Override
newInput()592   public InputStream newInput() {
593     return new RopeInputStream();
594   }
595 
596   /**
597    * This class implements the balancing algorithm of BAP95. In the paper the authors use an array
598    * to keep track of pieces, while here we use a stack. The tree is balanced by traversing subtrees
599    * in left to right order, and the stack always contains the part of the string we've traversed so
600    * far.
601    *
602    * <p>One surprising aspect of the algorithm is the result of balancing is not necessarily
603    * balanced, though it is nearly balanced. For details, see BAP95.
604    */
605   private static class Balancer {
606     // Stack containing the part of the string, starting from the left, that
607     // we've already traversed.  The final string should be the equivalent of
608     // concatenating the strings on the stack from bottom to top.
609     private final ArrayDeque<ByteString> prefixesStack = new ArrayDeque<>();
610 
balance(ByteString left, ByteString right)611     private ByteString balance(ByteString left, ByteString right) {
612       doBalance(left);
613       doBalance(right);
614 
615       // Sweep stack to gather the result
616       ByteString partialString = prefixesStack.pop();
617       while (!prefixesStack.isEmpty()) {
618         ByteString newLeft = prefixesStack.pop();
619         partialString = new RopeByteString(newLeft, partialString);
620       }
621       // We should end up with a RopeByteString since at a minimum we will
622       // create one from concatenating left and right
623       return partialString;
624     }
625 
doBalance(ByteString root)626     private void doBalance(ByteString root) {
627       // BAP95: Insert balanced subtrees whole. This means the result might not
628       // be balanced, leading to repeated rebalancings on concatenate. However,
629       // these rebalancings are shallow due to ignoring balanced subtrees, and
630       // relatively few calls to insert() result.
631       if (root.isBalanced()) {
632         insert(root);
633       } else if (root instanceof RopeByteString) {
634         RopeByteString rbs = (RopeByteString) root;
635         doBalance(rbs.left);
636         doBalance(rbs.right);
637       } else {
638         throw new IllegalArgumentException(
639             "Has a new type of ByteString been created? Found " + root.getClass());
640       }
641     }
642 
643     /**
644      * Push a string on the balance stack (BAP95). BAP95 uses an array and calls the elements in the
645      * array 'bins'. We instead use a stack, so the 'bins' of lengths are represented by differences
646      * between the elements of minLengthByDepth.
647      *
648      * <p>If the length bin for our string, and all shorter length bins, are empty, we just push it
649      * on the stack. Otherwise, we need to start concatenating, putting the given string in the
650      * "middle" and continuing until we land in an empty length bin that matches the length of our
651      * concatenation.
652      *
653      * @param byteString string to place on the balance stack
654      */
insert(ByteString byteString)655     private void insert(ByteString byteString) {
656       int depthBin = getDepthBinForLength(byteString.size());
657       int binEnd = minLength(depthBin + 1);
658 
659       // BAP95: Concatenate all trees occupying bins representing the length of
660       // our new piece or of shorter pieces, to the extent that is possible.
661       // The goal is to clear the bin which our piece belongs in, but that may
662       // not be entirely possible if there aren't enough longer bins occupied.
663       if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
664         prefixesStack.push(byteString);
665       } else {
666         int binStart = minLength(depthBin);
667 
668         // Concatenate the subtrees of shorter length
669         ByteString newTree = prefixesStack.pop();
670         while (!prefixesStack.isEmpty() && prefixesStack.peek().size() < binStart) {
671           ByteString left = prefixesStack.pop();
672           newTree = new RopeByteString(left, newTree);
673         }
674 
675         // Concatenate the given string
676         newTree = new RopeByteString(newTree, byteString);
677 
678         // Continue concatenating until we land in an empty bin
679         while (!prefixesStack.isEmpty()) {
680           depthBin = getDepthBinForLength(newTree.size());
681           binEnd = minLength(depthBin + 1);
682           if (prefixesStack.peek().size() < binEnd) {
683             ByteString left = prefixesStack.pop();
684             newTree = new RopeByteString(left, newTree);
685           } else {
686             break;
687           }
688         }
689         prefixesStack.push(newTree);
690       }
691     }
692 
getDepthBinForLength(int length)693     private int getDepthBinForLength(int length) {
694       int depth = Arrays.binarySearch(minLengthByDepth, length);
695       if (depth < 0) {
696         // It wasn't an exact match, so convert to the index of the containing
697         // fragment, which is one less even than the insertion point.
698         int insertionPoint = -(depth + 1);
699         depth = insertionPoint - 1;
700       }
701 
702       return depth;
703     }
704   }
705 
706   /**
707    * This class is a continuable tree traversal, which keeps the state information which would exist
708    * on the stack in a recursive traversal instead on a stack of "Bread Crumbs". The maximum depth
709    * of the stack in this iterator is the same as the depth of the tree being traversed.
710    *
711    * <p>This iterator is used to implement {@link RopeByteString#equalsFragments(ByteString)}.
712    */
713   private static final class PieceIterator implements Iterator<LeafByteString> {
714     private final ArrayDeque<RopeByteString> breadCrumbs;
715     private LeafByteString next;
716 
PieceIterator(ByteString root)717     private PieceIterator(ByteString root) {
718       if (root instanceof RopeByteString) {
719         RopeByteString rbs = (RopeByteString) root;
720         breadCrumbs = new ArrayDeque<>(rbs.getTreeDepth());
721         breadCrumbs.push(rbs);
722         next = getLeafByLeft(rbs.left);
723       } else {
724         breadCrumbs = null;
725         next = (LeafByteString) root;
726       }
727     }
728 
getLeafByLeft(ByteString root)729     private LeafByteString getLeafByLeft(ByteString root) {
730       ByteString pos = root;
731       while (pos instanceof RopeByteString) {
732         RopeByteString rbs = (RopeByteString) pos;
733         breadCrumbs.push(rbs);
734         pos = rbs.left;
735       }
736       return (LeafByteString) pos;
737     }
738 
getNextNonEmptyLeaf()739     private LeafByteString getNextNonEmptyLeaf() {
740       while (true) {
741         // Almost always, we go through this loop exactly once.  However, if
742         // we discover an empty string in the rope, we toss it and try again.
743         if (breadCrumbs == null || breadCrumbs.isEmpty()) {
744           return null;
745         } else {
746           LeafByteString result = getLeafByLeft(breadCrumbs.pop().right);
747           if (!result.isEmpty()) {
748             return result;
749           }
750         }
751       }
752     }
753 
754     @Override
hasNext()755     public boolean hasNext() {
756       return next != null;
757     }
758 
759     /**
760      * Returns the next item and advances one {@link com.google.protobuf.ByteString.LeafByteString}.
761      *
762      * @return next non-empty LeafByteString or {@code null}
763      */
764     @Override
next()765     public LeafByteString next() {
766       if (next == null) {
767         throw new NoSuchElementException();
768       }
769       LeafByteString result = next;
770       next = getNextNonEmptyLeaf();
771       return result;
772     }
773 
774     @Override
remove()775     public void remove() {
776       throw new UnsupportedOperationException();
777     }
778   }
779 
780   // =================================================================
781   // Serializable
782 
783   private static final long serialVersionUID = 1L;
784 
writeReplace()785   Object writeReplace() {
786     return ByteString.wrap(toByteArray());
787   }
788 
readObject(@uppressWarnings"unused") ObjectInputStream in)789   private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
790     throw new InvalidObjectException("RopeByteStream instances are not to be serialized directly");
791   }
792 
793   /** This class is the {@link RopeByteString} equivalent for {@link ByteArrayInputStream}. */
794   private class RopeInputStream extends InputStream {
795     // Iterates through the pieces of the rope
796     private PieceIterator pieceIterator;
797     // The current piece
798     private LeafByteString currentPiece;
799     // The size of the current piece
800     private int currentPieceSize;
801     // The index of the next byte to read in the current piece
802     private int currentPieceIndex;
803     // The offset of the start of the current piece in the rope byte string
804     private int currentPieceOffsetInRope;
805     // Offset in the buffer at which user called mark();
806     private int mark;
807 
RopeInputStream()808     public RopeInputStream() {
809       initialize();
810     }
811 
812     /**
813      * Reads up to {@code len} bytes of data into array {@code b}.
814      *
815      * <p>Note that {@link InputStream#read(byte[], int, int)} and {@link
816      * ByteArrayInputStream#read(byte[], int, int)} behave inconsistently when reading 0 bytes at
817      * EOF; the interface defines the return value to be 0 and the latter returns -1. We use the
818      * latter behavior so that all ByteString streams are consistent.
819      *
820      * @return -1 if at EOF, otherwise the actual number of bytes read.
821      */
822     @Override
read(byte[] b, int offset, int length)823     public int read(byte[] b, int offset, int length) {
824       if (b == null) {
825         throw new NullPointerException();
826       } else if (offset < 0 || length < 0 || length > b.length - offset) {
827         throw new IndexOutOfBoundsException();
828       }
829       int bytesRead = readSkipInternal(b, offset, length);
830       if (bytesRead == 0 && (length > 0 || availableInternal() == 0)) {
831         // Modeling ByteArrayInputStream.read(byte[], int, int) behavior noted above:
832         // It's ok to read 0 bytes on purpose (length == 0) from a stream that isn't at EOF.
833         // It's not ok to try to read bytes (even 0 bytes) from a stream that is at EOF.
834         return -1;
835       } else {
836         return bytesRead;
837       }
838     }
839 
840     @Override
skip(long length)841     public long skip(long length) {
842       if (length < 0) {
843         throw new IndexOutOfBoundsException();
844       } else if (length > Integer.MAX_VALUE) {
845         length = Integer.MAX_VALUE;
846       }
847       return readSkipInternal(null, 0, (int) length);
848     }
849 
850     /**
851      * Internal implementation of read and skip. If b != null, then read the next {@code length}
852      * bytes into the buffer {@code b} at offset {@code offset}. If b == null, then skip the next
853      * {@code length} bytes.
854      *
855      * <p>This method assumes that all error checking has already happened.
856      *
857      * <p>Returns the actual number of bytes read or skipped.
858      */
readSkipInternal(byte[] b, int offset, int length)859     private int readSkipInternal(byte[] b, int offset, int length) {
860       int bytesRemaining = length;
861       while (bytesRemaining > 0) {
862         advanceIfCurrentPieceFullyRead();
863         if (currentPiece == null) {
864           break;
865         } else {
866           // Copy the bytes from this piece.
867           int currentPieceRemaining = currentPieceSize - currentPieceIndex;
868           int count = Math.min(currentPieceRemaining, bytesRemaining);
869           if (b != null) {
870             currentPiece.copyTo(b, currentPieceIndex, offset, count);
871             offset += count;
872           }
873           currentPieceIndex += count;
874           bytesRemaining -= count;
875         }
876       }
877       // Return the number of bytes read.
878       return length - bytesRemaining;
879     }
880 
881     @Override
read()882     public int read() throws IOException {
883       advanceIfCurrentPieceFullyRead();
884       if (currentPiece == null) {
885         return -1;
886       } else {
887         return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
888       }
889     }
890 
891     @Override
available()892     public int available() throws IOException {
893       return availableInternal();
894     }
895 
896     @Override
markSupported()897     public boolean markSupported() {
898       return true;
899     }
900 
901     @Override
mark(int readAheadLimit)902     public void mark(int readAheadLimit) {
903       // Set the mark to our position in the byte string
904       mark = currentPieceOffsetInRope + currentPieceIndex;
905     }
906 
907     @Override
reset()908     public synchronized void reset() {
909       // Just reinitialize and skip the specified number of bytes.
910       initialize();
911       readSkipInternal(null, 0, mark);
912     }
913 
914     /** Common initialization code used by both the constructor and reset() */
initialize()915     private void initialize() {
916       pieceIterator = new PieceIterator(RopeByteString.this);
917       currentPiece = pieceIterator.next();
918       currentPieceSize = currentPiece.size();
919       currentPieceIndex = 0;
920       currentPieceOffsetInRope = 0;
921     }
922 
923     /**
924      * Skips to the next piece if we have read all the data in the current piece. Sets currentPiece
925      * to null if we have reached the end of the input.
926      */
advanceIfCurrentPieceFullyRead()927     private void advanceIfCurrentPieceFullyRead() {
928       if (currentPiece != null && currentPieceIndex == currentPieceSize) {
929         // Generally, we can only go through this loop at most once, since
930         // empty strings can't end up in a rope.  But better to test.
931         currentPieceOffsetInRope += currentPieceSize;
932         currentPieceIndex = 0;
933         if (pieceIterator.hasNext()) {
934           currentPiece = pieceIterator.next();
935           currentPieceSize = currentPiece.size();
936         } else {
937           currentPiece = null;
938           currentPieceSize = 0;
939         }
940       }
941     }
942 
943     /** Computes the number of bytes still available to read. */
availableInternal()944     private int availableInternal() {
945       int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
946       return RopeByteString.this.size() - bytesRead;
947     }
948   }
949 }
950