• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import java.io.ByteArrayInputStream;
34 import java.io.IOException;
35 import java.io.InputStream;
36 import java.io.InvalidObjectException;
37 import java.io.ObjectInputStream;
38 import java.io.OutputStream;
39 import java.nio.ByteBuffer;
40 import java.nio.charset.Charset;
41 import java.util.ArrayDeque;
42 import java.util.ArrayList;
43 import java.util.Arrays;
44 import java.util.Iterator;
45 import java.util.List;
46 import java.util.NoSuchElementException;
47 
48 /**
49  * Class to represent {@code ByteStrings} formed by concatenation of other ByteStrings, without
50  * copying the data in the pieces. The concatenation is represented as a tree whose leaf nodes are
51  * each a {@link com.google.protobuf.ByteString.LeafByteString}.
52  *
53  * <p>Most of the operation here is inspired by the now-famous paper <a
54  * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
55  * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and michael plass
56  *
57  * <p>The algorithms described in the paper have been implemented for character strings in {@code
58  * com.google.common.string.Rope} and in the c++ class {@code cord.cc}.
59  *
60  * <p>Fundamentally the Rope algorithm represents the collection of pieces as a binary tree. BAP95
61  * uses a Fibonacci bound relating depth to a minimum sequence length, sequences that are too short
62  * relative to their depth cause a tree rebalance. More precisely, a tree of depth d is "balanced"
63  * in the terminology of BAP95 if its length is at least F(d+2), where F(n) is the n-th Fibonacci
64  * number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum lengths 1, 2, 3, 5, 8, 13,...
65  *
66  * @author carlanton@google.com (Carl Haverl)
67  */
68 final class RopeByteString extends ByteString {
69 
70   /**
71    * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of depth n is "balanced",
72    * i.e flat enough, if its length is at least Fn+2, e.g. a "balanced" {@link RopeByteString} of
73    * depth 1 must have length at least 2, of depth 4 must have length >= 8, etc.
74    *
75    * <p>There's nothing special about using the Fibonacci numbers for this, but they are a
76    * reasonable sequence for encapsulating the idea that we are OK with longer strings being encoded
77    * in deeper binary trees.
78    *
79    * <p>For 32-bit integers, this array has length 46.
80    *
81    * <p>The correctness of this constant array is validated in tests.
82    */
83   static final int[] minLengthByDepth = {
84     1,
85     1,
86     2,
87     3,
88     5,
89     8,
90     13,
91     21,
92     34,
93     55,
94     89,
95     144,
96     233,
97     377,
98     610,
99     987,
100     1597,
101     2584,
102     4181,
103     6765,
104     10946,
105     17711,
106     28657,
107     46368,
108     75025,
109     121393,
110     196418,
111     317811,
112     514229,
113     832040,
114     1346269,
115     2178309,
116     3524578,
117     5702887,
118     9227465,
119     14930352,
120     24157817,
121     39088169,
122     63245986,
123     102334155,
124     165580141,
125     267914296,
126     433494437,
127     701408733,
128     1134903170,
129     1836311903,
130     Integer.MAX_VALUE
131   };
132 
133   private final int totalLength;
134   private final ByteString left;
135   private final ByteString right;
136   private final int leftLength;
137   private final int treeDepth;
138 
139   /**
140    * Create a new RopeByteString, which can be thought of as a new tree node, by recording
141    * references to the two given strings.
142    *
143    * @param left string on the left of this node, should have {@code size() > 0}
144    * @param right string on the right of this node, should have {@code size() > 0}
145    */
RopeByteString(ByteString left, ByteString right)146   private RopeByteString(ByteString left, ByteString right) {
147     this.left = left;
148     this.right = right;
149     leftLength = left.size();
150     totalLength = leftLength + right.size();
151     treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
152   }
153 
154   /**
155    * Concatenate the given strings while performing various optimizations to slow the growth rate of
156    * tree depth and tree node count. The result is either a {@link
157    * com.google.protobuf.ByteString.LeafByteString} or a {@link RopeByteString} depending on which
158    * optimizations, if any, were applied.
159    *
160    * <p>Small pieces of length less than {@link ByteString#CONCATENATE_BY_COPY_SIZE} may be copied
161    * by value here, as in BAP95. Large pieces are referenced without copy.
162    *
163    * @param left string on the left
164    * @param right string on the right
165    * @return concatenation representing the same sequence as the given strings
166    */
concatenate(ByteString left, ByteString right)167   static ByteString concatenate(ByteString left, ByteString right) {
168     if (right.size() == 0) {
169       return left;
170     }
171 
172     if (left.size() == 0) {
173       return right;
174     }
175 
176     final int newLength = left.size() + right.size();
177     if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
178       // Optimization from BAP95: For short (leaves in paper, but just short
179       // here) total length, do a copy of data to a new leaf.
180       return concatenateBytes(left, right);
181     }
182 
183     if (left instanceof RopeByteString) {
184       final RopeByteString leftRope = (RopeByteString) left;
185       if (leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
186         // Optimization from BAP95: As an optimization of the case where the
187         // ByteString is constructed by repeated concatenate, recognize the case
188         // where a short string is concatenated to a left-hand node whose
189         // right-hand branch is short.  In the paper this applies to leaves, but
190         // we just look at the length here. This has the advantage of shedding
191         // references to unneeded data when substrings have been taken.
192         //
193         // When we recognize this case, we do a copy of the data and create a
194         // new parent node so that the depth of the result is the same as the
195         // given left tree.
196         ByteString newRight = concatenateBytes(leftRope.right, right);
197         return new RopeByteString(leftRope.left, newRight);
198       }
199 
200       if (leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
201           && leftRope.getTreeDepth() > right.getTreeDepth()) {
202         // Typically for concatenate-built strings the left-side is deeper than
203         // the right.  This is our final attempt to concatenate without
204         // increasing the tree depth.  We'll redo the node on the RHS.  This
205         // is yet another optimization for building the string by repeatedly
206         // concatenating on the right.
207         ByteString newRight = new RopeByteString(leftRope.right, right);
208         return new RopeByteString(leftRope.left, newRight);
209       }
210     }
211 
212     // Fine, we'll add a node and increase the tree depth--unless we rebalance ;^)
213     int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
214     if (newLength >= minLength(newDepth)) {
215       // The tree is shallow enough, so don't rebalance
216       return new RopeByteString(left, right);
217     }
218 
219     return new Balancer().balance(left, right);
220   }
221 
222   /**
223    * Concatenates two strings by copying data values. This is called in a few cases in order to
224    * reduce the growth of the number of tree nodes.
225    *
226    * @param left string on the left
227    * @param right string on the right
228    * @return string formed by copying data bytes
229    */
concatenateBytes(ByteString left, ByteString right)230   private static ByteString concatenateBytes(ByteString left, ByteString right) {
231     int leftSize = left.size();
232     int rightSize = right.size();
233     byte[] bytes = new byte[leftSize + rightSize];
234     left.copyTo(bytes, 0, 0, leftSize);
235     right.copyTo(bytes, 0, leftSize, rightSize);
236     return ByteString.wrap(bytes); // Constructor wraps bytes
237   }
238 
239   /**
240    * Create a new RopeByteString for testing only while bypassing all the defenses of {@link
241    * #concatenate(ByteString, ByteString)}. This allows testing trees of specific structure. We are
242    * also able to insert empty leaves, though these are dis-allowed, so that we can make sure the
243    * implementation can withstand their presence.
244    *
245    * @param left string on the left of this node
246    * @param right string on the right of this node
247    * @return an unsafe instance for testing only
248    */
newInstanceForTest(ByteString left, ByteString right)249   static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
250     return new RopeByteString(left, right);
251   }
252 
253   /**
254    * Returns the minimum length for which a tree of the given depth is considered balanced according
255    * to BAP95, which means the tree is flat-enough with respect to the bounds. Defaults to {@code
256    * Integer.MAX_VALUE} if {@code depth >= minLengthByDepth.length} in order to avoid an {@code
257    * ArrayIndexOutOfBoundsException}.
258    *
259    * @param depth tree depth
260    * @return minimum balanced length
261    */
minLength(int depth)262   static int minLength(int depth) {
263     if (depth >= minLengthByDepth.length) {
264       return Integer.MAX_VALUE;
265     }
266     return minLengthByDepth[depth];
267   }
268 
269   /**
270    * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for
271    * backwards-compatibility reasons although it would more properly be {@link
272    * IndexOutOfBoundsException}.
273    *
274    * @param index index of byte
275    * @return the value
276    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
277    */
278   @Override
byteAt(int index)279   public byte byteAt(int index) {
280     checkIndex(index, totalLength);
281     return internalByteAt(index);
282   }
283 
284   @Override
internalByteAt(int index)285   byte internalByteAt(int index) {
286     // Find the relevant piece by recursive descent
287     if (index < leftLength) {
288       return left.internalByteAt(index);
289     }
290 
291     return right.internalByteAt(index - leftLength);
292   }
293 
294   @Override
size()295   public int size() {
296     return totalLength;
297   }
298 
299   @Override
iterator()300   public ByteIterator iterator() {
301     return new AbstractByteIterator() {
302       final PieceIterator pieces = new PieceIterator(RopeByteString.this);
303       ByteIterator current = nextPiece();
304 
305       private ByteIterator nextPiece() {
306         // NOTE: PieceIterator is guaranteed to return non-empty pieces, so this method will always
307         // return non-empty iterators (or null)
308         return pieces.hasNext() ? pieces.next().iterator() : null;
309       }
310 
311       @Override
312       public boolean hasNext() {
313         return current != null;
314       }
315 
316       @Override
317       public byte nextByte() {
318         if (current == null) {
319           throw new NoSuchElementException();
320         }
321         byte b = current.nextByte();
322         if (!current.hasNext()) {
323           current = nextPiece();
324         }
325         return b;
326       }
327     };
328   }
329 
330   // =================================================================
331   // Pieces
332 
333   @Override
getTreeDepth()334   protected int getTreeDepth() {
335     return treeDepth;
336   }
337 
338   /**
339    * Determines if the tree is balanced according to BAP95, which means the tree is flat-enough with
340    * respect to the bounds. Note that this definition of balanced is one where sub-trees of balanced
341    * trees are not necessarily balanced.
342    *
343    * @return true if the tree is balanced
344    */
345   @Override
isBalanced()346   protected boolean isBalanced() {
347     return totalLength >= minLength(treeDepth);
348   }
349 
350   /**
351    * Takes a substring of this one. This involves recursive descent along the left and right edges
352    * of the substring, and referencing any wholly contained segments in between. Any leaf nodes
353    * entirely uninvolved in the substring will not be referenced by the substring.
354    *
355    * <p>Substrings of {@code length < 2} should result in at most a single recursive call chain,
356    * terminating at a leaf node. Thus the result will be a {@link
357    * com.google.protobuf.ByteString.LeafByteString}.
358    *
359    * @param beginIndex start at this index
360    * @param endIndex the last character is the one before this index
361    * @return substring leaf node or tree
362    */
363   @Override
substring(int beginIndex, int endIndex)364   public ByteString substring(int beginIndex, int endIndex) {
365     final int length = checkRange(beginIndex, endIndex, totalLength);
366 
367     if (length == 0) {
368       // Empty substring
369       return ByteString.EMPTY;
370     }
371 
372     if (length == totalLength) {
373       // The whole string
374       return this;
375     }
376 
377     // Proper substring
378     if (endIndex <= leftLength) {
379       // Substring on the left
380       return left.substring(beginIndex, endIndex);
381     }
382 
383     if (beginIndex >= leftLength) {
384       // Substring on the right
385       return right.substring(beginIndex - leftLength, endIndex - leftLength);
386     }
387 
388     // Split substring
389     ByteString leftSub = left.substring(beginIndex);
390     ByteString rightSub = right.substring(0, endIndex - leftLength);
391     // Intentionally not rebalancing, since in many cases these two
392     // substrings will already be less deep than the top-level
393     // RopeByteString we're taking a substring of.
394     return new RopeByteString(leftSub, rightSub);
395   }
396 
397   // =================================================================
398   // ByteString -> byte[]
399 
400   @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)401   protected void copyToInternal(
402       byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
403     if (sourceOffset + numberToCopy <= leftLength) {
404       left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
405     } else if (sourceOffset >= leftLength) {
406       right.copyToInternal(target, sourceOffset - leftLength, targetOffset, numberToCopy);
407     } else {
408       int leftLength = this.leftLength - sourceOffset;
409       left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
410       right.copyToInternal(target, 0, targetOffset + leftLength, numberToCopy - leftLength);
411     }
412   }
413 
414   @Override
copyTo(ByteBuffer target)415   public void copyTo(ByteBuffer target) {
416     left.copyTo(target);
417     right.copyTo(target);
418   }
419 
420   @Override
asReadOnlyByteBuffer()421   public ByteBuffer asReadOnlyByteBuffer() {
422     ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
423     return byteBuffer.asReadOnlyBuffer();
424   }
425 
426   @Override
asReadOnlyByteBufferList()427   public List<ByteBuffer> asReadOnlyByteBufferList() {
428     // Walk through the list of LeafByteString's that make up this
429     // rope, and add each one as a read-only ByteBuffer.
430     List<ByteBuffer> result = new ArrayList<ByteBuffer>();
431     PieceIterator pieces = new PieceIterator(this);
432     while (pieces.hasNext()) {
433       LeafByteString byteString = pieces.next();
434       result.add(byteString.asReadOnlyByteBuffer());
435     }
436     return result;
437   }
438 
439   @Override
writeTo(OutputStream outputStream)440   public void writeTo(OutputStream outputStream) throws IOException {
441     left.writeTo(outputStream);
442     right.writeTo(outputStream);
443   }
444 
445   @Override
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)446   void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
447     if (sourceOffset + numberToWrite <= leftLength) {
448       left.writeToInternal(out, sourceOffset, numberToWrite);
449     } else if (sourceOffset >= leftLength) {
450       right.writeToInternal(out, sourceOffset - leftLength, numberToWrite);
451     } else {
452       int numberToWriteInLeft = leftLength - sourceOffset;
453       left.writeToInternal(out, sourceOffset, numberToWriteInLeft);
454       right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft);
455     }
456   }
457 
458   @Override
writeTo(ByteOutput output)459   void writeTo(ByteOutput output) throws IOException {
460     left.writeTo(output);
461     right.writeTo(output);
462   }
463 
464   @Override
writeToReverse(ByteOutput output)465   void writeToReverse(ByteOutput output) throws IOException {
466     right.writeToReverse(output);
467     left.writeToReverse(output);
468   }
469 
470   @Override
toStringInternal(Charset charset)471   protected String toStringInternal(Charset charset) {
472     return new String(toByteArray(), charset);
473   }
474 
475   // =================================================================
476   // UTF-8 decoding
477 
478   @Override
isValidUtf8()479   public boolean isValidUtf8() {
480     int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
481     int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
482     return state == Utf8.COMPLETE;
483   }
484 
485   @Override
partialIsValidUtf8(int state, int offset, int length)486   protected int partialIsValidUtf8(int state, int offset, int length) {
487     int toIndex = offset + length;
488     if (toIndex <= leftLength) {
489       return left.partialIsValidUtf8(state, offset, length);
490     } else if (offset >= leftLength) {
491       return right.partialIsValidUtf8(state, offset - leftLength, length);
492     } else {
493       int leftLength = this.leftLength - offset;
494       int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
495       return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
496     }
497   }
498 
499   // =================================================================
500   // equals() and hashCode()
501 
502   @Override
equals(Object other)503   public boolean equals(Object other) {
504     if (other == this) {
505       return true;
506     }
507     if (!(other instanceof ByteString)) {
508       return false;
509     }
510 
511     ByteString otherByteString = (ByteString) other;
512     if (totalLength != otherByteString.size()) {
513       return false;
514     }
515     if (totalLength == 0) {
516       return true;
517     }
518 
519     // You don't really want to be calling equals on long strings, but since
520     // we cache the hashCode, we effectively cache inequality. We use the cached
521     // hashCode if it's already computed.  It's arguable we should compute the
522     // hashCode here, and if we're going to be testing a bunch of byteStrings,
523     // it might even make sense.
524     int thisHash = peekCachedHashCode();
525     int thatHash = otherByteString.peekCachedHashCode();
526     if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) {
527       return false;
528     }
529 
530     return equalsFragments(otherByteString);
531   }
532 
533   /**
534    * Determines if this string is equal to another of the same length by iterating over the leaf
535    * nodes. On each step of the iteration, the overlapping segments of the leaves are compared.
536    *
537    * @param other string of the same length as this one
538    * @return true if the values of this string equals the value of the given one
539    */
equalsFragments(ByteString other)540   private boolean equalsFragments(ByteString other) {
541     int thisOffset = 0;
542     Iterator<LeafByteString> thisIter = new PieceIterator(this);
543     LeafByteString thisString = thisIter.next();
544 
545     int thatOffset = 0;
546     Iterator<LeafByteString> thatIter = new PieceIterator(other);
547     LeafByteString thatString = thatIter.next();
548 
549     int pos = 0;
550     while (true) {
551       int thisRemaining = thisString.size() - thisOffset;
552       int thatRemaining = thatString.size() - thatOffset;
553       int bytesToCompare = Math.min(thisRemaining, thatRemaining);
554 
555       // At least one of the offsets will be zero
556       boolean stillEqual =
557           (thisOffset == 0)
558               ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
559               : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
560       if (!stillEqual) {
561         return false;
562       }
563 
564       pos += bytesToCompare;
565       if (pos >= totalLength) {
566         if (pos == totalLength) {
567           return true;
568         }
569         throw new IllegalStateException();
570       }
571       // We always get to the end of at least one of the pieces
572       if (bytesToCompare == thisRemaining) { // If reached end of this
573         thisOffset = 0;
574         thisString = thisIter.next();
575       } else {
576         thisOffset += bytesToCompare;
577       }
578       if (bytesToCompare == thatRemaining) { // If reached end of that
579         thatOffset = 0;
580         thatString = thatIter.next();
581       } else {
582         thatOffset += bytesToCompare;
583       }
584     }
585   }
586 
587   @Override
partialHash(int h, int offset, int length)588   protected int partialHash(int h, int offset, int length) {
589     int toIndex = offset + length;
590     if (toIndex <= leftLength) {
591       return left.partialHash(h, offset, length);
592     } else if (offset >= leftLength) {
593       return right.partialHash(h, offset - leftLength, length);
594     } else {
595       int leftLength = this.leftLength - offset;
596       int leftPartial = left.partialHash(h, offset, leftLength);
597       return right.partialHash(leftPartial, 0, length - leftLength);
598     }
599   }
600 
601   // =================================================================
602   // Input stream
603 
604   @Override
newCodedInput()605   public CodedInputStream newCodedInput() {
606     return CodedInputStream.newInstance(new RopeInputStream());
607   }
608 
609   @Override
newInput()610   public InputStream newInput() {
611     return new RopeInputStream();
612   }
613 
614   /**
615    * This class implements the balancing algorithm of BAP95. In the paper the authors use an array
616    * to keep track of pieces, while here we use a stack. The tree is balanced by traversing subtrees
617    * in left to right order, and the stack always contains the part of the string we've traversed so
618    * far.
619    *
620    * <p>One surprising aspect of the algorithm is the result of balancing is not necessarily
621    * balanced, though it is nearly balanced. For details, see BAP95.
622    */
623   private static class Balancer {
624     // Stack containing the part of the string, starting from the left, that
625     // we've already traversed.  The final string should be the equivalent of
626     // concatenating the strings on the stack from bottom to top.
627     private final ArrayDeque<ByteString> prefixesStack = new ArrayDeque<>();
628 
balance(ByteString left, ByteString right)629     private ByteString balance(ByteString left, ByteString right) {
630       doBalance(left);
631       doBalance(right);
632 
633       // Sweep stack to gather the result
634       ByteString partialString = prefixesStack.pop();
635       while (!prefixesStack.isEmpty()) {
636         ByteString newLeft = prefixesStack.pop();
637         partialString = new RopeByteString(newLeft, partialString);
638       }
639       // We should end up with a RopeByteString since at a minimum we will
640       // create one from concatenating left and right
641       return partialString;
642     }
643 
doBalance(ByteString root)644     private void doBalance(ByteString root) {
645       // BAP95: Insert balanced subtrees whole. This means the result might not
646       // be balanced, leading to repeated rebalancings on concatenate. However,
647       // these rebalancings are shallow due to ignoring balanced subtrees, and
648       // relatively few calls to insert() result.
649       if (root.isBalanced()) {
650         insert(root);
651       } else if (root instanceof RopeByteString) {
652         RopeByteString rbs = (RopeByteString) root;
653         doBalance(rbs.left);
654         doBalance(rbs.right);
655       } else {
656         throw new IllegalArgumentException(
657             "Has a new type of ByteString been created? Found " + root.getClass());
658       }
659     }
660 
661     /**
662      * Push a string on the balance stack (BAP95). BAP95 uses an array and calls the elements in the
663      * array 'bins'. We instead use a stack, so the 'bins' of lengths are represented by differences
664      * between the elements of minLengthByDepth.
665      *
666      * <p>If the length bin for our string, and all shorter length bins, are empty, we just push it
667      * on the stack. Otherwise, we need to start concatenating, putting the given string in the
668      * "middle" and continuing until we land in an empty length bin that matches the length of our
669      * concatenation.
670      *
671      * @param byteString string to place on the balance stack
672      */
insert(ByteString byteString)673     private void insert(ByteString byteString) {
674       int depthBin = getDepthBinForLength(byteString.size());
675       int binEnd = minLength(depthBin + 1);
676 
677       // BAP95: Concatenate all trees occupying bins representing the length of
678       // our new piece or of shorter pieces, to the extent that is possible.
679       // The goal is to clear the bin which our piece belongs in, but that may
680       // not be entirely possible if there aren't enough longer bins occupied.
681       if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
682         prefixesStack.push(byteString);
683       } else {
684         int binStart = minLength(depthBin);
685 
686         // Concatenate the subtrees of shorter length
687         ByteString newTree = prefixesStack.pop();
688         while (!prefixesStack.isEmpty() && prefixesStack.peek().size() < binStart) {
689           ByteString left = prefixesStack.pop();
690           newTree = new RopeByteString(left, newTree);
691         }
692 
693         // Concatenate the given string
694         newTree = new RopeByteString(newTree, byteString);
695 
696         // Continue concatenating until we land in an empty bin
697         while (!prefixesStack.isEmpty()) {
698           depthBin = getDepthBinForLength(newTree.size());
699           binEnd = minLength(depthBin + 1);
700           if (prefixesStack.peek().size() < binEnd) {
701             ByteString left = prefixesStack.pop();
702             newTree = new RopeByteString(left, newTree);
703           } else {
704             break;
705           }
706         }
707         prefixesStack.push(newTree);
708       }
709     }
710 
getDepthBinForLength(int length)711     private int getDepthBinForLength(int length) {
712       int depth = Arrays.binarySearch(minLengthByDepth, length);
713       if (depth < 0) {
714         // It wasn't an exact match, so convert to the index of the containing
715         // fragment, which is one less even than the insertion point.
716         int insertionPoint = -(depth + 1);
717         depth = insertionPoint - 1;
718       }
719 
720       return depth;
721     }
722   }
723 
724   /**
725    * This class is a continuable tree traversal, which keeps the state information which would exist
726    * on the stack in a recursive traversal instead on a stack of "Bread Crumbs". The maximum depth
727    * of the stack in this iterator is the same as the depth of the tree being traversed.
728    *
729    * <p>This iterator is used to implement {@link RopeByteString#equalsFragments(ByteString)}.
730    */
731   private static final class PieceIterator implements Iterator<LeafByteString> {
732     private final ArrayDeque<RopeByteString> breadCrumbs;
733     private LeafByteString next;
734 
PieceIterator(ByteString root)735     private PieceIterator(ByteString root) {
736       if (root instanceof RopeByteString) {
737         RopeByteString rbs = (RopeByteString) root;
738         breadCrumbs = new ArrayDeque<>(rbs.getTreeDepth());
739         breadCrumbs.push(rbs);
740         next = getLeafByLeft(rbs.left);
741       } else {
742         breadCrumbs = null;
743         next = (LeafByteString) root;
744       }
745     }
746 
getLeafByLeft(ByteString root)747     private LeafByteString getLeafByLeft(ByteString root) {
748       ByteString pos = root;
749       while (pos instanceof RopeByteString) {
750         RopeByteString rbs = (RopeByteString) pos;
751         breadCrumbs.push(rbs);
752         pos = rbs.left;
753       }
754       return (LeafByteString) pos;
755     }
756 
getNextNonEmptyLeaf()757     private LeafByteString getNextNonEmptyLeaf() {
758       while (true) {
759         // Almost always, we go through this loop exactly once.  However, if
760         // we discover an empty string in the rope, we toss it and try again.
761         if (breadCrumbs == null || breadCrumbs.isEmpty()) {
762           return null;
763         } else {
764           LeafByteString result = getLeafByLeft(breadCrumbs.pop().right);
765           if (!result.isEmpty()) {
766             return result;
767           }
768         }
769       }
770     }
771 
772     @Override
hasNext()773     public boolean hasNext() {
774       return next != null;
775     }
776 
777     /**
778      * Returns the next item and advances one {@link com.google.protobuf.ByteString.LeafByteString}.
779      *
780      * @return next non-empty LeafByteString or {@code null}
781      */
782     @Override
next()783     public LeafByteString next() {
784       if (next == null) {
785         throw new NoSuchElementException();
786       }
787       LeafByteString result = next;
788       next = getNextNonEmptyLeaf();
789       return result;
790     }
791 
792     @Override
remove()793     public void remove() {
794       throw new UnsupportedOperationException();
795     }
796   }
797 
798   // =================================================================
799   // Serializable
800 
801   private static final long serialVersionUID = 1L;
802 
writeReplace()803   Object writeReplace() {
804     return ByteString.wrap(toByteArray());
805   }
806 
readObject(@uppressWarnings"unused") ObjectInputStream in)807   private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
808     throw new InvalidObjectException("RopeByteStream instances are not to be serialized directly");
809   }
810 
811   /** This class is the {@link RopeByteString} equivalent for {@link ByteArrayInputStream}. */
812   private class RopeInputStream extends InputStream {
813     // Iterates through the pieces of the rope
814     private PieceIterator pieceIterator;
815     // The current piece
816     private LeafByteString currentPiece;
817     // The size of the current piece
818     private int currentPieceSize;
819     // The index of the next byte to read in the current piece
820     private int currentPieceIndex;
821     // The offset of the start of the current piece in the rope byte string
822     private int currentPieceOffsetInRope;
823     // Offset in the buffer at which user called mark();
824     private int mark;
825 
RopeInputStream()826     public RopeInputStream() {
827       initialize();
828     }
829 
830     /**
831      * Reads up to {@code len} bytes of data into array {@code b}.
832      *
833      * <p>Note that {@link InputStream#read(byte[], int, int)} and {@link
834      * ByteArrayInputStream#read(byte[], int, int)} behave inconsistently when reading 0 bytes at
835      * EOF; the interface defines the return value to be 0 and the latter returns -1. We use the
836      * latter behavior so that all ByteString streams are consistent.
837      *
838      * @return -1 if at EOF, otherwise the actual number of bytes read.
839      */
840     @Override
read(byte[] b, int offset, int length)841     public int read(byte[] b, int offset, int length) {
842       if (b == null) {
843         throw new NullPointerException();
844       } else if (offset < 0 || length < 0 || length > b.length - offset) {
845         throw new IndexOutOfBoundsException();
846       }
847       int bytesRead = readSkipInternal(b, offset, length);
848       if (bytesRead == 0) {
849         return -1;
850       } else {
851         return bytesRead;
852       }
853     }
854 
855     @Override
skip(long length)856     public long skip(long length) {
857       if (length < 0) {
858         throw new IndexOutOfBoundsException();
859       } else if (length > Integer.MAX_VALUE) {
860         length = Integer.MAX_VALUE;
861       }
862       return readSkipInternal(null, 0, (int) length);
863     }
864 
865     /**
866      * Internal implementation of read and skip. If b != null, then read the next {@code length}
867      * bytes into the buffer {@code b} at offset {@code offset}. If b == null, then skip the next
868      * {@code length} bytes.
869      *
870      * <p>This method assumes that all error checking has already happened.
871      *
872      * <p>Returns the actual number of bytes read or skipped.
873      */
readSkipInternal(byte[] b, int offset, int length)874     private int readSkipInternal(byte[] b, int offset, int length) {
875       int bytesRemaining = length;
876       while (bytesRemaining > 0) {
877         advanceIfCurrentPieceFullyRead();
878         if (currentPiece == null) {
879           break;
880         } else {
881           // Copy the bytes from this piece.
882           int currentPieceRemaining = currentPieceSize - currentPieceIndex;
883           int count = Math.min(currentPieceRemaining, bytesRemaining);
884           if (b != null) {
885             currentPiece.copyTo(b, currentPieceIndex, offset, count);
886             offset += count;
887           }
888           currentPieceIndex += count;
889           bytesRemaining -= count;
890         }
891       }
892       // Return the number of bytes read.
893       return length - bytesRemaining;
894     }
895 
896     @Override
read()897     public int read() throws IOException {
898       advanceIfCurrentPieceFullyRead();
899       if (currentPiece == null) {
900         return -1;
901       } else {
902         return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
903       }
904     }
905 
906     @Override
available()907     public int available() throws IOException {
908       int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
909       return RopeByteString.this.size() - bytesRead;
910     }
911 
912     @Override
markSupported()913     public boolean markSupported() {
914       return true;
915     }
916 
917     @Override
mark(int readAheadLimit)918     public void mark(int readAheadLimit) {
919       // Set the mark to our position in the byte string
920       mark = currentPieceOffsetInRope + currentPieceIndex;
921     }
922 
923     @Override
reset()924     public synchronized void reset() {
925       // Just reinitialize and skip the specified number of bytes.
926       initialize();
927       readSkipInternal(null, 0, mark);
928     }
929 
930     /** Common initialization code used by both the constructor and reset() */
initialize()931     private void initialize() {
932       pieceIterator = new PieceIterator(RopeByteString.this);
933       currentPiece = pieceIterator.next();
934       currentPieceSize = currentPiece.size();
935       currentPieceIndex = 0;
936       currentPieceOffsetInRope = 0;
937     }
938 
939     /**
940      * Skips to the next piece if we have read all the data in the current piece. Sets currentPiece
941      * to null if we have reached the end of the input.
942      */
advanceIfCurrentPieceFullyRead()943     private void advanceIfCurrentPieceFullyRead() {
944       if (currentPiece != null && currentPieceIndex == currentPieceSize) {
945         // Generally, we can only go through this loop at most once, since
946         // empty strings can't end up in a rope.  But better to test.
947         currentPieceOffsetInRope += currentPieceSize;
948         currentPieceIndex = 0;
949         if (pieceIterator.hasNext()) {
950           currentPiece = pieceIterator.next();
951           currentPieceSize = currentPiece.size();
952         } else {
953           currentPiece = null;
954           currentPieceSize = 0;
955         }
956       }
957     }
958   }
959 }
960