• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import java.io.ByteArrayInputStream;
34 import java.io.IOException;
35 import java.io.InputStream;
36 import java.io.InvalidObjectException;
37 import java.io.ObjectInputStream;
38 import java.io.OutputStream;
39 import java.nio.ByteBuffer;
40 import java.nio.charset.Charset;
41 import java.util.ArrayDeque;
42 import java.util.ArrayList;
43 import java.util.Arrays;
44 import java.util.Iterator;
45 import java.util.List;
46 import java.util.NoSuchElementException;
47 
48 /**
49  * Class to represent {@code ByteStrings} formed by concatenation of other ByteStrings, without
50  * copying the data in the pieces. The concatenation is represented as a tree whose leaf nodes are
51  * each a {@link com.google.protobuf.ByteString.LeafByteString}.
52  *
53  * <p>Most of the operation here is inspired by the now-famous paper <a
54  * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
55  * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and michael plass
56  *
57  * <p>The algorithms described in the paper have been implemented for character strings in {@code
58  * com.google.common.string.Rope} and in the c++ class {@code cord.cc}.
59  *
60  * <p>Fundamentally the Rope algorithm represents the collection of pieces as a binary tree. BAP95
61  * uses a Fibonacci bound relating depth to a minimum sequence length, sequences that are too short
62  * relative to their depth cause a tree rebalance. More precisely, a tree of depth d is "balanced"
63  * in the terminology of BAP95 if its length is at least F(d+2), where F(n) is the n-th Fibonacci
64  * number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum lengths 1, 2, 3, 5, 8, 13,...
65  *
66  * @author carlanton@google.com (Carl Haverl)
67  */
68 final class RopeByteString extends ByteString {
69 
70   /**
71    * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of depth n is "balanced",
72    * i.e flat enough, if its length is at least Fn+2, e.g. a "balanced" {@link RopeByteString} of
73    * depth 1 must have length at least 2, of depth 4 must have length >= 8, etc.
74    *
75    * <p>There's nothing special about using the Fibonacci numbers for this, but they are a
76    * reasonable sequence for encapsulating the idea that we are OK with longer strings being encoded
77    * in deeper binary trees.
78    *
79    * <p>For 32-bit integers, this array has length 46.
80    *
81    * <p>The correctness of this constant array is validated in tests.
82    */
83   static final int[] minLengthByDepth = {
84     1,
85     1,
86     2,
87     3,
88     5,
89     8,
90     13,
91     21,
92     34,
93     55,
94     89,
95     144,
96     233,
97     377,
98     610,
99     987,
100     1597,
101     2584,
102     4181,
103     6765,
104     10946,
105     17711,
106     28657,
107     46368,
108     75025,
109     121393,
110     196418,
111     317811,
112     514229,
113     832040,
114     1346269,
115     2178309,
116     3524578,
117     5702887,
118     9227465,
119     14930352,
120     24157817,
121     39088169,
122     63245986,
123     102334155,
124     165580141,
125     267914296,
126     433494437,
127     701408733,
128     1134903170,
129     1836311903,
130     Integer.MAX_VALUE
131   };
132 
133   private final int totalLength;
134   private final ByteString left;
135   private final ByteString right;
136   private final int leftLength;
137   private final int treeDepth;
138 
139   /**
140    * Create a new RopeByteString, which can be thought of as a new tree node, by recording
141    * references to the two given strings.
142    *
143    * @param left string on the left of this node, should have {@code size() > 0}
144    * @param right string on the right of this node, should have {@code size() > 0}
145    */
RopeByteString(ByteString left, ByteString right)146   private RopeByteString(ByteString left, ByteString right) {
147     this.left = left;
148     this.right = right;
149     leftLength = left.size();
150     totalLength = leftLength + right.size();
151     treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
152   }
153 
154   /**
155    * Concatenate the given strings while performing various optimizations to slow the growth rate of
156    * tree depth and tree node count. The result is either a {@link
157    * com.google.protobuf.ByteString.LeafByteString} or a {@link RopeByteString} depending on which
158    * optimizations, if any, were applied.
159    *
160    * <p>Small pieces of length less than {@link ByteString#CONCATENATE_BY_COPY_SIZE} may be copied
161    * by value here, as in BAP95. Large pieces are referenced without copy.
162    *
163    * @param left string on the left
164    * @param right string on the right
165    * @return concatenation representing the same sequence as the given strings
166    */
concatenate(ByteString left, ByteString right)167   static ByteString concatenate(ByteString left, ByteString right) {
168     if (right.size() == 0) {
169       return left;
170     }
171 
172     if (left.size() == 0) {
173       return right;
174     }
175 
176     final int newLength = left.size() + right.size();
177     if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
178       // Optimization from BAP95: For short (leaves in paper, but just short
179       // here) total length, do a copy of data to a new leaf.
180       return concatenateBytes(left, right);
181     }
182 
183     if (left instanceof RopeByteString) {
184       final RopeByteString leftRope = (RopeByteString) left;
185       if (leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
186         // Optimization from BAP95: As an optimization of the case where the
187         // ByteString is constructed by repeated concatenate, recognize the case
188         // where a short string is concatenated to a left-hand node whose
189         // right-hand branch is short.  In the paper this applies to leaves, but
190         // we just look at the length here. This has the advantage of shedding
191         // references to unneeded data when substrings have been taken.
192         //
193         // When we recognize this case, we do a copy of the data and create a
194         // new parent node so that the depth of the result is the same as the
195         // given left tree.
196         ByteString newRight = concatenateBytes(leftRope.right, right);
197         return new RopeByteString(leftRope.left, newRight);
198       }
199 
200       if (leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
201           && leftRope.getTreeDepth() > right.getTreeDepth()) {
202         // Typically for concatenate-built strings the left-side is deeper than
203         // the right.  This is our final attempt to concatenate without
204         // increasing the tree depth.  We'll redo the node on the RHS.  This
205         // is yet another optimization for building the string by repeatedly
206         // concatenating on the right.
207         ByteString newRight = new RopeByteString(leftRope.right, right);
208         return new RopeByteString(leftRope.left, newRight);
209       }
210     }
211 
212     // Fine, we'll add a node and increase the tree depth--unless we rebalance ;^)
213     int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
214     if (newLength >= minLengthByDepth[newDepth]) {
215       // The tree is shallow enough, so don't rebalance
216       return new RopeByteString(left, right);
217     }
218 
219     return new Balancer().balance(left, right);
220   }
221 
222   /**
223    * Concatenates two strings by copying data values. This is called in a few cases in order to
224    * reduce the growth of the number of tree nodes.
225    *
226    * @param left string on the left
227    * @param right string on the right
228    * @return string formed by copying data bytes
229    */
concatenateBytes(ByteString left, ByteString right)230   private static ByteString concatenateBytes(ByteString left, ByteString right) {
231     int leftSize = left.size();
232     int rightSize = right.size();
233     byte[] bytes = new byte[leftSize + rightSize];
234     left.copyTo(bytes, 0, 0, leftSize);
235     right.copyTo(bytes, 0, leftSize, rightSize);
236     return ByteString.wrap(bytes); // Constructor wraps bytes
237   }
238 
239   /**
240    * Create a new RopeByteString for testing only while bypassing all the defenses of {@link
241    * #concatenate(ByteString, ByteString)}. This allows testing trees of specific structure. We are
242    * also able to insert empty leaves, though these are dis-allowed, so that we can make sure the
243    * implementation can withstand their presence.
244    *
245    * @param left string on the left of this node
246    * @param right string on the right of this node
247    * @return an unsafe instance for testing only
248    */
newInstanceForTest(ByteString left, ByteString right)249   static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
250     return new RopeByteString(left, right);
251   }
252 
253   /**
254    * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for
255    * backwards-compatibility reasons although it would more properly be {@link
256    * IndexOutOfBoundsException}.
257    *
258    * @param index index of byte
259    * @return the value
260    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
261    */
262   @Override
byteAt(int index)263   public byte byteAt(int index) {
264     checkIndex(index, totalLength);
265     return internalByteAt(index);
266   }
267 
268   @Override
internalByteAt(int index)269   byte internalByteAt(int index) {
270     // Find the relevant piece by recursive descent
271     if (index < leftLength) {
272       return left.internalByteAt(index);
273     }
274 
275     return right.internalByteAt(index - leftLength);
276   }
277 
278   @Override
size()279   public int size() {
280     return totalLength;
281   }
282 
283   @Override
iterator()284   public ByteIterator iterator() {
285     return new AbstractByteIterator() {
286       final PieceIterator pieces = new PieceIterator(RopeByteString.this);
287       ByteIterator current = nextPiece();
288 
289       private ByteIterator nextPiece() {
290         // NOTE: PieceIterator is guaranteed to return non-empty pieces, so this method will always
291         // return non-empty iterators (or null)
292         return pieces.hasNext() ? pieces.next().iterator() : null;
293       }
294 
295       @Override
296       public boolean hasNext() {
297         return current != null;
298       }
299 
300       @Override
301       public byte nextByte() {
302         if (current == null) {
303           throw new NoSuchElementException();
304         }
305         byte b = current.nextByte();
306         if (!current.hasNext()) {
307           current = nextPiece();
308         }
309         return b;
310       }
311     };
312   }
313 
314   // =================================================================
315   // Pieces
316 
317   @Override
getTreeDepth()318   protected int getTreeDepth() {
319     return treeDepth;
320   }
321 
322   /**
323    * Determines if the tree is balanced according to BAP95, which means the tree is flat-enough with
324    * respect to the bounds. Note that this definition of balanced is one where sub-trees of balanced
325    * trees are not necessarily balanced.
326    *
327    * @return true if the tree is balanced
328    */
329   @Override
isBalanced()330   protected boolean isBalanced() {
331     return totalLength >= minLengthByDepth[treeDepth];
332   }
333 
334   /**
335    * Takes a substring of this one. This involves recursive descent along the left and right edges
336    * of the substring, and referencing any wholly contained segments in between. Any leaf nodes
337    * entirely uninvolved in the substring will not be referenced by the substring.
338    *
339    * <p>Substrings of {@code length < 2} should result in at most a single recursive call chain,
340    * terminating at a leaf node. Thus the result will be a {@link
341    * com.google.protobuf.ByteString.LeafByteString}.
342    *
343    * @param beginIndex start at this index
344    * @param endIndex the last character is the one before this index
345    * @return substring leaf node or tree
346    */
347   @Override
substring(int beginIndex, int endIndex)348   public ByteString substring(int beginIndex, int endIndex) {
349     final int length = checkRange(beginIndex, endIndex, totalLength);
350 
351     if (length == 0) {
352       // Empty substring
353       return ByteString.EMPTY;
354     }
355 
356     if (length == totalLength) {
357       // The whole string
358       return this;
359     }
360 
361     // Proper substring
362     if (endIndex <= leftLength) {
363       // Substring on the left
364       return left.substring(beginIndex, endIndex);
365     }
366 
367     if (beginIndex >= leftLength) {
368       // Substring on the right
369       return right.substring(beginIndex - leftLength, endIndex - leftLength);
370     }
371 
372     // Split substring
373     ByteString leftSub = left.substring(beginIndex);
374     ByteString rightSub = right.substring(0, endIndex - leftLength);
375     // Intentionally not rebalancing, since in many cases these two
376     // substrings will already be less deep than the top-level
377     // RopeByteString we're taking a substring of.
378     return new RopeByteString(leftSub, rightSub);
379   }
380 
381   // =================================================================
382   // ByteString -> byte[]
383 
384   @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)385   protected void copyToInternal(
386       byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
387     if (sourceOffset + numberToCopy <= leftLength) {
388       left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
389     } else if (sourceOffset >= leftLength) {
390       right.copyToInternal(target, sourceOffset - leftLength, targetOffset, numberToCopy);
391     } else {
392       int leftLength = this.leftLength - sourceOffset;
393       left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
394       right.copyToInternal(target, 0, targetOffset + leftLength, numberToCopy - leftLength);
395     }
396   }
397 
398   @Override
copyTo(ByteBuffer target)399   public void copyTo(ByteBuffer target) {
400     left.copyTo(target);
401     right.copyTo(target);
402   }
403 
404   @Override
asReadOnlyByteBuffer()405   public ByteBuffer asReadOnlyByteBuffer() {
406     ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
407     return byteBuffer.asReadOnlyBuffer();
408   }
409 
410   @Override
asReadOnlyByteBufferList()411   public List<ByteBuffer> asReadOnlyByteBufferList() {
412     // Walk through the list of LeafByteString's that make up this
413     // rope, and add each one as a read-only ByteBuffer.
414     List<ByteBuffer> result = new ArrayList<ByteBuffer>();
415     PieceIterator pieces = new PieceIterator(this);
416     while (pieces.hasNext()) {
417       LeafByteString byteString = pieces.next();
418       result.add(byteString.asReadOnlyByteBuffer());
419     }
420     return result;
421   }
422 
423   @Override
writeTo(OutputStream outputStream)424   public void writeTo(OutputStream outputStream) throws IOException {
425     left.writeTo(outputStream);
426     right.writeTo(outputStream);
427   }
428 
429   @Override
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)430   void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
431     if (sourceOffset + numberToWrite <= leftLength) {
432       left.writeToInternal(out, sourceOffset, numberToWrite);
433     } else if (sourceOffset >= leftLength) {
434       right.writeToInternal(out, sourceOffset - leftLength, numberToWrite);
435     } else {
436       int numberToWriteInLeft = leftLength - sourceOffset;
437       left.writeToInternal(out, sourceOffset, numberToWriteInLeft);
438       right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft);
439     }
440   }
441 
442   @Override
writeTo(ByteOutput output)443   void writeTo(ByteOutput output) throws IOException {
444     left.writeTo(output);
445     right.writeTo(output);
446   }
447 
448   @Override
writeToReverse(ByteOutput output)449   void writeToReverse(ByteOutput output) throws IOException {
450     right.writeToReverse(output);
451     left.writeToReverse(output);
452   }
453 
454   @Override
toStringInternal(Charset charset)455   protected String toStringInternal(Charset charset) {
456     return new String(toByteArray(), charset);
457   }
458 
459   // =================================================================
460   // UTF-8 decoding
461 
462   @Override
isValidUtf8()463   public boolean isValidUtf8() {
464     int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
465     int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
466     return state == Utf8.COMPLETE;
467   }
468 
469   @Override
partialIsValidUtf8(int state, int offset, int length)470   protected int partialIsValidUtf8(int state, int offset, int length) {
471     int toIndex = offset + length;
472     if (toIndex <= leftLength) {
473       return left.partialIsValidUtf8(state, offset, length);
474     } else if (offset >= leftLength) {
475       return right.partialIsValidUtf8(state, offset - leftLength, length);
476     } else {
477       int leftLength = this.leftLength - offset;
478       int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
479       return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
480     }
481   }
482 
483   // =================================================================
484   // equals() and hashCode()
485 
486   @Override
equals(Object other)487   public boolean equals(Object other) {
488     if (other == this) {
489       return true;
490     }
491     if (!(other instanceof ByteString)) {
492       return false;
493     }
494 
495     ByteString otherByteString = (ByteString) other;
496     if (totalLength != otherByteString.size()) {
497       return false;
498     }
499     if (totalLength == 0) {
500       return true;
501     }
502 
503     // You don't really want to be calling equals on long strings, but since
504     // we cache the hashCode, we effectively cache inequality. We use the cached
505     // hashCode if it's already computed.  It's arguable we should compute the
506     // hashCode here, and if we're going to be testing a bunch of byteStrings,
507     // it might even make sense.
508     int thisHash = peekCachedHashCode();
509     int thatHash = otherByteString.peekCachedHashCode();
510     if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) {
511       return false;
512     }
513 
514     return equalsFragments(otherByteString);
515   }
516 
517   /**
518    * Determines if this string is equal to another of the same length by iterating over the leaf
519    * nodes. On each step of the iteration, the overlapping segments of the leaves are compared.
520    *
521    * @param other string of the same length as this one
522    * @return true if the values of this string equals the value of the given one
523    */
equalsFragments(ByteString other)524   private boolean equalsFragments(ByteString other) {
525     int thisOffset = 0;
526     Iterator<LeafByteString> thisIter = new PieceIterator(this);
527     LeafByteString thisString = thisIter.next();
528 
529     int thatOffset = 0;
530     Iterator<LeafByteString> thatIter = new PieceIterator(other);
531     LeafByteString thatString = thatIter.next();
532 
533     int pos = 0;
534     while (true) {
535       int thisRemaining = thisString.size() - thisOffset;
536       int thatRemaining = thatString.size() - thatOffset;
537       int bytesToCompare = Math.min(thisRemaining, thatRemaining);
538 
539       // At least one of the offsets will be zero
540       boolean stillEqual =
541           (thisOffset == 0)
542               ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
543               : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
544       if (!stillEqual) {
545         return false;
546       }
547 
548       pos += bytesToCompare;
549       if (pos >= totalLength) {
550         if (pos == totalLength) {
551           return true;
552         }
553         throw new IllegalStateException();
554       }
555       // We always get to the end of at least one of the pieces
556       if (bytesToCompare == thisRemaining) { // If reached end of this
557         thisOffset = 0;
558         thisString = thisIter.next();
559       } else {
560         thisOffset += bytesToCompare;
561       }
562       if (bytesToCompare == thatRemaining) { // If reached end of that
563         thatOffset = 0;
564         thatString = thatIter.next();
565       } else {
566         thatOffset += bytesToCompare;
567       }
568     }
569   }
570 
571   @Override
partialHash(int h, int offset, int length)572   protected int partialHash(int h, int offset, int length) {
573     int toIndex = offset + length;
574     if (toIndex <= leftLength) {
575       return left.partialHash(h, offset, length);
576     } else if (offset >= leftLength) {
577       return right.partialHash(h, offset - leftLength, length);
578     } else {
579       int leftLength = this.leftLength - offset;
580       int leftPartial = left.partialHash(h, offset, leftLength);
581       return right.partialHash(leftPartial, 0, length - leftLength);
582     }
583   }
584 
585   // =================================================================
586   // Input stream
587 
588   @Override
newCodedInput()589   public CodedInputStream newCodedInput() {
590     return CodedInputStream.newInstance(new RopeInputStream());
591   }
592 
593   @Override
newInput()594   public InputStream newInput() {
595     return new RopeInputStream();
596   }
597 
598   /**
599    * This class implements the balancing algorithm of BAP95. In the paper the authors use an array
600    * to keep track of pieces, while here we use a stack. The tree is balanced by traversing subtrees
601    * in left to right order, and the stack always contains the part of the string we've traversed so
602    * far.
603    *
604    * <p>One surprising aspect of the algorithm is the result of balancing is not necessarily
605    * balanced, though it is nearly balanced. For details, see BAP95.
606    */
607   private static class Balancer {
608     // Stack containing the part of the string, starting from the left, that
609     // we've already traversed.  The final string should be the equivalent of
610     // concatenating the strings on the stack from bottom to top.
611     private final ArrayDeque<ByteString> prefixesStack = new ArrayDeque<>();
612 
balance(ByteString left, ByteString right)613     private ByteString balance(ByteString left, ByteString right) {
614       doBalance(left);
615       doBalance(right);
616 
617       // Sweep stack to gather the result
618       ByteString partialString = prefixesStack.pop();
619       while (!prefixesStack.isEmpty()) {
620         ByteString newLeft = prefixesStack.pop();
621         partialString = new RopeByteString(newLeft, partialString);
622       }
623       // We should end up with a RopeByteString since at a minimum we will
624       // create one from concatenating left and right
625       return partialString;
626     }
627 
doBalance(ByteString root)628     private void doBalance(ByteString root) {
629       // BAP95: Insert balanced subtrees whole. This means the result might not
630       // be balanced, leading to repeated rebalancings on concatenate. However,
631       // these rebalancings are shallow due to ignoring balanced subtrees, and
632       // relatively few calls to insert() result.
633       if (root.isBalanced()) {
634         insert(root);
635       } else if (root instanceof RopeByteString) {
636         RopeByteString rbs = (RopeByteString) root;
637         doBalance(rbs.left);
638         doBalance(rbs.right);
639       } else {
640         throw new IllegalArgumentException(
641             "Has a new type of ByteString been created? Found " + root.getClass());
642       }
643     }
644 
645     /**
646      * Push a string on the balance stack (BAP95). BAP95 uses an array and calls the elements in the
647      * array 'bins'. We instead use a stack, so the 'bins' of lengths are represented by differences
648      * between the elements of minLengthByDepth.
649      *
650      * <p>If the length bin for our string, and all shorter length bins, are empty, we just push it
651      * on the stack. Otherwise, we need to start concatenating, putting the given string in the
652      * "middle" and continuing until we land in an empty length bin that matches the length of our
653      * concatenation.
654      *
655      * @param byteString string to place on the balance stack
656      */
insert(ByteString byteString)657     private void insert(ByteString byteString) {
658       int depthBin = getDepthBinForLength(byteString.size());
659       int binEnd = minLengthByDepth[depthBin + 1];
660 
661       // BAP95: Concatenate all trees occupying bins representing the length of
662       // our new piece or of shorter pieces, to the extent that is possible.
663       // The goal is to clear the bin which our piece belongs in, but that may
664       // not be entirely possible if there aren't enough longer bins occupied.
665       if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
666         prefixesStack.push(byteString);
667       } else {
668         int binStart = minLengthByDepth[depthBin];
669 
670         // Concatenate the subtrees of shorter length
671         ByteString newTree = prefixesStack.pop();
672         while (!prefixesStack.isEmpty() && prefixesStack.peek().size() < binStart) {
673           ByteString left = prefixesStack.pop();
674           newTree = new RopeByteString(left, newTree);
675         }
676 
677         // Concatenate the given string
678         newTree = new RopeByteString(newTree, byteString);
679 
680         // Continue concatenating until we land in an empty bin
681         while (!prefixesStack.isEmpty()) {
682           depthBin = getDepthBinForLength(newTree.size());
683           binEnd = minLengthByDepth[depthBin + 1];
684           if (prefixesStack.peek().size() < binEnd) {
685             ByteString left = prefixesStack.pop();
686             newTree = new RopeByteString(left, newTree);
687           } else {
688             break;
689           }
690         }
691         prefixesStack.push(newTree);
692       }
693     }
694 
getDepthBinForLength(int length)695     private int getDepthBinForLength(int length) {
696       int depth = Arrays.binarySearch(minLengthByDepth, length);
697       if (depth < 0) {
698         // It wasn't an exact match, so convert to the index of the containing
699         // fragment, which is one less even than the insertion point.
700         int insertionPoint = -(depth + 1);
701         depth = insertionPoint - 1;
702       }
703 
704       return depth;
705     }
706   }
707 
708   /**
709    * This class is a continuable tree traversal, which keeps the state information which would exist
710    * on the stack in a recursive traversal instead on a stack of "Bread Crumbs". The maximum depth
711    * of the stack in this iterator is the same as the depth of the tree being traversed.
712    *
713    * <p>This iterator is used to implement {@link RopeByteString#equalsFragments(ByteString)}.
714    */
715   private static final class PieceIterator implements Iterator<LeafByteString> {
716     private final ArrayDeque<RopeByteString> breadCrumbs;
717     private LeafByteString next;
718 
PieceIterator(ByteString root)719     private PieceIterator(ByteString root) {
720       if (root instanceof RopeByteString) {
721         RopeByteString rbs = (RopeByteString) root;
722         breadCrumbs = new ArrayDeque<>(rbs.getTreeDepth());
723         breadCrumbs.push(rbs);
724         next = getLeafByLeft(rbs.left);
725       } else {
726         breadCrumbs = null;
727         next = (LeafByteString) root;
728       }
729     }
730 
getLeafByLeft(ByteString root)731     private LeafByteString getLeafByLeft(ByteString root) {
732       ByteString pos = root;
733       while (pos instanceof RopeByteString) {
734         RopeByteString rbs = (RopeByteString) pos;
735         breadCrumbs.push(rbs);
736         pos = rbs.left;
737       }
738       return (LeafByteString) pos;
739     }
740 
getNextNonEmptyLeaf()741     private LeafByteString getNextNonEmptyLeaf() {
742       while (true) {
743         // Almost always, we go through this loop exactly once.  However, if
744         // we discover an empty string in the rope, we toss it and try again.
745         if (breadCrumbs == null || breadCrumbs.isEmpty()) {
746           return null;
747         } else {
748           LeafByteString result = getLeafByLeft(breadCrumbs.pop().right);
749           if (!result.isEmpty()) {
750             return result;
751           }
752         }
753       }
754     }
755 
756     @Override
hasNext()757     public boolean hasNext() {
758       return next != null;
759     }
760 
761     /**
762      * Returns the next item and advances one {@link com.google.protobuf.ByteString.LeafByteString}.
763      *
764      * @return next non-empty LeafByteString or {@code null}
765      */
766     @Override
next()767     public LeafByteString next() {
768       if (next == null) {
769         throw new NoSuchElementException();
770       }
771       LeafByteString result = next;
772       next = getNextNonEmptyLeaf();
773       return result;
774     }
775 
776     @Override
remove()777     public void remove() {
778       throw new UnsupportedOperationException();
779     }
780   }
781 
782   // =================================================================
783   // Serializable
784 
785   private static final long serialVersionUID = 1L;
786 
writeReplace()787   Object writeReplace() {
788     return ByteString.wrap(toByteArray());
789   }
790 
readObject(@uppressWarnings"unused") ObjectInputStream in)791   private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
792     throw new InvalidObjectException("RopeByteStream instances are not to be serialized directly");
793   }
794 
795   /** This class is the {@link RopeByteString} equivalent for {@link ByteArrayInputStream}. */
796   private class RopeInputStream extends InputStream {
797     // Iterates through the pieces of the rope
798     private PieceIterator pieceIterator;
799     // The current piece
800     private LeafByteString currentPiece;
801     // The size of the current piece
802     private int currentPieceSize;
803     // The index of the next byte to read in the current piece
804     private int currentPieceIndex;
805     // The offset of the start of the current piece in the rope byte string
806     private int currentPieceOffsetInRope;
807     // Offset in the buffer at which user called mark();
808     private int mark;
809 
RopeInputStream()810     public RopeInputStream() {
811       initialize();
812     }
813 
814     @Override
read(byte[] b, int offset, int length)815     public int read(byte[] b, int offset, int length) {
816       if (b == null) {
817         throw new NullPointerException();
818       } else if (offset < 0 || length < 0 || length > b.length - offset) {
819         throw new IndexOutOfBoundsException();
820       }
821       return readSkipInternal(b, offset, length);
822     }
823 
824     @Override
skip(long length)825     public long skip(long length) {
826       if (length < 0) {
827         throw new IndexOutOfBoundsException();
828       } else if (length > Integer.MAX_VALUE) {
829         length = Integer.MAX_VALUE;
830       }
831       return readSkipInternal(null, 0, (int) length);
832     }
833 
834     /**
835      * Internal implementation of read and skip. If b != null, then read the next {@code length}
836      * bytes into the buffer {@code b} at offset {@code offset}. If b == null, then skip the next
837      * {@code length} bytes.
838      *
839      * <p>This method assumes that all error checking has already happened.
840      *
841      * <p>Returns the actual number of bytes read or skipped.
842      */
readSkipInternal(byte[] b, int offset, int length)843     private int readSkipInternal(byte[] b, int offset, int length) {
844       int bytesRemaining = length;
845       while (bytesRemaining > 0) {
846         advanceIfCurrentPieceFullyRead();
847         if (currentPiece == null) {
848           if (bytesRemaining == length) {
849             // We didn't manage to read anything
850             return -1;
851           }
852           break;
853         } else {
854           // Copy the bytes from this piece.
855           int currentPieceRemaining = currentPieceSize - currentPieceIndex;
856           int count = Math.min(currentPieceRemaining, bytesRemaining);
857           if (b != null) {
858             currentPiece.copyTo(b, currentPieceIndex, offset, count);
859             offset += count;
860           }
861           currentPieceIndex += count;
862           bytesRemaining -= count;
863         }
864       }
865       // Return the number of bytes read.
866       return length - bytesRemaining;
867     }
868 
869     @Override
read()870     public int read() throws IOException {
871       advanceIfCurrentPieceFullyRead();
872       if (currentPiece == null) {
873         return -1;
874       } else {
875         return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
876       }
877     }
878 
879     @Override
available()880     public int available() throws IOException {
881       int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
882       return RopeByteString.this.size() - bytesRead;
883     }
884 
885     @Override
markSupported()886     public boolean markSupported() {
887       return true;
888     }
889 
890     @Override
mark(int readAheadLimit)891     public void mark(int readAheadLimit) {
892       // Set the mark to our position in the byte string
893       mark = currentPieceOffsetInRope + currentPieceIndex;
894     }
895 
896     @Override
reset()897     public synchronized void reset() {
898       // Just reinitialize and skip the specified number of bytes.
899       initialize();
900       readSkipInternal(null, 0, mark);
901     }
902 
903     /** Common initialization code used by both the constructor and reset() */
initialize()904     private void initialize() {
905       pieceIterator = new PieceIterator(RopeByteString.this);
906       currentPiece = pieceIterator.next();
907       currentPieceSize = currentPiece.size();
908       currentPieceIndex = 0;
909       currentPieceOffsetInRope = 0;
910     }
911 
912     /**
913      * Skips to the next piece if we have read all the data in the current piece. Sets currentPiece
914      * to null if we have reached the end of the input.
915      */
advanceIfCurrentPieceFullyRead()916     private void advanceIfCurrentPieceFullyRead() {
917       if (currentPiece != null && currentPieceIndex == currentPieceSize) {
918         // Generally, we can only go through this loop at most once, since
919         // empty strings can't end up in a rope.  But better to test.
920         currentPieceOffsetInRope += currentPieceSize;
921         currentPieceIndex = 0;
922         if (pieceIterator.hasNext()) {
923           currentPiece = pieceIterator.next();
924           currentPieceSize = currentPiece.size();
925         } else {
926           currentPiece = null;
927           currentPieceSize = 0;
928         }
929       }
930     }
931   }
932 }
933