• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import static com.google.protobuf.TextFormatEscaper.escapeBytes;
34 import static java.lang.Integer.toHexString;
35 import static java.lang.System.identityHashCode;
36 
37 import java.io.ByteArrayInputStream;
38 import java.io.ByteArrayOutputStream;
39 import java.io.IOException;
40 import java.io.InputStream;
41 import java.io.InvalidObjectException;
42 import java.io.ObjectInputStream;
43 import java.io.OutputStream;
44 import java.io.Serializable;
45 import java.io.UnsupportedEncodingException;
46 import java.nio.ByteBuffer;
47 import java.nio.charset.Charset;
48 import java.nio.charset.UnsupportedCharsetException;
49 import java.util.ArrayList;
50 import java.util.Arrays;
51 import java.util.Collection;
52 import java.util.Collections;
53 import java.util.Comparator;
54 import java.util.Iterator;
55 import java.util.List;
56 import java.util.Locale;
57 import java.util.NoSuchElementException;
58 
59 /**
60  * Immutable sequence of bytes. Provides conversions to and from {@code byte[]}, {@link
61  * java.lang.String}, {@link ByteBuffer}, {@link InputStream}, {@link OutputStream}. Also provides a
62  * conversion to {@link CodedInputStream}.
63  *
64  * <p>Like {@link String}, the contents of a {@link ByteString} can never be observed to change, not
65  * even in the presence of a data race or incorrect API usage in the client code.
66  *
67  * <p>Substring is supported by sharing the reference to the immutable underlying bytes.
68  * Concatenation is likewise supported without copying (long strings) by building a tree of pieces
69  * in {@link RopeByteString}.
70  *
71  * @author crazybob@google.com Bob Lee
72  * @author kenton@google.com Kenton Varda
73  * @author carlanton@google.com Carl Haverl
74  * @author martinrb@google.com Martin Buchholz
75  */
76 public abstract class ByteString implements Iterable<Byte>, Serializable {
77 
78   /**
79    * When two strings to be concatenated have a combined length shorter than this, we just copy
80    * their bytes on {@link #concat(ByteString)}. The trade-off is copy size versus the overhead of
81    * creating tree nodes in {@link RopeByteString}.
82    */
83   static final int CONCATENATE_BY_COPY_SIZE = 128;
84 
85   /**
86    * When copying an InputStream into a ByteString with .readFrom(), the chunks in the underlying
87    * rope start at 256 bytes, but double each iteration up to 8192 bytes.
88    */
89   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
90 
91   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
92 
93   /** Empty {@code ByteString}. */
94   public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY);
95 
96   /**
97    * An interface to efficiently copy {@code byte[]}.
98    *
99    * <p>One of the noticeable costs of copying a byte[] into a new array using {@code
100    * System.arraycopy} is nullification of a new buffer before the copy. It has been shown the
101    * Hotspot VM is capable to intrisicfy {@code Arrays.copyOfRange} operation to avoid this
102    * expensive nullification and provide substantial performance gain. Unfortunately this does not
103    * hold on Android runtimes and could make the copy slightly slower due to additional code in the
104    * {@code Arrays.copyOfRange}. Thus we provide two different implementation for array copier for
105    * Hotspot and Android runtimes.
106    */
107   private interface ByteArrayCopier {
108     /** Copies the specified range of the specified array into a new array */
copyFrom(byte[] bytes, int offset, int size)109     byte[] copyFrom(byte[] bytes, int offset, int size);
110   }
111 
112   /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */
113   private static final class SystemByteArrayCopier implements ByteArrayCopier {
114     @Override
copyFrom(byte[] bytes, int offset, int size)115     public byte[] copyFrom(byte[] bytes, int offset, int size) {
116       byte[] copy = new byte[size];
117       System.arraycopy(bytes, offset, copy, 0, size);
118       return copy;
119     }
120   }
121 
122   /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */
123   private static final class ArraysByteArrayCopier implements ByteArrayCopier {
124     @Override
copyFrom(byte[] bytes, int offset, int size)125     public byte[] copyFrom(byte[] bytes, int offset, int size) {
126       return Arrays.copyOfRange(bytes, offset, offset + size);
127     }
128   }
129 
130   private static final ByteArrayCopier byteArrayCopier;
131 
132   static {
133     byteArrayCopier =
134         Android.isOnAndroidDevice() ? new SystemByteArrayCopier() : new ArraysByteArrayCopier();
135   }
136 
137   /**
138    * Cached hash value. Intentionally accessed via a data race, which is safe because of the Java
139    * Memory Model's "no out-of-thin-air values" guarantees for ints. A value of 0 implies that the
140    * hash has not been set.
141    */
142   private int hash = 0;
143 
144   // This constructor is here to prevent subclassing outside of this package,
ByteString()145   ByteString() {}
146 
147   /**
148    * Gets the byte at the given index. This method should be used only for random access to
149    * individual bytes. To access bytes sequentially, use the {@link ByteIterator} returned by {@link
150    * #iterator()}, and call {@link #substring(int, int)} first if necessary.
151    *
152    * @param index index of byte
153    * @return the value
154    * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
155    */
byteAt(int index)156   public abstract byte byteAt(int index);
157 
158   /**
159    * Gets the byte at the given index, assumes bounds checking has already been performed.
160    *
161    * @param index index of byte
162    * @return the value
163    * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
164    */
internalByteAt(int index)165   abstract byte internalByteAt(int index);
166 
167   /**
168    * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. To avoid
169    * auto-boxing, you may get the iterator manually and call {@link ByteIterator#nextByte()}.
170    *
171    * @return the iterator
172    */
173   @Override
iterator()174   public ByteIterator iterator() {
175     return new AbstractByteIterator() {
176       private int position = 0;
177       private final int limit = size();
178 
179       @Override
180       public boolean hasNext() {
181         return position < limit;
182       }
183 
184       @Override
185       public byte nextByte() {
186         int currentPos = position;
187         if (currentPos >= limit) {
188           throw new NoSuchElementException();
189         }
190         position = currentPos + 1;
191         return internalByteAt(currentPos);
192       }
193     };
194   }
195 
196   /**
197    * This interface extends {@code Iterator<Byte>}, so that we can return an unboxed {@code byte}.
198    */
199   public interface ByteIterator extends Iterator<Byte> {
200     /**
201      * An alternative to {@link Iterator#next()} that returns an unboxed primitive {@code byte}.
202      *
203      * @return the next {@code byte} in the iteration
204      * @throws NoSuchElementException if the iteration has no more elements
205      */
nextByte()206     byte nextByte();
207   }
208 
209   abstract static class AbstractByteIterator implements ByteIterator {
210     @Override
next()211     public final Byte next() {
212       // Boxing calls Byte.valueOf(byte), which does not instantiate.
213       return nextByte();
214     }
215 
216     @Override
remove()217     public final void remove() {
218       throw new UnsupportedOperationException();
219     }
220   }
221 
222   /**
223    * Gets the number of bytes.
224    *
225    * @return size in bytes
226    */
size()227   public abstract int size();
228 
229   /**
230    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
231    *
232    * @return true if this is zero bytes long
233    */
isEmpty()234   public final boolean isEmpty() {
235     return size() == 0;
236   }
237 
238   // =================================================================
239   // Comparison
240 
241   private static final int UNSIGNED_BYTE_MASK = 0xFF;
242 
243   /**
244    * Returns the value of the given byte as an integer, interpreting the byte as an unsigned value.
245    * That is, returns {@code value + 256} if {@code value} is negative; {@code value} itself
246    * otherwise.
247    *
248    * <p>Note: This code was copied from {@link com.google.common.primitives.UnsignedBytes#toInt}, as
249    * Guava libraries cannot be used in the {@code com.google.protobuf} package.
250    */
toInt(byte value)251   private static int toInt(byte value) {
252     return value & UNSIGNED_BYTE_MASK;
253   }
254 
255   /**
256    * Compares two {@link ByteString}s lexicographically, treating their contents as unsigned byte
257    * values between 0 and 255 (inclusive).
258    *
259    * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it
260    * is interpreted as an unsigned value, {@code 255}.
261    */
262   private static final Comparator<ByteString> UNSIGNED_LEXICOGRAPHICAL_COMPARATOR =
263       new Comparator<ByteString>() {
264         @Override
265         public int compare(ByteString former, ByteString latter) {
266           ByteIterator formerBytes = former.iterator();
267           ByteIterator latterBytes = latter.iterator();
268 
269           while (formerBytes.hasNext() && latterBytes.hasNext()) {
270             // Note: This code was copied from com.google.common.primitives.UnsignedBytes#compare,
271             // as Guava libraries cannot be used in the {@code com.google.protobuf} package.
272             int result =
273                 Integer.compare(toInt(formerBytes.nextByte()), toInt(latterBytes.nextByte()));
274             if (result != 0) {
275               return result;
276             }
277           }
278 
279           return Integer.compare(former.size(), latter.size());
280         }
281       };
282 
283   /**
284    * Returns a {@link Comparator} which compares {@link ByteString}-s lexicographically
285    * as sequences of unsigned bytes (i.e. values between 0 and 255, inclusive).
286    *
287    * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it
288    * is interpreted as an unsigned value, {@code 255}:
289    *
290    * <ul>
291    *   <li>{@code `-1` -> 0b11111111 (two's complement) -> 255}
292    *   <li>{@code `1` -> 0b00000001 -> 1}
293    * </ul>
294    */
unsignedLexicographicalComparator()295   public static Comparator<ByteString> unsignedLexicographicalComparator() {
296     return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR;
297   }
298 
299   // =================================================================
300   // ByteString -> substring
301 
302   /**
303    * Return the substring from {@code beginIndex}, inclusive, to the end of the string.
304    *
305    * @param beginIndex start at this index
306    * @return substring sharing underlying data
307    * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}.
308    */
substring(int beginIndex)309   public final ByteString substring(int beginIndex) {
310     return substring(beginIndex, size());
311   }
312 
313   /**
314    * Return the substring from {@code beginIndex}, inclusive, to {@code endIndex}, exclusive.
315    *
316    * @param beginIndex start at this index
317    * @param endIndex the last character is the one before this index
318    * @return substring sharing underlying data
319    * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or
320    *     {@code beginIndex > endIndex}.
321    */
substring(int beginIndex, int endIndex)322   public abstract ByteString substring(int beginIndex, int endIndex);
323 
324   /**
325    * Tests if this bytestring starts with the specified prefix. Similar to {@link
326    * String#startsWith(String)}
327    *
328    * @param prefix the prefix.
329    * @return <code>true</code> if the byte sequence represented by the argument is a prefix of the
330    *     byte sequence represented by this string; <code>false</code> otherwise.
331    */
startsWith(ByteString prefix)332   public final boolean startsWith(ByteString prefix) {
333     return size() >= prefix.size() && substring(0, prefix.size()).equals(prefix);
334   }
335 
336   /**
337    * Tests if this bytestring ends with the specified suffix. Similar to {@link
338    * String#endsWith(String)}
339    *
340    * @param suffix the suffix.
341    * @return <code>true</code> if the byte sequence represented by the argument is a suffix of the
342    *     byte sequence represented by this string; <code>false</code> otherwise.
343    */
endsWith(ByteString suffix)344   public final boolean endsWith(ByteString suffix) {
345     return size() >= suffix.size() && substring(size() - suffix.size()).equals(suffix);
346   }
347 
348   // =================================================================
349   // byte[] -> ByteString
350 
351   /**
352    * Copies the given bytes into a {@code ByteString}.
353    *
354    * @param bytes source array
355    * @param offset offset in source array
356    * @param size number of bytes to copy
357    * @return new {@code ByteString}
358    * @throws IndexOutOfBoundsException if {@code offset} or {@code size} are out of bounds
359    */
copyFrom(byte[] bytes, int offset, int size)360   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
361     checkRange(offset, offset + size, bytes.length);
362     return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size));
363   }
364 
365   /**
366    * Copies the given bytes into a {@code ByteString}.
367    *
368    * @param bytes to copy
369    * @return new {@code ByteString}
370    */
copyFrom(byte[] bytes)371   public static ByteString copyFrom(byte[] bytes) {
372     return copyFrom(bytes, 0, bytes.length);
373   }
374 
375   /** Wraps the given bytes into a {@code ByteString}. Intended for internal only usage. */
wrap(ByteBuffer buffer)376   static ByteString wrap(ByteBuffer buffer) {
377     if (buffer.hasArray()) {
378       final int offset = buffer.arrayOffset();
379       return ByteString.wrap(buffer.array(), offset + buffer.position(), buffer.remaining());
380     } else {
381       return new NioByteString(buffer);
382     }
383   }
384 
385   /**
386    * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a
387    * classload of ByteString before LiteralByteString.
388    */
wrap(byte[] bytes)389   static ByteString wrap(byte[] bytes) {
390     // TODO(dweis): Return EMPTY when bytes are empty to reduce allocations?
391     return new LiteralByteString(bytes);
392   }
393 
394   /**
395    * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a
396    * classload of ByteString before BoundedByteString and LiteralByteString.
397    */
wrap(byte[] bytes, int offset, int length)398   static ByteString wrap(byte[] bytes, int offset, int length) {
399     return new BoundedByteString(bytes, offset, length);
400   }
401 
402   /**
403    * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into a {@code
404    * ByteString}.
405    *
406    * @param bytes source buffer
407    * @param size number of bytes to copy
408    * @return new {@code ByteString}
409    * @throws IndexOutOfBoundsException if {@code size > bytes.remaining()}
410    */
copyFrom(ByteBuffer bytes, int size)411   public static ByteString copyFrom(ByteBuffer bytes, int size) {
412     checkRange(0, size, bytes.remaining());
413     byte[] copy = new byte[size];
414     bytes.get(copy);
415     return new LiteralByteString(copy);
416   }
417 
418   /**
419    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into a {@code ByteString}.
420    *
421    * @param bytes sourceBuffer
422    * @return new {@code ByteString}
423    */
copyFrom(ByteBuffer bytes)424   public static ByteString copyFrom(ByteBuffer bytes) {
425     return copyFrom(bytes, bytes.remaining());
426   }
427 
428   /**
429    * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as
430    * a {@code ByteString}.
431    *
432    * @param text source string
433    * @param charsetName encoding to use
434    * @return new {@code ByteString}
435    * @throws UnsupportedEncodingException if the encoding isn't found
436    */
copyFrom(String text, String charsetName)437   public static ByteString copyFrom(String text, String charsetName)
438       throws UnsupportedEncodingException {
439     return new LiteralByteString(text.getBytes(charsetName));
440   }
441 
442   /**
443    * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as
444    * a {@code ByteString}.
445    *
446    * @param text source string
447    * @param charset encode using this charset
448    * @return new {@code ByteString}
449    */
copyFrom(String text, Charset charset)450   public static ByteString copyFrom(String text, Charset charset) {
451     return new LiteralByteString(text.getBytes(charset));
452   }
453 
454   /**
455    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the result as a {@code
456    * ByteString}.
457    *
458    * @param text source string
459    * @return new {@code ByteString}
460    */
copyFromUtf8(String text)461   public static ByteString copyFromUtf8(String text) {
462     return new LiteralByteString(text.getBytes(Internal.UTF_8));
463   }
464 
465   // =================================================================
466   // InputStream -> ByteString
467 
468   /**
469    * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary
470    * until all bytes are read through to the end of the stream.
471    *
472    * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte
473    * arrays ("chunks") of the stream data. The first chunk is small, with subsequent chunks each
474    * being double the size, up to 8K.
475    *
476    * <p>Each byte read from the input stream will be copied twice to ensure that the resulting
477    * ByteString is truly immutable.
478    *
479    * @param streamToDrain The source stream, which is read completely but not closed.
480    * @return A new {@code ByteString} which is made up of chunks of various sizes, depending on the
481    *     behavior of the underlying stream.
482    * @throws IOException IOException is thrown if there is a problem reading the underlying stream.
483    */
readFrom(InputStream streamToDrain)484   public static ByteString readFrom(InputStream streamToDrain) throws IOException {
485     return readFrom(streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
486   }
487 
488   /**
489    * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary
490    * until all bytes are read through to the end of the stream.
491    *
492    * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte
493    * arrays ("chunks") of the stream data. The chunkSize parameter sets the size of these byte
494    * arrays.
495    *
496    * <p>Each byte read from the input stream will be copied twice to ensure that the resulting
497    * ByteString is truly immutable.
498    *
499    * @param streamToDrain The source stream, which is read completely but not closed.
500    * @param chunkSize The size of the chunks in which to read the stream.
501    * @return A new {@code ByteString} which is made up of chunks of the given size.
502    * @throws IOException IOException is thrown if there is a problem reading the underlying stream.
503    */
readFrom(InputStream streamToDrain, int chunkSize)504   public static ByteString readFrom(InputStream streamToDrain, int chunkSize) throws IOException {
505     return readFrom(streamToDrain, chunkSize, chunkSize);
506   }
507 
508   // Helper method that takes the chunk size range as a parameter.
readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)509   public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)
510       throws IOException {
511     Collection<ByteString> results = new ArrayList<ByteString>();
512 
513     // copy the inbound bytes into a list of chunks; the chunk size
514     // grows exponentially to support both short and long streams.
515     int chunkSize = minChunkSize;
516     while (true) {
517       ByteString chunk = readChunk(streamToDrain, chunkSize);
518       if (chunk == null) {
519         break;
520       }
521       results.add(chunk);
522       chunkSize = Math.min(chunkSize * 2, maxChunkSize);
523     }
524 
525     return ByteString.copyFrom(results);
526   }
527 
528   /**
529    * Blocks until a chunk of the given size can be made from the stream, or EOF is reached. Calls
530    * read() repeatedly in case the given stream implementation doesn't completely fill the given
531    * buffer in one read() call.
532    *
533    * @return A chunk of the desired size, or else a chunk as large as was available when end of
534    *     stream was reached. Returns null if the given stream had no more data in it.
535    */
readChunk(InputStream in, final int chunkSize)536   private static ByteString readChunk(InputStream in, final int chunkSize) throws IOException {
537     final byte[] buf = new byte[chunkSize];
538     int bytesRead = 0;
539     while (bytesRead < chunkSize) {
540       final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
541       if (count == -1) {
542         break;
543       }
544       bytesRead += count;
545     }
546 
547     if (bytesRead == 0) {
548       return null;
549     }
550 
551     // Always make a copy since InputStream could steal a reference to buf.
552     return ByteString.copyFrom(buf, 0, bytesRead);
553   }
554 
555   // =================================================================
556   // Multiple ByteStrings -> One ByteString
557 
558   /**
559    * Concatenate the given {@code ByteString} to this one. Short concatenations, of total size
560    * smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are produced by copying the
561    * underlying bytes (as per Rope.java, <a
562    * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
563    * BAP95 </a>. In general, the concatenate involves no copying.
564    *
565    * @param other string to concatenate
566    * @return a new {@code ByteString} instance
567    */
concat(ByteString other)568   public final ByteString concat(ByteString other) {
569     if (Integer.MAX_VALUE - size() < other.size()) {
570       throw new IllegalArgumentException(
571           "ByteString would be too long: " + size() + "+" + other.size());
572     }
573 
574     return RopeByteString.concatenate(this, other);
575   }
576 
577   /**
578    * Concatenates all byte strings in the iterable and returns the result. This is designed to run
579    * in O(list size), not O(total bytes).
580    *
581    * <p>The returned {@code ByteString} is not necessarily a unique object. If the list is empty,
582    * the returned object is the singleton empty {@code ByteString}. If the list has only one
583    * element, that {@code ByteString} will be returned without copying.
584    *
585    * @param byteStrings strings to be concatenated
586    * @return new {@code ByteString}
587    */
copyFrom(Iterable<ByteString> byteStrings)588   public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
589     // Determine the size;
590     final int size;
591     if (!(byteStrings instanceof Collection)) {
592       int tempSize = 0;
593       for (Iterator<ByteString> iter = byteStrings.iterator();
594           iter.hasNext();
595           iter.next(), ++tempSize) {}
596       size = tempSize;
597     } else {
598       size = ((Collection<ByteString>) byteStrings).size();
599     }
600 
601     if (size == 0) {
602       return EMPTY;
603     }
604 
605     return balancedConcat(byteStrings.iterator(), size);
606   }
607 
608   // Internal function used by copyFrom(Iterable<ByteString>).
609   // Create a balanced concatenation of the next "length" elements from the
610   // iterable.
balancedConcat(Iterator<ByteString> iterator, int length)611   private static ByteString balancedConcat(Iterator<ByteString> iterator, int length) {
612     if (length < 1) {
613       throw new IllegalArgumentException(String.format("length (%s) must be >= 1", length));
614     }
615     ByteString result;
616     if (length == 1) {
617       result = iterator.next();
618     } else {
619       int halfLength = length >>> 1;
620       ByteString left = balancedConcat(iterator, halfLength);
621       ByteString right = balancedConcat(iterator, length - halfLength);
622       result = left.concat(right);
623     }
624     return result;
625   }
626 
627   // =================================================================
628   // ByteString -> byte[]
629 
630   /**
631    * Copies bytes into a buffer at the given offset.
632    *
633    * <p>To copy a subset of bytes, you call this method on the return value of {@link
634    * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target, offset)}
635    *
636    * @param target buffer to copy into
637    * @param offset in the target buffer
638    * @throws IndexOutOfBoundsException if the offset is negative or too large
639    */
copyTo(byte[] target, int offset)640   public void copyTo(byte[] target, int offset) {
641     copyTo(target, 0, offset, size());
642   }
643 
644   /**
645    * Copies bytes into a buffer.
646    *
647    * @param target buffer to copy into
648    * @param sourceOffset offset within these bytes
649    * @param targetOffset offset within the target buffer
650    * @param numberToCopy number of bytes to copy
651    * @throws IndexOutOfBoundsException if an offset or size is negative or too large
652    * @deprecated Instead, call {@code byteString.substring(sourceOffset, sourceOffset +
653    *     numberToCopy).copyTo(target, targetOffset)}
654    */
655   @Deprecated
copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)656   public final void copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
657     checkRange(sourceOffset, sourceOffset + numberToCopy, size());
658     checkRange(targetOffset, targetOffset + numberToCopy, target.length);
659     if (numberToCopy > 0) {
660       copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
661     }
662   }
663 
664   /**
665    * Internal (package private) implementation of {@link #copyTo(byte[],int,int,int)}. It assumes
666    * that all error checking has already been performed and that {@code numberToCopy > 0}.
667    */
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)668   protected abstract void copyToInternal(
669       byte[] target, int sourceOffset, int targetOffset, int numberToCopy);
670 
671   /**
672    * Copies bytes into a ByteBuffer.
673    *
674    * <p>To copy a subset of bytes, you call this method on the return value of {@link
675    * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target)}
676    *
677    * @param target ByteBuffer to copy into.
678    * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
679    * @throws java.nio.BufferOverflowException if the {@code target}'s remaining() space is not large
680    *     enough to hold the data.
681    */
copyTo(ByteBuffer target)682   public abstract void copyTo(ByteBuffer target);
683 
684   /**
685    * Copies bytes to a {@code byte[]}.
686    *
687    * @return copied bytes
688    */
toByteArray()689   public final byte[] toByteArray() {
690     final int size = size();
691     if (size == 0) {
692       return Internal.EMPTY_BYTE_ARRAY;
693     }
694     byte[] result = new byte[size];
695     copyToInternal(result, 0, 0, size);
696     return result;
697   }
698 
699   /**
700    * Writes a copy of the contents of this byte string to the specified output stream argument.
701    *
702    * @param out the output stream to which to write the data.
703    * @throws IOException if an I/O error occurs.
704    */
writeTo(OutputStream out)705   public abstract void writeTo(OutputStream out) throws IOException;
706 
707   /**
708    * Writes a specified part of this byte string to an output stream.
709    *
710    * @param out the output stream to which to write the data.
711    * @param sourceOffset offset within these bytes
712    * @param numberToWrite number of bytes to write
713    * @throws IOException if an I/O error occurs.
714    * @throws IndexOutOfBoundsException if an offset or size is negative or too large
715    */
writeTo(OutputStream out, int sourceOffset, int numberToWrite)716   final void writeTo(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
717     checkRange(sourceOffset, sourceOffset + numberToWrite, size());
718     if (numberToWrite > 0) {
719       writeToInternal(out, sourceOffset, numberToWrite);
720     }
721   }
722 
723   /**
724    * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes all error checking has
725    * already been done.
726    */
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)727   abstract void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)
728       throws IOException;
729 
730   /**
731    * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling this method may
732    * result in multiple operations on the target {@link ByteOutput}.
733    *
734    * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link
735    * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious
736    * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution!
737    *
738    * @param byteOutput the output target to receive the bytes
739    * @throws IOException if an I/O error occurs
740    * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput)
741    */
writeTo(ByteOutput byteOutput)742   abstract void writeTo(ByteOutput byteOutput) throws IOException;
743 
744   /**
745    * This method behaves exactly the same as {@link #writeTo(ByteOutput)} unless the {@link
746    * ByteString} is a rope. For ropes, the leaf nodes are written in reverse order to the {@code
747    * byteOutput}.
748    *
749    * @param byteOutput the output target to receive the bytes
750    * @throws IOException if an I/O error occurs
751    * @see UnsafeByteOperations#unsafeWriteToReverse(ByteString, ByteOutput)
752    */
writeToReverse(ByteOutput byteOutput)753   abstract void writeToReverse(ByteOutput byteOutput) throws IOException;
754 
755   /**
756    * Constructs a read-only {@code java.nio.ByteBuffer} whose content is equal to the contents of
757    * this byte string. The result uses the same backing array as the byte string, if possible.
758    *
759    * @return wrapped bytes
760    */
asReadOnlyByteBuffer()761   public abstract ByteBuffer asReadOnlyByteBuffer();
762 
763   /**
764    * Constructs a list of read-only {@code java.nio.ByteBuffer} objects such that the concatenation
765    * of their contents is equal to the contents of this byte string. The result uses the same
766    * backing arrays as the byte string.
767    *
768    * <p>By returning a list, implementations of this method may be able to avoid copying even when
769    * there are multiple backing arrays.
770    *
771    * @return a list of wrapped bytes
772    */
asReadOnlyByteBufferList()773   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
774 
775   /**
776    * Constructs a new {@code String} by decoding the bytes using the specified charset.
777    *
778    * @param charsetName encode using this charset
779    * @return new string
780    * @throws UnsupportedEncodingException if charset isn't recognized
781    */
toString(String charsetName)782   public final String toString(String charsetName) throws UnsupportedEncodingException {
783     try {
784       return toString(Charset.forName(charsetName));
785     } catch (UnsupportedCharsetException e) {
786       UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
787       exception.initCause(e);
788       throw exception;
789     }
790   }
791 
792   /**
793    * Constructs a new {@code String} by decoding the bytes using the specified charset. Returns the
794    * same empty String if empty.
795    *
796    * @param charset encode using this charset
797    * @return new string
798    */
toString(Charset charset)799   public final String toString(Charset charset) {
800     return size() == 0 ? "" : toStringInternal(charset);
801   }
802 
803   /**
804    * Constructs a new {@code String} by decoding the bytes using the specified charset.
805    *
806    * @param charset encode using this charset
807    * @return new string
808    */
toStringInternal(Charset charset)809   protected abstract String toStringInternal(Charset charset);
810 
811   // =================================================================
812   // UTF-8 decoding
813 
814   /**
815    * Constructs a new {@code String} by decoding the bytes as UTF-8.
816    *
817    * @return new string using UTF-8 encoding
818    */
toStringUtf8()819   public final String toStringUtf8() {
820     return toString(Internal.UTF_8);
821   }
822 
823   /**
824    * Tells whether this {@code ByteString} represents a well-formed UTF-8 byte sequence, such that
825    * the original bytes can be converted to a String object and then round tripped back to bytes
826    * without loss.
827    *
828    * <p>More precisely, returns {@code true} whenever:
829    *
830    * <pre>{@code
831    * Arrays.equals(byteString.toByteArray(),
832    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
833    * }</pre>
834    *
835    * <p>This method returns {@code false} for "overlong" byte sequences, as well as for 3-byte
836    * sequences that would map to a surrogate character, in accordance with the restricted definition
837    * of UTF-8 introduced in Unicode 3.1. Note that the UTF-8 decoder included in Oracle's JDK has
838    * been modified to also reject "overlong" byte sequences, but (as of 2011) still accepts 3-byte
839    * surrogate character byte sequences.
840    *
841    * <p>See the Unicode Standard,<br>
842    * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
843    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
844    *
845    * @return whether the bytes in this {@code ByteString} are a well-formed UTF-8 byte sequence
846    */
isValidUtf8()847   public abstract boolean isValidUtf8();
848 
849   /**
850    * Tells whether the given byte sequence is a well-formed, malformed, or incomplete UTF-8 byte
851    * sequence. This method accepts and returns a partial state result, allowing the bytes for a
852    * complete UTF-8 byte sequence to be composed from multiple {@code ByteString} segments.
853    *
854    * @param state either {@code 0} (if this is the initial decoding operation) or the value returned
855    *     from a call to a partial decoding method for the previous bytes
856    * @param offset offset of the first byte to check
857    * @param length number of bytes to check
858    * @return {@code -1} if the partial byte sequence is definitely malformed, {@code 0} if it is
859    *     well-formed (no additional input needed), or, if the byte sequence is "incomplete", i.e.
860    *     apparently terminated in the middle of a character, an opaque integer "state" value
861    *     containing enough information to decode the character when passed to a subsequent
862    *     invocation of a partial decoding method.
863    */
partialIsValidUtf8(int state, int offset, int length)864   protected abstract int partialIsValidUtf8(int state, int offset, int length);
865 
866   // =================================================================
867   // equals() and hashCode()
868 
869   @Override
870   public abstract boolean equals(Object o);
871 
872   /** Base class for leaf {@link ByteString}s (i.e. non-ropes). */
873   abstract static class LeafByteString extends ByteString {
874     @Override
getTreeDepth()875     protected final int getTreeDepth() {
876       return 0;
877     }
878 
879     @Override
isBalanced()880     protected final boolean isBalanced() {
881       return true;
882     }
883 
884     @Override
writeToReverse(ByteOutput byteOutput)885     void writeToReverse(ByteOutput byteOutput) throws IOException {
886       writeTo(byteOutput);
887     }
888 
889     /**
890      * Check equality of the substring of given length of this object starting at zero with another
891      * {@code ByteString} substring starting at offset.
892      *
893      * @param other what to compare a substring in
894      * @param offset offset into other
895      * @param length number of bytes to compare
896      * @return true for equality of substrings, else false.
897      */
equalsRange(ByteString other, int offset, int length)898     abstract boolean equalsRange(ByteString other, int offset, int length);
899   }
900 
901   /**
902    * Compute the hashCode using the traditional algorithm from {@link ByteString}.
903    *
904    * @return hashCode value
905    */
906   @Override
hashCode()907   public final int hashCode() {
908     int h = hash;
909 
910     if (h == 0) {
911       int size = size();
912       h = partialHash(size, 0, size);
913       if (h == 0) {
914         h = 1;
915       }
916       hash = h;
917     }
918     return h;
919   }
920 
921   // =================================================================
922   // Input stream
923 
924   /**
925    * Creates an {@code InputStream} which can be used to read the bytes.
926    *
927    * <p>The {@link InputStream} returned by this method is guaranteed to be completely non-blocking.
928    * The method {@link InputStream#available()} returns the number of bytes remaining in the stream.
929    * The methods {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)} and
930    * {@link InputStream#skip(long)} will read/skip as many bytes as are available. The method {@link
931    * InputStream#markSupported()} returns {@code true}.
932    *
933    * <p>The methods in the returned {@link InputStream} might <b>not</b> be thread safe.
934    *
935    * @return an input stream that returns the bytes of this byte string.
936    */
newInput()937   public abstract InputStream newInput();
938 
939   /**
940    * Creates a {@link CodedInputStream} which can be used to read the bytes. Using this is often
941    * more efficient than creating a {@link CodedInputStream} that wraps the result of {@link
942    * #newInput()}.
943    *
944    * @return stream based on wrapped data
945    */
newCodedInput()946   public abstract CodedInputStream newCodedInput();
947 
948   // =================================================================
949   // Output stream
950 
951   /**
952    * Creates a new {@link Output} with the given initial capacity. Call {@link
953    * Output#toByteString()} to create the {@code ByteString} instance.
954    *
955    * <p>A {@link ByteString.Output} offers the same functionality as a {@link
956    * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte}
957    * array.
958    *
959    * @param initialCapacity estimate of number of bytes to be written
960    * @return {@code OutputStream} for building a {@code ByteString}
961    */
newOutput(int initialCapacity)962   public static Output newOutput(int initialCapacity) {
963     return new Output(initialCapacity);
964   }
965 
966   /**
967    * Creates a new {@link Output}. Call {@link Output#toByteString()} to create the {@code
968    * ByteString} instance.
969    *
970    * <p>A {@link ByteString.Output} offers the same functionality as a {@link
971    * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte
972    * array}.
973    *
974    * @return {@code OutputStream} for building a {@code ByteString}
975    */
newOutput()976   public static Output newOutput() {
977     return new Output(CONCATENATE_BY_COPY_SIZE);
978   }
979 
980   /**
981    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to create the {@code
982    * ByteString} instance.
983    */
984   public static final class Output extends OutputStream {
985     // Implementation note.
986     // The public methods of this class must be synchronized.  ByteStrings
987     // are guaranteed to be immutable.  Without some sort of locking, it could
988     // be possible for one thread to call toByteSring(), while another thread
989     // is still modifying the underlying byte array.
990 
991     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
992     // argument passed by user, indicating initial capacity.
993     private final int initialCapacity;
994     // ByteStrings to be concatenated to create the result
995     private final ArrayList<ByteString> flushedBuffers;
996     // Total number of bytes in the ByteStrings of flushedBuffers
997     private int flushedBuffersTotalBytes;
998     // Current buffer to which we are writing
999     private byte[] buffer;
1000     // Location in buffer[] to which we write the next byte.
1001     private int bufferPos;
1002 
1003     /**
1004      * Creates a new ByteString output stream with the specified initial capacity.
1005      *
1006      * @param initialCapacity the initial capacity of the output stream.
1007      */
Output(int initialCapacity)1008     Output(int initialCapacity) {
1009       if (initialCapacity < 0) {
1010         throw new IllegalArgumentException("Buffer size < 0");
1011       }
1012       this.initialCapacity = initialCapacity;
1013       this.flushedBuffers = new ArrayList<ByteString>();
1014       this.buffer = new byte[initialCapacity];
1015     }
1016 
1017     @Override
write(int b)1018     public synchronized void write(int b) {
1019       if (bufferPos == buffer.length) {
1020         flushFullBuffer(1);
1021       }
1022       buffer[bufferPos++] = (byte) b;
1023     }
1024 
1025     @Override
write(byte[] b, int offset, int length)1026     public synchronized void write(byte[] b, int offset, int length) {
1027       if (length <= buffer.length - bufferPos) {
1028         // The bytes can fit into the current buffer.
1029         System.arraycopy(b, offset, buffer, bufferPos, length);
1030         bufferPos += length;
1031       } else {
1032         // Use up the current buffer
1033         int copySize = buffer.length - bufferPos;
1034         System.arraycopy(b, offset, buffer, bufferPos, copySize);
1035         offset += copySize;
1036         length -= copySize;
1037         // Flush the buffer, and get a new buffer at least big enough to cover
1038         // what we still need to output
1039         flushFullBuffer(length);
1040         System.arraycopy(b, offset, buffer, /* count= */ 0, length);
1041         bufferPos = length;
1042       }
1043     }
1044 
1045     /**
1046      * Creates a byte string. Its size is the current size of this output stream and its output has
1047      * been copied to it.
1048      *
1049      * @return the current contents of this output stream, as a byte string.
1050      */
toByteString()1051     public synchronized ByteString toByteString() {
1052       flushLastBuffer();
1053       return ByteString.copyFrom(flushedBuffers);
1054     }
1055 
1056     /** Implement java.util.Arrays.copyOf() for jdk 1.5. */
copyArray(byte[] buffer, int length)1057     private byte[] copyArray(byte[] buffer, int length) {
1058       byte[] result = new byte[length];
1059       System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length));
1060       return result;
1061     }
1062 
1063     /**
1064      * Writes the complete contents of this byte array output stream to the specified output stream
1065      * argument.
1066      *
1067      * @param out the output stream to which to write the data.
1068      * @throws IOException if an I/O error occurs.
1069      */
writeTo(OutputStream out)1070     public void writeTo(OutputStream out) throws IOException {
1071       ByteString[] cachedFlushBuffers;
1072       byte[] cachedBuffer;
1073       int cachedBufferPos;
1074       synchronized (this) {
1075         // Copy the information we need into local variables so as to hold
1076         // the lock for as short a time as possible.
1077         cachedFlushBuffers = flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
1078         cachedBuffer = buffer;
1079         cachedBufferPos = bufferPos;
1080       }
1081       for (ByteString byteString : cachedFlushBuffers) {
1082         byteString.writeTo(out);
1083       }
1084 
1085       out.write(copyArray(cachedBuffer, cachedBufferPos));
1086     }
1087 
1088     /**
1089      * Returns the current size of the output stream.
1090      *
1091      * @return the current size of the output stream
1092      */
size()1093     public synchronized int size() {
1094       return flushedBuffersTotalBytes + bufferPos;
1095     }
1096 
1097     /**
1098      * Resets this stream, so that all currently accumulated output in the output stream is
1099      * discarded. The output stream can be used again, reusing the already allocated buffer space.
1100      */
reset()1101     public synchronized void reset() {
1102       flushedBuffers.clear();
1103       flushedBuffersTotalBytes = 0;
1104       bufferPos = 0;
1105     }
1106 
1107     @Override
toString()1108     public String toString() {
1109       return String.format(
1110           "<ByteString.Output@%s size=%d>",
1111           Integer.toHexString(System.identityHashCode(this)), size());
1112     }
1113 
1114     /**
1115      * Internal function used by writers. The current buffer is full, and the writer needs a new
1116      * buffer whose size is at least the specified minimum size.
1117      */
flushFullBuffer(int minSize)1118     private void flushFullBuffer(int minSize) {
1119       flushedBuffers.add(new LiteralByteString(buffer));
1120       flushedBuffersTotalBytes += buffer.length;
1121       // We want to increase our total capacity by 50%, but as a minimum,
1122       // the new buffer should also at least be >= minSize and
1123       // >= initial Capacity.
1124       int newSize = Math.max(initialCapacity, Math.max(minSize, flushedBuffersTotalBytes >>> 1));
1125       buffer = new byte[newSize];
1126       bufferPos = 0;
1127     }
1128 
1129     /**
1130      * Internal function used by {@link #toByteString()}. The current buffer may or may not be full,
1131      * but it needs to be flushed.
1132      */
flushLastBuffer()1133     private void flushLastBuffer() {
1134       if (bufferPos < buffer.length) {
1135         if (bufferPos > 0) {
1136           byte[] bufferCopy = copyArray(buffer, bufferPos);
1137           flushedBuffers.add(new LiteralByteString(bufferCopy));
1138         }
1139         // We reuse this buffer for further writes.
1140       } else {
1141         // Buffer is completely full.  Huzzah.
1142         flushedBuffers.add(new LiteralByteString(buffer));
1143         // 99% of the time, we're not going to use this OutputStream again.
1144         // We set buffer to an empty byte stream so that we're handling this
1145         // case without wasting space.  In the rare case that more writes
1146         // *do* occur, this empty buffer will be flushed and an appropriately
1147         // sized new buffer will be created.
1148         buffer = EMPTY_BYTE_ARRAY;
1149       }
1150       flushedBuffersTotalBytes += bufferPos;
1151       bufferPos = 0;
1152     }
1153   }
1154 
1155   /**
1156    * Constructs a new {@code ByteString} builder, which allows you to efficiently construct a {@code
1157    * ByteString} by writing to a {@link CodedOutputStream}. Using this is much more efficient than
1158    * calling {@code newOutput()} and wrapping that in a {@code CodedOutputStream}.
1159    *
1160    * <p>This is package-private because it's a somewhat confusing interface. Users can call {@link
1161    * Message#toByteString()} instead of calling this directly.
1162    *
1163    * @param size The target byte size of the {@code ByteString}. You must write exactly this many
1164    *     bytes before building the result.
1165    * @return the builder
1166    */
newCodedBuilder(int size)1167   static CodedBuilder newCodedBuilder(int size) {
1168     return new CodedBuilder(size);
1169   }
1170 
1171   /** See {@link ByteString#newCodedBuilder(int)}. */
1172   static final class CodedBuilder {
1173     private final CodedOutputStream output;
1174     private final byte[] buffer;
1175 
CodedBuilder(int size)1176     private CodedBuilder(int size) {
1177       buffer = new byte[size];
1178       output = CodedOutputStream.newInstance(buffer);
1179     }
1180 
build()1181     public ByteString build() {
1182       output.checkNoSpaceLeft();
1183 
1184       // We can be confident that the CodedOutputStream will not modify the
1185       // underlying bytes anymore because it already wrote all of them.  So,
1186       // no need to make a copy.
1187       return new LiteralByteString(buffer);
1188     }
1189 
getCodedOutput()1190     public CodedOutputStream getCodedOutput() {
1191       return output;
1192     }
1193   }
1194 
1195   // =================================================================
1196   // Methods {@link RopeByteString} needs on instances, which aren't part of the
1197   // public API.
1198 
1199   /**
1200    * Return the depth of the tree representing this {@code ByteString}, if any, whose root is this
1201    * node. If this is a leaf node, return 0.
1202    *
1203    * @return tree depth or zero
1204    */
getTreeDepth()1205   protected abstract int getTreeDepth();
1206 
1207   /**
1208    * Return {@code true} if this ByteString is literal (a leaf node) or a flat-enough tree in the
1209    * sense of {@link RopeByteString}.
1210    *
1211    * @return true if the tree is flat enough
1212    */
isBalanced()1213   protected abstract boolean isBalanced();
1214 
1215   /**
1216    * Return the cached hash code if available.
1217    *
1218    * @return value of cached hash code or 0 if not computed yet
1219    */
peekCachedHashCode()1220   protected final int peekCachedHashCode() {
1221     return hash;
1222   }
1223 
1224   /**
1225    * Compute the hash across the value bytes starting with the given hash, and return the result.
1226    * This is used to compute the hash across strings represented as a set of pieces by allowing the
1227    * hash computation to be continued from piece to piece.
1228    *
1229    * @param h starting hash value
1230    * @param offset offset into this value to start looking at data values
1231    * @param length number of data values to include in the hash computation
1232    * @return ending hash value
1233    */
partialHash(int h, int offset, int length)1234   protected abstract int partialHash(int h, int offset, int length);
1235 
1236   /**
1237    * Checks that the given index falls within the specified array size.
1238    *
1239    * @param index the index position to be tested
1240    * @param size the length of the array
1241    * @throws IndexOutOfBoundsException if the index does not fall within the array.
1242    */
checkIndex(int index, int size)1243   static void checkIndex(int index, int size) {
1244     if ((index | (size - (index + 1))) < 0) {
1245       if (index < 0) {
1246         throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
1247       }
1248       throw new ArrayIndexOutOfBoundsException("Index > length: " + index + ", " + size);
1249     }
1250   }
1251 
1252   /**
1253    * Checks that the given range falls within the bounds of an array
1254    *
1255    * @param startIndex the start index of the range (inclusive)
1256    * @param endIndex the end index of the range (exclusive)
1257    * @param size the size of the array.
1258    * @return the length of the range.
1259    * @throws IndexOutOfBoundsException some or all of the range falls outside of the array.
1260    */
checkRange(int startIndex, int endIndex, int size)1261   static int checkRange(int startIndex, int endIndex, int size) {
1262     final int length = endIndex - startIndex;
1263     if ((startIndex | endIndex | length | (size - endIndex)) < 0) {
1264       if (startIndex < 0) {
1265         throw new IndexOutOfBoundsException("Beginning index: " + startIndex + " < 0");
1266       }
1267       if (endIndex < startIndex) {
1268         throw new IndexOutOfBoundsException(
1269             "Beginning index larger than ending index: " + startIndex + ", " + endIndex);
1270       }
1271       // endIndex >= size
1272       throw new IndexOutOfBoundsException("End index: " + endIndex + " >= " + size);
1273     }
1274     return length;
1275   }
1276 
1277   @Override
toString()1278   public final String toString() {
1279     return String.format(
1280         Locale.ROOT,
1281         "<ByteString@%s size=%d contents=\"%s\">",
1282         toHexString(identityHashCode(this)),
1283         size(),
1284         truncateAndEscapeForDisplay());
1285   }
1286 
truncateAndEscapeForDisplay()1287   private String truncateAndEscapeForDisplay() {
1288     final int limit = 50;
1289 
1290     return size() <= limit ? escapeBytes(this) : escapeBytes(substring(0, limit - 3)) + "...";
1291   }
1292 
1293   /**
1294    * This class implements a {@link com.google.protobuf.ByteString} backed by a single array of
1295    * bytes, contiguous in memory. It supports substring by pointing to only a sub-range of the
1296    * underlying byte array, meaning that a substring will reference the full byte-array of the
1297    * string it's made from, exactly as with {@link String}.
1298    *
1299    * @author carlanton@google.com (Carl Haverl)
1300    */
1301   // Keep this class private to avoid deadlocks in classloading across threads as ByteString's
1302   // static initializer loads LiteralByteString and another thread loads LiteralByteString.
1303   private static class LiteralByteString extends ByteString.LeafByteString {
1304     private static final long serialVersionUID = 1L;
1305 
1306     protected final byte[] bytes;
1307 
1308     /**
1309      * Creates a {@code LiteralByteString} backed by the given array, without copying.
1310      *
1311      * @param bytes array to wrap
1312      */
LiteralByteString(byte[] bytes)1313     LiteralByteString(byte[] bytes) {
1314       if (bytes == null) {
1315         throw new NullPointerException();
1316       }
1317       this.bytes = bytes;
1318     }
1319 
1320     @Override
byteAt(int index)1321     public byte byteAt(int index) {
1322       // Unlike most methods in this class, this one is a direct implementation
1323       // ignoring the potential offset because we need to do range-checking in the
1324       // substring case anyway.
1325       return bytes[index];
1326     }
1327 
1328     @Override
internalByteAt(int index)1329     byte internalByteAt(int index) {
1330       return bytes[index];
1331     }
1332 
1333     @Override
size()1334     public int size() {
1335       return bytes.length;
1336     }
1337 
1338     // =================================================================
1339     // ByteString -> substring
1340 
1341     @Override
substring(int beginIndex, int endIndex)1342     public final ByteString substring(int beginIndex, int endIndex) {
1343       final int length = checkRange(beginIndex, endIndex, size());
1344 
1345       if (length == 0) {
1346         return ByteString.EMPTY;
1347       }
1348 
1349       return new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex, length);
1350     }
1351 
1352     // =================================================================
1353     // ByteString -> byte[]
1354 
1355     @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1356     protected void copyToInternal(
1357         byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
1358       // Optimized form, not for subclasses, since we don't call
1359       // getOffsetIntoBytes() or check the 'numberToCopy' parameter.
1360       // TODO(nathanmittler): Is not calling getOffsetIntoBytes really saving that much?
1361       System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy);
1362     }
1363 
1364     @Override
copyTo(ByteBuffer target)1365     public final void copyTo(ByteBuffer target) {
1366       target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes
1367     }
1368 
1369     @Override
asReadOnlyByteBuffer()1370     public final ByteBuffer asReadOnlyByteBuffer() {
1371       return ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size()).asReadOnlyBuffer();
1372     }
1373 
1374     @Override
asReadOnlyByteBufferList()1375     public final List<ByteBuffer> asReadOnlyByteBufferList() {
1376       return Collections.singletonList(asReadOnlyByteBuffer());
1377     }
1378 
1379     @Override
writeTo(OutputStream outputStream)1380     public final void writeTo(OutputStream outputStream) throws IOException {
1381       outputStream.write(toByteArray());
1382     }
1383 
1384     @Override
writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)1385     final void writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)
1386         throws IOException {
1387       outputStream.write(bytes, getOffsetIntoBytes() + sourceOffset, numberToWrite);
1388     }
1389 
1390     @Override
writeTo(ByteOutput output)1391     final void writeTo(ByteOutput output) throws IOException {
1392       output.writeLazy(bytes, getOffsetIntoBytes(), size());
1393     }
1394 
1395     @Override
toStringInternal(Charset charset)1396     protected final String toStringInternal(Charset charset) {
1397       return new String(bytes, getOffsetIntoBytes(), size(), charset);
1398     }
1399 
1400     // =================================================================
1401     // UTF-8 decoding
1402 
1403     @Override
isValidUtf8()1404     public final boolean isValidUtf8() {
1405       int offset = getOffsetIntoBytes();
1406       return Utf8.isValidUtf8(bytes, offset, offset + size());
1407     }
1408 
1409     @Override
partialIsValidUtf8(int state, int offset, int length)1410     protected final int partialIsValidUtf8(int state, int offset, int length) {
1411       int index = getOffsetIntoBytes() + offset;
1412       return Utf8.partialIsValidUtf8(state, bytes, index, index + length);
1413     }
1414 
1415     // =================================================================
1416     // equals() and hashCode()
1417 
1418     @Override
equals(Object other)1419     public final boolean equals(Object other) {
1420       if (other == this) {
1421         return true;
1422       }
1423       if (!(other instanceof ByteString)) {
1424         return false;
1425       }
1426 
1427       if (size() != ((ByteString) other).size()) {
1428         return false;
1429       }
1430       if (size() == 0) {
1431         return true;
1432       }
1433 
1434       if (other instanceof LiteralByteString) {
1435         LiteralByteString otherAsLiteral = (LiteralByteString) other;
1436         // If we know the hash codes and they are not equal, we know the byte
1437         // strings are not equal.
1438         int thisHash = peekCachedHashCode();
1439         int thatHash = otherAsLiteral.peekCachedHashCode();
1440         if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) {
1441           return false;
1442         }
1443 
1444         return equalsRange((LiteralByteString) other, 0, size());
1445       } else {
1446         // RopeByteString and NioByteString.
1447         return other.equals(this);
1448       }
1449     }
1450 
1451     /**
1452      * Check equality of the substring of given length of this object starting at zero with another
1453      * {@code LiteralByteString} substring starting at offset.
1454      *
1455      * @param other what to compare a substring in
1456      * @param offset offset into other
1457      * @param length number of bytes to compare
1458      * @return true for equality of substrings, else false.
1459      */
1460     @Override
equalsRange(ByteString other, int offset, int length)1461     final boolean equalsRange(ByteString other, int offset, int length) {
1462       if (length > other.size()) {
1463         throw new IllegalArgumentException("Length too large: " + length + size());
1464       }
1465       if (offset + length > other.size()) {
1466         throw new IllegalArgumentException(
1467             "Ran off end of other: " + offset + ", " + length + ", " + other.size());
1468       }
1469 
1470       if (other instanceof LiteralByteString) {
1471         LiteralByteString lbsOther = (LiteralByteString) other;
1472         byte[] thisBytes = bytes;
1473         byte[] otherBytes = lbsOther.bytes;
1474         int thisLimit = getOffsetIntoBytes() + length;
1475         for (int thisIndex = getOffsetIntoBytes(),
1476                 otherIndex = lbsOther.getOffsetIntoBytes() + offset;
1477             (thisIndex < thisLimit);
1478             ++thisIndex, ++otherIndex) {
1479           if (thisBytes[thisIndex] != otherBytes[otherIndex]) {
1480             return false;
1481           }
1482         }
1483         return true;
1484       }
1485 
1486       return other.substring(offset, offset + length).equals(substring(0, length));
1487     }
1488 
1489     @Override
partialHash(int h, int offset, int length)1490     protected final int partialHash(int h, int offset, int length) {
1491       return Internal.partialHash(h, bytes, getOffsetIntoBytes() + offset, length);
1492     }
1493 
1494     // =================================================================
1495     // Input stream
1496 
1497     @Override
newInput()1498     public final InputStream newInput() {
1499       return new ByteArrayInputStream(bytes, getOffsetIntoBytes(), size()); // No copy
1500     }
1501 
1502     @Override
newCodedInput()1503     public final CodedInputStream newCodedInput() {
1504       // We trust CodedInputStream not to modify the bytes, or to give anyone
1505       // else access to them.
1506       return CodedInputStream.newInstance(
1507           bytes, getOffsetIntoBytes(), size(), /* bufferIsImmutable= */ true);
1508     }
1509 
1510     // =================================================================
1511     // Internal methods
1512 
1513     /**
1514      * Offset into {@code bytes[]} to use, non-zero for substrings.
1515      *
1516      * @return always 0 for this class
1517      */
getOffsetIntoBytes()1518     protected int getOffsetIntoBytes() {
1519       return 0;
1520     }
1521   }
1522 
1523   /**
1524    * This class is used to represent the substring of a {@link ByteString} over a single byte array.
1525    * In terms of the public API of {@link ByteString}, you end up here by calling {@link
1526    * ByteString#copyFrom(byte[])} followed by {@link ByteString#substring(int, int)}.
1527    *
1528    * <p>This class contains most of the overhead involved in creating a substring from a {@link
1529    * LiteralByteString}. The overhead involves some range-checking and two extra fields.
1530    *
1531    * @author carlanton@google.com (Carl Haverl)
1532    */
1533   // Keep this class private to avoid deadlocks in classloading across threads as ByteString's
1534   // static initializer loads LiteralByteString and another thread loads BoundedByteString.
1535   private static final class BoundedByteString extends LiteralByteString {
1536 
1537     private final int bytesOffset;
1538     private final int bytesLength;
1539 
1540     /**
1541      * Creates a {@code BoundedByteString} backed by the sub-range of given array, without copying.
1542      *
1543      * @param bytes array to wrap
1544      * @param offset index to first byte to use in bytes
1545      * @param length number of bytes to use from bytes
1546      * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0}, or if {@code
1547      *     offset + length > bytes.length}.
1548      */
BoundedByteString(byte[] bytes, int offset, int length)1549     BoundedByteString(byte[] bytes, int offset, int length) {
1550       super(bytes);
1551       checkRange(offset, offset + length, bytes.length);
1552 
1553       this.bytesOffset = offset;
1554       this.bytesLength = length;
1555     }
1556 
1557     /**
1558      * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for
1559      * backwards-compatibility reasons although it would more properly be {@link
1560      * IndexOutOfBoundsException}.
1561      *
1562      * @param index index of byte
1563      * @return the value
1564      * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
1565      */
1566     @Override
byteAt(int index)1567     public byte byteAt(int index) {
1568       // We must check the index ourselves as we cannot rely on Java array index
1569       // checking for substrings.
1570       checkIndex(index, size());
1571       return bytes[bytesOffset + index];
1572     }
1573 
1574     @Override
internalByteAt(int index)1575     byte internalByteAt(int index) {
1576       return bytes[bytesOffset + index];
1577     }
1578 
1579     @Override
size()1580     public int size() {
1581       return bytesLength;
1582     }
1583 
1584     @Override
getOffsetIntoBytes()1585     protected int getOffsetIntoBytes() {
1586       return bytesOffset;
1587     }
1588 
1589     // =================================================================
1590     // ByteString -> byte[]
1591 
1592     @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1593     protected void copyToInternal(
1594         byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
1595       System.arraycopy(
1596           bytes, getOffsetIntoBytes() + sourceOffset, target, targetOffset, numberToCopy);
1597     }
1598 
1599     // =================================================================
1600     // Serializable
1601 
1602     private static final long serialVersionUID = 1L;
1603 
writeReplace()1604     Object writeReplace() {
1605       return ByteString.wrap(toByteArray());
1606     }
1607 
readObject(@uppressWarnings"unused") ObjectInputStream in)1608     private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
1609       throw new InvalidObjectException(
1610           "BoundedByteStream instances are not to be serialized directly");
1611     }
1612   }
1613 }
1614