• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 package com.google.protobuf;
9 
10 import static com.google.protobuf.Internal.checkNotNull;
11 import static com.google.protobuf.TextFormatEscaper.escapeBytes;
12 import static java.lang.Integer.toHexString;
13 import static java.lang.System.identityHashCode;
14 
15 import java.io.ByteArrayInputStream;
16 import java.io.ByteArrayOutputStream;
17 import java.io.IOException;
18 import java.io.InputStream;
19 import java.io.InvalidObjectException;
20 import java.io.ObjectInputStream;
21 import java.io.OutputStream;
22 import java.io.Serializable;
23 import java.io.UnsupportedEncodingException;
24 import java.nio.ByteBuffer;
25 import java.nio.ByteOrder;
26 import java.nio.InvalidMarkException;
27 import java.nio.charset.Charset;
28 import java.nio.charset.UnsupportedCharsetException;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.Comparator;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.Locale;
37 import java.util.NoSuchElementException;
38 
39 /**
40  * Immutable sequence of bytes. Provides conversions to and from {@code byte[]}, {@link
41  * java.lang.String}, {@link ByteBuffer}, {@link InputStream}, {@link OutputStream}. Also provides a
42  * conversion to {@link CodedInputStream}.
43  *
44  * <p>Like {@link String}, the contents of a {@link ByteString} can never be observed to change, not
45  * even in the presence of a data race or incorrect API usage in the client code.
46  *
47  * <p>Substring is supported by sharing the reference to the immutable underlying bytes.
48  * Concatenation is likewise supported without copying (long strings) by building a tree of pieces
49  * in {@link RopeByteString}.
50  *
51  * @author crazybob@google.com Bob Lee
52  * @author kenton@google.com Kenton Varda
53  * @author carlanton@google.com Carl Haverl
54  * @author martinrb@google.com Martin Buchholz
55  */
56 @CheckReturnValue
57 public abstract class ByteString implements Iterable<Byte>, Serializable {
58   private static final long serialVersionUID = 1L;
59 
60   /**
61    * When two strings to be concatenated have a combined length shorter than this, we just copy
62    * their bytes on {@link #concat(ByteString)}. The trade-off is copy size versus the overhead of
63    * creating tree nodes in {@link RopeByteString}.
64    */
65   static final int CONCATENATE_BY_COPY_SIZE = 128;
66 
67   /**
68    * When copying an InputStream into a ByteString with .readFrom(), the chunks in the underlying
69    * rope start at 256 bytes, but double each iteration up to 8192 bytes.
70    */
71   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
72 
73   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
74 
75   /** Empty {@code ByteString}. */
76   public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY);
77 
78   /**
79    * An interface to efficiently copy {@code byte[]}.
80    *
81    * <p>One of the noticeable costs of copying a byte[] into a new array using {@code
82    * System.arraycopy} is nullification of a new buffer before the copy. It has been shown the
83    * Hotspot VM is capable to intrisicfy {@code Arrays.copyOfRange} operation to avoid this
84    * expensive nullification and provide substantial performance gain. Unfortunately this does not
85    * hold on Android runtimes and could make the copy slightly slower due to additional code in the
86    * {@code Arrays.copyOfRange}. Thus we provide two different implementation for array copier for
87    * Hotspot and Android runtimes.
88    */
89   private interface ByteArrayCopier {
90     /** Copies the specified range of the specified array into a new array */
copyFrom(byte[] bytes, int offset, int size)91     byte[] copyFrom(byte[] bytes, int offset, int size);
92   }
93 
94   /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */
95   private static final class SystemByteArrayCopier implements ByteArrayCopier {
96     @Override
copyFrom(byte[] bytes, int offset, int size)97     public byte[] copyFrom(byte[] bytes, int offset, int size) {
98       byte[] copy = new byte[size];
99       System.arraycopy(bytes, offset, copy, 0, size);
100       return copy;
101     }
102   }
103 
104   /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */
105   private static final class ArraysByteArrayCopier implements ByteArrayCopier {
106     @Override
copyFrom(byte[] bytes, int offset, int size)107     public byte[] copyFrom(byte[] bytes, int offset, int size) {
108       return Arrays.copyOfRange(bytes, offset, offset + size);
109     }
110   }
111 
112   private static final ByteArrayCopier byteArrayCopier;
113 
114   static {
115     byteArrayCopier =
116         Android.isOnAndroidDevice() ? new SystemByteArrayCopier() : new ArraysByteArrayCopier();
117   }
118 
119   /**
120    * Cached hash value. Intentionally accessed via a data race, which is safe because of the Java
121    * Memory Model's "no out-of-thin-air values" guarantees for ints. A value of 0 implies that the
122    * hash has not been set.
123    */
124   private int hash = 0;
125 
126   // This constructor is here to prevent subclassing outside of this package,
ByteString()127   ByteString() {}
128 
129   /**
130    * Gets the byte at the given index. This method should be used only for random access to
131    * individual bytes. To access bytes sequentially, use the {@link ByteIterator} returned by {@link
132    * #iterator()}, and call {@link #substring(int, int)} first if necessary.
133    *
134    * @param index index of byte
135    * @return the value
136    * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
137    */
byteAt(int index)138   public abstract byte byteAt(int index);
139 
140   /**
141    * Gets the byte at the given index, assumes bounds checking has already been performed.
142    *
143    * @param index index of byte
144    * @return the value
145    * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
146    */
internalByteAt(int index)147   abstract byte internalByteAt(int index);
148 
149   /**
150    * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. To avoid
151    * auto-boxing, you may get the iterator manually and call {@link ByteIterator#nextByte()}.
152    *
153    * @return the iterator
154    */
155   @Override
iterator()156   public ByteIterator iterator() {
157     return new AbstractByteIterator() {
158       private int position = 0;
159       private final int limit = size();
160 
161       @Override
162       public boolean hasNext() {
163         return position < limit;
164       }
165 
166       @Override
167       public byte nextByte() {
168         int currentPos = position;
169         if (currentPos >= limit) {
170           throw new NoSuchElementException();
171         }
172         position = currentPos + 1;
173         return internalByteAt(currentPos);
174       }
175     };
176   }
177 
178   /**
179    * This interface extends {@code Iterator<Byte>}, so that we can return an unboxed {@code byte}.
180    */
181   public interface ByteIterator extends Iterator<Byte> {
182     /**
183      * An alternative to {@link Iterator#next()} that returns an unboxed primitive {@code byte}.
184      *
185      * @return the next {@code byte} in the iteration
186      * @throws NoSuchElementException if the iteration has no more elements
187      */
nextByte()188     byte nextByte();
189   }
190 
191   abstract static class AbstractByteIterator implements ByteIterator {
192     @Override
next()193     public final Byte next() {
194       // Boxing calls Byte.valueOf(byte), which does not instantiate.
195       return nextByte();
196     }
197 
198     @Override
remove()199     public final void remove() {
200       throw new UnsupportedOperationException();
201     }
202   }
203 
204   /**
205    * Gets the number of bytes.
206    *
207    * @return size in bytes
208    */
size()209   public abstract int size();
210 
211   /**
212    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
213    *
214    * @return true if this is zero bytes long
215    */
isEmpty()216   public final boolean isEmpty() {
217     return size() == 0;
218   }
219 
220   /** Returns an empty {@code ByteString} of size {@code 0}. */
empty()221   public static final ByteString empty() {
222     return EMPTY;
223   }
224 
225   // =================================================================
226   // Comparison
227 
228   private static final int UNSIGNED_BYTE_MASK = 0xFF;
229 
230   /**
231    * Returns the value of the given byte as an integer, interpreting the byte as an unsigned value.
232    * That is, returns {@code value + 256} if {@code value} is negative; {@code value} itself
233    * otherwise.
234    *
235    * <p>Note: This code was copied from {@link com.google.common.primitives.UnsignedBytes#toInt}, as
236    * Guava libraries cannot be used in the {@code com.google.protobuf} package.
237    */
toInt(byte value)238   private static int toInt(byte value) {
239     return value & UNSIGNED_BYTE_MASK;
240   }
241 
242   /** Returns the numeric value of the given character in hex, or -1 if invalid. */
hexDigit(char c)243   private static int hexDigit(char c) {
244     if (c >= '0' && c <= '9') {
245       return c - '0';
246     } else if (c >= 'A' && c <= 'F') {
247       return c - 'A' + 10;
248     } else if (c >= 'a' && c <= 'f') {
249       return c - 'a' + 10;
250     } else {
251       return -1;
252     }
253   }
254 
255   /**
256    * Returns the numeric value of the given character at index in hexString.
257    *
258    * @throws NumberFormatException if the hexString character is invalid.
259    */
extractHexDigit(String hexString, int index)260   private static int extractHexDigit(String hexString, int index) {
261     int digit = hexDigit(hexString.charAt(index));
262     if (digit == -1) {
263       throw new NumberFormatException(
264           "Invalid hexString "
265               + hexString
266               + " must only contain [0-9a-fA-F] but contained "
267               + hexString.charAt(index)
268               + " at index "
269               + index);
270     }
271     return digit;
272   }
273 
274   /**
275    * Compares two {@link ByteString}s lexicographically, treating their contents as unsigned byte
276    * values between 0 and 255 (inclusive).
277    *
278    * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it
279    * is interpreted as an unsigned value, {@code 255}.
280    */
281   private static final Comparator<ByteString> UNSIGNED_LEXICOGRAPHICAL_COMPARATOR =
282       new Comparator<ByteString>() {
283         @Override
284         public int compare(ByteString former, ByteString latter) {
285           ByteIterator formerBytes = former.iterator();
286           ByteIterator latterBytes = latter.iterator();
287 
288           while (formerBytes.hasNext() && latterBytes.hasNext()) {
289             int result =
290                 Integer.compare(toInt(formerBytes.nextByte()), toInt(latterBytes.nextByte()));
291             if (result != 0) {
292               return result;
293             }
294           }
295           return Integer.compare(former.size(), latter.size());
296         }
297       };
298 
299   /**
300    * Returns a {@link Comparator} which compares {@link ByteString}-s lexicographically as sequences
301    * of unsigned bytes (i.e. values between 0 and 255, inclusive).
302    *
303    * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it
304    * is interpreted as an unsigned value, {@code 255}:
305    *
306    * <ul>
307    *   <li>{@code `-1` -> 0b11111111 (two's complement) -> 255}
308    *   <li>{@code `1` -> 0b00000001 -> 1}
309    * </ul>
310    */
unsignedLexicographicalComparator()311   public static Comparator<ByteString> unsignedLexicographicalComparator() {
312     return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR;
313   }
314 
315   // =================================================================
316   // ByteString -> substring
317 
318   /**
319    * Return the substring from {@code beginIndex}, inclusive, to the end of the string.
320    *
321    * @param beginIndex start at this index
322    * @return substring sharing underlying data
323    * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}.
324    */
substring(int beginIndex)325   public final ByteString substring(int beginIndex) {
326     return substring(beginIndex, size());
327   }
328 
329   /**
330    * Return the substring from {@code beginIndex}, inclusive, to {@code endIndex}, exclusive.
331    *
332    * @param beginIndex start at this index
333    * @param endIndex the last character is the one before this index
334    * @return substring sharing underlying data
335    * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or
336    *     {@code beginIndex > endIndex}.
337    */
substring(int beginIndex, int endIndex)338   public abstract ByteString substring(int beginIndex, int endIndex);
339 
340   /**
341    * Tests if this bytestring starts with the specified prefix. Similar to {@link
342    * String#startsWith(String)}
343    *
344    * @param prefix the prefix.
345    * @return <code>true</code> if the byte sequence represented by the argument is a prefix of the
346    *     byte sequence represented by this string; <code>false</code> otherwise.
347    */
startsWith(ByteString prefix)348   public final boolean startsWith(ByteString prefix) {
349     return size() >= prefix.size() && substring(0, prefix.size()).equals(prefix);
350   }
351 
352   /**
353    * Tests if this bytestring ends with the specified suffix. Similar to {@link
354    * String#endsWith(String)}
355    *
356    * @param suffix the suffix.
357    * @return <code>true</code> if the byte sequence represented by the argument is a suffix of the
358    *     byte sequence represented by this string; <code>false</code> otherwise.
359    */
endsWith(ByteString suffix)360   public final boolean endsWith(ByteString suffix) {
361     return size() >= suffix.size() && substring(size() - suffix.size()).equals(suffix);
362   }
363 
364   // =================================================================
365   // String -> ByteString
366 
367   /**
368    * Returns a {@code ByteString} from a hexadecimal String.
369    *
370    * @param hexString String of hexadecimal digits to create {@code ByteString} from.
371    * @throws NumberFormatException if the hexString does not contain a parsable hex String.
372    */
fromHex(@ompileTimeConstant String hexString)373   public static ByteString fromHex(@CompileTimeConstant String hexString) {
374     if (hexString.length() % 2 != 0) {
375       throw new NumberFormatException(
376           "Invalid hexString " + hexString + " of length " + hexString.length() + " must be even.");
377     }
378     byte[] bytes = new byte[hexString.length() / 2];
379     for (int i = 0; i < bytes.length; i++) {
380       int d1 = extractHexDigit(hexString, 2 * i);
381       int d2 = extractHexDigit(hexString, 2 * i + 1);
382       bytes[i] = (byte) (d1 << 4 | d2);
383     }
384     return new LiteralByteString(bytes);
385   }
386 
387   // =================================================================
388   // byte[] -> ByteString
389 
390   /**
391    * Copies the given bytes into a {@code ByteString}.
392    *
393    * @param bytes source array
394    * @param offset offset in source array
395    * @param size number of bytes to copy
396    * @return new {@code ByteString}
397    * @throws IndexOutOfBoundsException if {@code offset} or {@code size} are out of bounds
398    */
copyFrom(byte[] bytes, int offset, int size)399   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
400     checkRange(offset, offset + size, bytes.length);
401     return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size));
402   }
403 
404   /**
405    * Copies the given bytes into a {@code ByteString}.
406    *
407    * @param bytes to copy
408    * @return new {@code ByteString}
409    */
copyFrom(byte[] bytes)410   public static ByteString copyFrom(byte[] bytes) {
411     return copyFrom(bytes, 0, bytes.length);
412   }
413 
414   /**
415    * Wraps the given bytes into a {@code ByteString}. Intended for internal usage within the
416    * library.
417    */
wrap(ByteBuffer buffer)418   static ByteString wrap(ByteBuffer buffer) {
419     if (buffer.hasArray()) {
420       final int offset = buffer.arrayOffset();
421       return ByteString.wrap(buffer.array(), offset + buffer.position(), buffer.remaining());
422     } else {
423       return new NioByteString(buffer);
424     }
425   }
426 
427   // For use in tests
nioByteString(ByteBuffer buffer)428   static ByteString nioByteString(ByteBuffer buffer) {
429     return new NioByteString(buffer);
430   }
431 
432   /**
433    * Wraps the given bytes into a {@code ByteString}. Intended for internal usage within the library
434    * to force a classload of ByteString before LiteralByteString.
435    */
wrap(byte[] bytes)436   static ByteString wrap(byte[] bytes) {
437     // TODO: Return EMPTY when bytes are empty to reduce allocations?
438     return new LiteralByteString(bytes);
439   }
440 
441   /**
442    * Wraps the given bytes into a {@code ByteString}. Intended for internal usage within the library
443    * to force a classload of ByteString before BoundedByteString and LiteralByteString.
444    */
wrap(byte[] bytes, int offset, int length)445   static ByteString wrap(byte[] bytes, int offset, int length) {
446     return new BoundedByteString(bytes, offset, length);
447   }
448 
449   /**
450    * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into a {@code
451    * ByteString}.
452    *
453    * @param bytes source buffer
454    * @param size number of bytes to copy
455    * @return new {@code ByteString}
456    * @throws IndexOutOfBoundsException if {@code size > bytes.remaining()}
457    */
copyFrom(ByteBuffer bytes, int size)458   public static ByteString copyFrom(ByteBuffer bytes, int size) {
459     checkRange(0, size, bytes.remaining());
460     byte[] copy = new byte[size];
461     bytes.get(copy);
462     return new LiteralByteString(copy);
463   }
464 
465   /**
466    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into a {@code ByteString}.
467    *
468    * @param bytes sourceBuffer
469    * @return new {@code ByteString}
470    */
copyFrom(ByteBuffer bytes)471   public static ByteString copyFrom(ByteBuffer bytes) {
472     return copyFrom(bytes, bytes.remaining());
473   }
474 
475   /**
476    * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as
477    * a {@code ByteString}.
478    *
479    * @param text source string
480    * @param charsetName encoding to use
481    * @return new {@code ByteString}
482    * @throws UnsupportedEncodingException if the encoding isn't found
483    */
copyFrom(String text, String charsetName)484   public static ByteString copyFrom(String text, String charsetName)
485       throws UnsupportedEncodingException {
486     return new LiteralByteString(text.getBytes(charsetName));
487   }
488 
489   /**
490    * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as
491    * a {@code ByteString}.
492    *
493    * @param text source string
494    * @param charset encode using this charset
495    * @return new {@code ByteString}
496    */
copyFrom(String text, Charset charset)497   public static ByteString copyFrom(String text, Charset charset) {
498     return new LiteralByteString(text.getBytes(charset));
499   }
500 
501   /**
502    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the result as a {@code
503    * ByteString}.
504    *
505    * @param text source string
506    * @return new {@code ByteString}
507    */
copyFromUtf8(String text)508   public static ByteString copyFromUtf8(String text) {
509     return new LiteralByteString(text.getBytes(Internal.UTF_8));
510   }
511 
512   // =================================================================
513   // InputStream -> ByteString
514 
515   /**
516    * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary
517    * until all bytes are read through to the end of the stream.
518    *
519    * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte
520    * arrays ("chunks") of the stream data. The first chunk is small, with subsequent chunks each
521    * being double the size, up to 8K.
522    *
523    * <p>Each byte read from the input stream will be copied twice to ensure that the resulting
524    * ByteString is truly immutable.
525    *
526    * @param streamToDrain The source stream, which is read completely but not closed.
527    * @return A new {@code ByteString} which is made up of chunks of various sizes, depending on the
528    *     behavior of the underlying stream.
529    * @throws IOException if there is a problem reading the underlying stream
530    * @throws IllegalArgumentException if the stream supplies more than Integer.MAX_VALUE bytes
531    */
readFrom(InputStream streamToDrain)532   public static ByteString readFrom(InputStream streamToDrain) throws IOException {
533     return readFrom(streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
534   }
535 
536   /**
537    * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary
538    * until all bytes are read through to the end of the stream.
539    *
540    * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte
541    * arrays ("chunks") of the stream data. The chunkSize parameter sets the size of these byte
542    * arrays.
543    *
544    * <p>Each byte read from the input stream will be copied twice to ensure that the resulting
545    * ByteString is truly immutable.
546    *
547    * @param streamToDrain The source stream, which is read completely but not closed.
548    * @param chunkSize The size of the chunks in which to read the stream.
549    * @return A new {@code ByteString} which is made up of chunks of the given size.
550    * @throws IOException if there is a problem reading the underlying stream
551    * @throws IllegalArgumentException if the stream supplies more than Integer.MAX_VALUE bytes
552    */
readFrom(InputStream streamToDrain, int chunkSize)553   public static ByteString readFrom(InputStream streamToDrain, int chunkSize) throws IOException {
554     return readFrom(streamToDrain, chunkSize, chunkSize);
555   }
556 
557   /**
558    * Helper method that takes the chunk size range as a parameter.
559    *
560    * @param streamToDrain the source stream, which is read completely but not closed
561    * @param minChunkSize the minimum size of the chunks in which to read the stream
562    * @param maxChunkSize the maximum size of the chunks in which to read the stream
563    * @return a new {@code ByteString} which is made up of chunks within the given size range
564    * @throws IOException if there is a problem reading the underlying stream
565    * @throws IllegalArgumentException if the stream supplies more than Integer.MAX_VALUE bytes
566    */
readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)567   public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)
568       throws IOException {
569     Collection<ByteString> results = new ArrayList<ByteString>();
570 
571     // copy the inbound bytes into a list of chunks; the chunk size
572     // grows exponentially to support both short and long streams.
573     int chunkSize = minChunkSize;
574     while (true) {
575       ByteString chunk = readChunk(streamToDrain, chunkSize);
576       if (chunk == null) {
577         break;
578       }
579       results.add(chunk);
580       chunkSize = Math.min(chunkSize * 2, maxChunkSize);
581     }
582 
583     return ByteString.copyFrom(results);
584   }
585 
586   /**
587    * Blocks until a chunk of the given size can be made from the stream, or EOF is reached. Calls
588    * read() repeatedly in case the given stream implementation doesn't completely fill the given
589    * buffer in one read() call.
590    *
591    * @return A chunk of the desired size, or else a chunk as large as was available when end of
592    *     stream was reached. Returns null if the given stream had no more data in it.
593    */
readChunk(InputStream in, final int chunkSize)594   private static ByteString readChunk(InputStream in, final int chunkSize) throws IOException {
595     final byte[] buf = new byte[chunkSize];
596     int bytesRead = 0;
597     while (bytesRead < chunkSize) {
598       final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
599       if (count == -1) {
600         break;
601       }
602       bytesRead += count;
603     }
604 
605     if (bytesRead == 0) {
606       return null;
607     }
608 
609     // Always make a copy since InputStream could steal a reference to buf.
610     return ByteString.copyFrom(buf, 0, bytesRead);
611   }
612 
613   // =================================================================
614   // Multiple ByteStrings -> One ByteString
615 
616   /**
617    * Concatenate the given {@code ByteString} to this one. Short concatenations, of total size
618    * smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are produced by copying the
619    * underlying bytes (as per Rope.java, <a
620    * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
621    * BAP95 </a>. In general, the concatenate involves no copying.
622    *
623    * @param other string to concatenate
624    * @return a new {@code ByteString} instance
625    * @throws IllegalArgumentException if the combined size of the two byte strings exceeds
626    *     Integer.MAX_VALUE
627    */
concat(ByteString other)628   public final ByteString concat(ByteString other) {
629     if (Integer.MAX_VALUE - size() < other.size()) {
630       throw new IllegalArgumentException(
631           "ByteString would be too long: " + size() + "+" + other.size());
632     }
633 
634     return RopeByteString.concatenate(this, other);
635   }
636 
637   /**
638    * Concatenates all byte strings in the iterable and returns the result. This is designed to run
639    * in O(list size), not O(total bytes).
640    *
641    * <p>The returned {@code ByteString} is not necessarily a unique object. If the list is empty,
642    * the returned object is the singleton empty {@code ByteString}. If the list has only one
643    * element, that {@code ByteString} will be returned without copying.
644    *
645    * @param byteStrings strings to be concatenated
646    * @return new {@code ByteString}
647    * @throws IllegalArgumentException if the combined size of the byte strings exceeds
648    *     Integer.MAX_VALUE
649    */
copyFrom(Iterable<ByteString> byteStrings)650   public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
651     // Determine the size;
652     final int size;
653     if (!(byteStrings instanceof Collection)) {
654       int tempSize = 0;
655       for (Iterator<ByteString> iter = byteStrings.iterator();
656           iter.hasNext();
657           iter.next(), ++tempSize) {}
658       size = tempSize;
659     } else {
660       size = ((Collection<ByteString>) byteStrings).size();
661     }
662 
663     if (size == 0) {
664       return EMPTY;
665     }
666 
667     return balancedConcat(byteStrings.iterator(), size);
668   }
669 
670   // Internal function used by copyFrom(Iterable<ByteString>).
671   // Create a balanced concatenation of the next "length" elements from the
672   // iterable.
balancedConcat(Iterator<ByteString> iterator, int length)673   private static ByteString balancedConcat(Iterator<ByteString> iterator, int length) {
674     if (length < 1) {
675       throw new IllegalArgumentException(String.format("length (%s) must be >= 1", length));
676     }
677     ByteString result;
678     if (length == 1) {
679       result = iterator.next();
680     } else {
681       int halfLength = length >>> 1;
682       ByteString left = balancedConcat(iterator, halfLength);
683       ByteString right = balancedConcat(iterator, length - halfLength);
684       result = left.concat(right);
685     }
686     return result;
687   }
688 
689   // =================================================================
690   // ByteString -> byte[]
691 
692   /**
693    * Copies bytes into a buffer at the given offset.
694    *
695    * <p>To copy a subset of bytes, you call this method on the return value of {@link
696    * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target, offset)}
697    *
698    * @param target buffer to copy into
699    * @param offset in the target buffer
700    * @throws IndexOutOfBoundsException if the offset is negative or too large
701    */
copyTo(byte[] target, int offset)702   public void copyTo(byte[] target, int offset) {
703     copyTo(target, 0, offset, size());
704   }
705 
706   /**
707    * Copies bytes into a buffer.
708    *
709    * @param target buffer to copy into
710    * @param sourceOffset offset within these bytes
711    * @param targetOffset offset within the target buffer
712    * @param numberToCopy number of bytes to copy
713    * @throws IndexOutOfBoundsException if an offset or size is negative or too large
714    * @deprecated Instead, call {@code byteString.substring(sourceOffset, sourceOffset +
715    *     numberToCopy).copyTo(target, targetOffset)}
716    */
717   @Deprecated
copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)718   public final void copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
719     checkRange(sourceOffset, sourceOffset + numberToCopy, size());
720     checkRange(targetOffset, targetOffset + numberToCopy, target.length);
721     if (numberToCopy > 0) {
722       copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
723     }
724   }
725 
726   /**
727    * Internal (package private) implementation of {@link #copyTo(byte[],int,int,int)}. It assumes
728    * that all error checking has already been performed and that {@code numberToCopy > 0}.
729    */
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)730   protected abstract void copyToInternal(
731       byte[] target, int sourceOffset, int targetOffset, int numberToCopy);
732 
733   /**
734    * Copies bytes into a ByteBuffer.
735    *
736    * <p>To copy a subset of bytes, you call this method on the return value of {@link
737    * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target)}
738    *
739    * @param target ByteBuffer to copy into.
740    * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
741    * @throws java.nio.BufferOverflowException if the {@code target}'s remaining() space is not large
742    *     enough to hold the data.
743    */
copyTo(ByteBuffer target)744   public abstract void copyTo(ByteBuffer target);
745 
746   /**
747    * Copies bytes to a {@code byte[]}.
748    *
749    * @return copied bytes
750    */
toByteArray()751   public final byte[] toByteArray() {
752     final int size = size();
753     if (size == 0) {
754       return Internal.EMPTY_BYTE_ARRAY;
755     }
756     byte[] result = new byte[size];
757     copyToInternal(result, 0, 0, size);
758     return result;
759   }
760 
761   /**
762    * Writes a copy of the contents of this byte string to the specified output stream argument.
763    *
764    * @param out the output stream to which to write the data.
765    * @throws IOException if an I/O error occurs.
766    */
writeTo(OutputStream out)767   public abstract void writeTo(OutputStream out) throws IOException;
768 
769   /**
770    * Writes a specified part of this byte string to an output stream.
771    *
772    * @param out the output stream to which to write the data.
773    * @param sourceOffset offset within these bytes
774    * @param numberToWrite number of bytes to write
775    * @throws IOException if an I/O error occurs.
776    * @throws IndexOutOfBoundsException if an offset or size is negative or too large
777    */
writeTo(OutputStream out, int sourceOffset, int numberToWrite)778   final void writeTo(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
779     checkRange(sourceOffset, sourceOffset + numberToWrite, size());
780     if (numberToWrite > 0) {
781       writeToInternal(out, sourceOffset, numberToWrite);
782     }
783   }
784 
785   /**
786    * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes all error checking has
787    * already been done.
788    */
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)789   abstract void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)
790       throws IOException;
791 
792   /**
793    * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling this method may
794    * result in multiple operations on the target {@link ByteOutput}.
795    *
796    * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link
797    * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious
798    * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution!
799    *
800    * @param byteOutput the output target to receive the bytes
801    * @throws IOException if an I/O error occurs
802    * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput)
803    */
writeTo(ByteOutput byteOutput)804   abstract void writeTo(ByteOutput byteOutput) throws IOException;
805 
806   /**
807    * This method behaves exactly the same as {@link #writeTo(ByteOutput)} unless the {@link
808    * ByteString} is a rope. For ropes, the leaf nodes are written in reverse order to the {@code
809    * byteOutput}.
810    *
811    * @param byteOutput the output target to receive the bytes
812    * @throws IOException if an I/O error occurs
813    * @see UnsafeByteOperations#unsafeWriteToReverse(ByteString, ByteOutput)
814    */
writeToReverse(ByteOutput byteOutput)815   abstract void writeToReverse(ByteOutput byteOutput) throws IOException;
816 
817   /**
818    * Constructs a read-only {@code java.nio.ByteBuffer} whose content is equal to the contents of
819    * this byte string. The result uses the same backing array as the byte string, if possible.
820    *
821    * @return wrapped bytes
822    */
asReadOnlyByteBuffer()823   public abstract ByteBuffer asReadOnlyByteBuffer();
824 
825   /**
826    * Constructs a list of read-only {@code java.nio.ByteBuffer} objects such that the concatenation
827    * of their contents is equal to the contents of this byte string. The result uses the same
828    * backing arrays as the byte string.
829    *
830    * <p>By returning a list, implementations of this method may be able to avoid copying even when
831    * there are multiple backing arrays.
832    *
833    * @return a list of wrapped bytes
834    */
asReadOnlyByteBufferList()835   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
836 
837   /**
838    * Constructs a new {@code String} by decoding the bytes using the specified charset.
839    *
840    * @param charsetName encode using this charset
841    * @return new string
842    * @throws UnsupportedEncodingException if charset isn't recognized
843    */
toString(String charsetName)844   public final String toString(String charsetName) throws UnsupportedEncodingException {
845     try {
846       return toString(Charset.forName(charsetName));
847     } catch (UnsupportedCharsetException e) {
848       UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
849       exception.initCause(e);
850       throw exception;
851     }
852   }
853 
854   /**
855    * Constructs a new {@code String} by decoding the bytes using the specified charset. Returns the
856    * same empty String if empty.
857    *
858    * @param charset encode using this charset
859    * @return new string
860    */
toString(Charset charset)861   public final String toString(Charset charset) {
862     return size() == 0 ? "" : toStringInternal(charset);
863   }
864 
865   /**
866    * Constructs a new {@code String} by decoding the bytes using the specified charset.
867    *
868    * @param charset encode using this charset
869    * @return new string
870    */
toStringInternal(Charset charset)871   protected abstract String toStringInternal(Charset charset);
872 
873   // =================================================================
874   // UTF-8 decoding
875 
876   /**
877    * Constructs a new {@code String} by decoding the bytes as UTF-8.
878    *
879    * @return new string using UTF-8 encoding
880    */
toStringUtf8()881   public final String toStringUtf8() {
882     return toString(Internal.UTF_8);
883   }
884 
885   /**
886    * Tells whether this {@code ByteString} represents a well-formed UTF-8 byte sequence, such that
887    * the original bytes can be converted to a String object and then round tripped back to bytes
888    * without loss.
889    *
890    * <p>More precisely, returns {@code true} whenever:
891    *
892    * <pre>{@code
893    * Arrays.equals(byteString.toByteArray(),
894    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
895    * }</pre>
896    *
897    * <p>This method returns {@code false} for "overlong" byte sequences, as well as for 3-byte
898    * sequences that would map to a surrogate character, in accordance with the restricted definition
899    * of UTF-8 introduced in Unicode 3.1. Note that the UTF-8 decoder included in Oracle's JDK has
900    * been modified to also reject "overlong" byte sequences, but (as of 2011) still accepts 3-byte
901    * surrogate character byte sequences.
902    *
903    * <p>See the Unicode Standard,<br>
904    * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
905    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
906    *
907    * @return whether the bytes in this {@code ByteString} are a well-formed UTF-8 byte sequence
908    */
isValidUtf8()909   public abstract boolean isValidUtf8();
910 
911   /**
912    * Tells whether the given byte sequence is a well-formed, malformed, or incomplete UTF-8 byte
913    * sequence. This method accepts and returns a partial state result, allowing the bytes for a
914    * complete UTF-8 byte sequence to be composed from multiple {@code ByteString} segments.
915    *
916    * @param state either {@code 0} (if this is the initial decoding operation) or the value returned
917    *     from a call to a partial decoding method for the previous bytes
918    * @param offset offset of the first byte to check
919    * @param length number of bytes to check
920    * @return {@code -1} if the partial byte sequence is definitely malformed, {@code 0} if it is
921    *     well-formed (no additional input needed), or, if the byte sequence is "incomplete", i.e.
922    *     apparently terminated in the middle of a character, an opaque integer "state" value
923    *     containing enough information to decode the character when passed to a subsequent
924    *     invocation of a partial decoding method.
925    */
partialIsValidUtf8(int state, int offset, int length)926   protected abstract int partialIsValidUtf8(int state, int offset, int length);
927 
928   // =================================================================
929   // equals() and hashCode()
930 
931   @Override
932   public abstract boolean equals(Object o);
933 
934   /** Base class for leaf {@link ByteString}s (i.e. non-ropes). */
935   abstract static class LeafByteString extends ByteString {
936     private static final long serialVersionUID = 1L;
937 
938     @Override
getTreeDepth()939     protected final int getTreeDepth() {
940       return 0;
941     }
942 
943     @Override
isBalanced()944     protected final boolean isBalanced() {
945       return true;
946     }
947 
948     @Override
writeToReverse(ByteOutput byteOutput)949     void writeToReverse(ByteOutput byteOutput) throws IOException {
950       writeTo(byteOutput);
951     }
952 
953     /**
954      * Check equality of the substring of given length of this object starting at zero with another
955      * {@code ByteString} substring starting at offset.
956      *
957      * @param other what to compare a substring in
958      * @param offset offset into other
959      * @param length number of bytes to compare
960      * @return true for equality of substrings, else false.
961      */
equalsRange(ByteString other, int offset, int length)962     abstract boolean equalsRange(ByteString other, int offset, int length);
963 
LeafByteString()964     private LeafByteString() {}
965   }
966 
967   /**
968    * Compute the hashCode using the traditional algorithm from {@link ByteString}.
969    *
970    * @return hashCode value
971    */
972   @Override
hashCode()973   public final int hashCode() {
974     int h = hash;
975 
976     if (h == 0) {
977       int size = size();
978       h = partialHash(size, 0, size);
979       if (h == 0) {
980         h = 1;
981       }
982       hash = h;
983     }
984     return h;
985   }
986 
987   // =================================================================
988   // Input stream
989 
990   /**
991    * Creates an {@code InputStream} which can be used to read the bytes.
992    *
993    * <p>The {@link InputStream} returned by this method is guaranteed to be completely non-blocking.
994    * The method {@link InputStream#available()} returns the number of bytes remaining in the stream.
995    * The methods {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)} and
996    * {@link InputStream#skip(long)} will read/skip as many bytes as are available. The method {@link
997    * InputStream#markSupported()} returns {@code true}.
998    *
999    * <p>The methods in the returned {@link InputStream} might <b>not</b> be thread safe.
1000    *
1001    * @return an input stream that returns the bytes of this byte string.
1002    */
newInput()1003   public abstract InputStream newInput();
1004 
1005   /**
1006    * Creates a {@link CodedInputStream} which can be used to read the bytes. Using this is often
1007    * more efficient than creating a {@link CodedInputStream} that wraps the result of {@link
1008    * #newInput()}.
1009    *
1010    * @return stream based on wrapped data
1011    */
newCodedInput()1012   public abstract CodedInputStream newCodedInput();
1013 
1014   // =================================================================
1015   // Output stream
1016 
1017   /**
1018    * Creates a new {@link Output} with the given initial capacity. Call {@link
1019    * Output#toByteString()} to create the {@code ByteString} instance.
1020    *
1021    * <p>A {@link ByteString.Output} offers the same functionality as a {@link
1022    * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte}
1023    * array.
1024    *
1025    * @param initialCapacity estimate of number of bytes to be written
1026    * @return {@code OutputStream} for building a {@code ByteString}
1027    */
newOutput(int initialCapacity)1028   public static Output newOutput(int initialCapacity) {
1029     return new Output(initialCapacity);
1030   }
1031 
1032   /**
1033    * Creates a new {@link Output}. Call {@link Output#toByteString()} to create the {@code
1034    * ByteString} instance.
1035    *
1036    * <p>A {@link ByteString.Output} offers the same functionality as a {@link
1037    * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte
1038    * array}.
1039    *
1040    * @return {@code OutputStream} for building a {@code ByteString}
1041    */
newOutput()1042   public static Output newOutput() {
1043     return new Output(CONCATENATE_BY_COPY_SIZE);
1044   }
1045 
1046   /**
1047    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to create the {@code
1048    * ByteString} instance.
1049    */
1050   public static final class Output extends OutputStream {
1051     // Implementation note.
1052     // The public methods of this class must be synchronized.  ByteStrings
1053     // are guaranteed to be immutable.  Without some sort of locking, it could
1054     // be possible for one thread to call toByteString(), while another thread
1055     // is still modifying the underlying byte array.
1056 
1057     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
1058     // argument passed by user, indicating initial capacity.
1059     private final int initialCapacity;
1060     // ByteStrings to be concatenated to create the result
1061     private final ArrayList<ByteString> flushedBuffers;
1062     // Total number of bytes in the ByteStrings of flushedBuffers
1063     private int flushedBuffersTotalBytes;
1064     // Current buffer to which we are writing
1065     private byte[] buffer;
1066     // Location in buffer[] to which we write the next byte.
1067     private int bufferPos;
1068 
1069     /**
1070      * Creates a new ByteString output stream with the specified initial capacity.
1071      *
1072      * @param initialCapacity the initial capacity of the output stream.
1073      */
Output(int initialCapacity)1074     Output(int initialCapacity) {
1075       if (initialCapacity < 0) {
1076         throw new IllegalArgumentException("Buffer size < 0");
1077       }
1078       this.initialCapacity = initialCapacity;
1079       this.flushedBuffers = new ArrayList<ByteString>();
1080       this.buffer = new byte[initialCapacity];
1081     }
1082 
1083     @Override
write(int b)1084     public synchronized void write(int b) {
1085       if (bufferPos == buffer.length) {
1086         flushFullBuffer(1);
1087       }
1088       buffer[bufferPos++] = (byte) b;
1089     }
1090 
1091     @Override
write(byte[] b, int offset, int length)1092     public synchronized void write(byte[] b, int offset, int length) {
1093       if (length <= buffer.length - bufferPos) {
1094         // The bytes can fit into the current buffer.
1095         System.arraycopy(b, offset, buffer, bufferPos, length);
1096         bufferPos += length;
1097       } else {
1098         // Use up the current buffer
1099         int copySize = buffer.length - bufferPos;
1100         System.arraycopy(b, offset, buffer, bufferPos, copySize);
1101         offset += copySize;
1102         length -= copySize;
1103         // Flush the buffer, and get a new buffer at least big enough to cover
1104         // what we still need to output
1105         flushFullBuffer(length);
1106         System.arraycopy(b, offset, buffer, /* count= */ 0, length);
1107         bufferPos = length;
1108       }
1109     }
1110 
1111     /**
1112      * Creates a byte string with the size and contents of this output stream. This does not create
1113      * a new copy of the underlying bytes. If the stream size grows dynamically, the runtime is
1114      * O(log n) in respect to the number of bytes written to the {@link Output}. If the stream size
1115      * stays within the initial capacity, the runtime is O(1).
1116      *
1117      * @return the current contents of this output stream, as a byte string.
1118      */
toByteString()1119     public synchronized ByteString toByteString() {
1120       flushLastBuffer();
1121       return ByteString.copyFrom(flushedBuffers);
1122     }
1123 
1124     /**
1125      * Writes the complete contents of this byte array output stream to the specified output stream
1126      * argument.
1127      *
1128      * @param out the output stream to which to write the data.
1129      * @throws IOException if an I/O error occurs.
1130      */
writeTo(OutputStream out)1131     public void writeTo(OutputStream out) throws IOException {
1132       ByteString[] cachedFlushBuffers;
1133       byte[] cachedBuffer;
1134       int cachedBufferPos;
1135       synchronized (this) {
1136         // Copy the information we need into local variables so as to hold
1137         // the lock for as short a time as possible.
1138         cachedFlushBuffers = flushedBuffers.toArray(new ByteString[0]);
1139         cachedBuffer = buffer;
1140         cachedBufferPos = bufferPos;
1141       }
1142       for (ByteString byteString : cachedFlushBuffers) {
1143         byteString.writeTo(out);
1144       }
1145 
1146       out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos));
1147     }
1148 
1149     /**
1150      * Returns the current size of the output stream.
1151      *
1152      * @return the current size of the output stream
1153      */
size()1154     public synchronized int size() {
1155       return flushedBuffersTotalBytes + bufferPos;
1156     }
1157 
1158     /**
1159      * Resets this stream, so that all currently accumulated output in the output stream is
1160      * discarded. The output stream can be used again, reusing the already allocated buffer space.
1161      */
reset()1162     public synchronized void reset() {
1163       flushedBuffers.clear();
1164       flushedBuffersTotalBytes = 0;
1165       bufferPos = 0;
1166     }
1167 
1168     @Override
toString()1169     public String toString() {
1170       return String.format(
1171           "<ByteString.Output@%s size=%d>",
1172           Integer.toHexString(System.identityHashCode(this)), size());
1173     }
1174 
1175     /**
1176      * Internal function used by writers. The current buffer is full, and the writer needs a new
1177      * buffer whose size is at least the specified minimum size.
1178      */
flushFullBuffer(int minSize)1179     private void flushFullBuffer(int minSize) {
1180       flushedBuffers.add(new LiteralByteString(buffer));
1181       flushedBuffersTotalBytes += buffer.length;
1182       // We want to increase our total capacity by 50%, but as a minimum,
1183       // the new buffer should also at least be >= minSize and
1184       // >= initial Capacity.
1185       int newSize = Math.max(initialCapacity, Math.max(minSize, flushedBuffersTotalBytes >>> 1));
1186       buffer = new byte[newSize];
1187       bufferPos = 0;
1188     }
1189 
1190     /**
1191      * Internal function used by {@link #toByteString()}. The current buffer may or may not be full,
1192      * but it needs to be flushed.
1193      */
flushLastBuffer()1194     private void flushLastBuffer() {
1195       if (bufferPos < buffer.length) {
1196         if (bufferPos > 0) {
1197           byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos);
1198           flushedBuffers.add(new LiteralByteString(bufferCopy));
1199         }
1200         // We reuse this buffer for further writes.
1201       } else {
1202         // Buffer is completely full.  Huzzah.
1203         flushedBuffers.add(new LiteralByteString(buffer));
1204         // 99% of the time, we're not going to use this OutputStream again.
1205         // We set buffer to an empty byte stream so that we're handling this
1206         // case without wasting space.  In the rare case that more writes
1207         // *do* occur, this empty buffer will be flushed and an appropriately
1208         // sized new buffer will be created.
1209         buffer = EMPTY_BYTE_ARRAY;
1210       }
1211       flushedBuffersTotalBytes += bufferPos;
1212       bufferPos = 0;
1213     }
1214   }
1215 
1216   /**
1217    * Constructs a new {@code ByteString} builder, which allows you to efficiently construct a {@code
1218    * ByteString} by writing to a {@link CodedOutputStream}. Using this is much more efficient than
1219    * calling {@code newOutput()} and wrapping that in a {@code CodedOutputStream}.
1220    *
1221    * <p>This is package-private because it's a somewhat confusing interface. Users can call {@link
1222    * Message#toByteString()} instead of calling this directly.
1223    *
1224    * @param size The target byte size of the {@code ByteString}. You must write exactly this many
1225    *     bytes before building the result.
1226    * @return the builder
1227    */
newCodedBuilder(int size)1228   static CodedBuilder newCodedBuilder(int size) {
1229     return new CodedBuilder(size);
1230   }
1231 
1232   /** See {@link ByteString#newCodedBuilder(int)}. */
1233   static final class CodedBuilder {
1234     private final CodedOutputStream output;
1235     private final byte[] buffer;
1236 
CodedBuilder(int size)1237     private CodedBuilder(int size) {
1238       buffer = new byte[size];
1239       output = CodedOutputStream.newInstance(buffer);
1240     }
1241 
build()1242     public ByteString build() {
1243       output.checkNoSpaceLeft();
1244 
1245       // We can be confident that the CodedOutputStream will not modify the
1246       // underlying bytes anymore because it already wrote all of them.  So,
1247       // no need to make a copy.
1248       return new LiteralByteString(buffer);
1249     }
1250 
getCodedOutput()1251     public CodedOutputStream getCodedOutput() {
1252       return output;
1253     }
1254   }
1255 
1256   // =================================================================
1257   // Methods {@link RopeByteString} needs on instances, which aren't part of the
1258   // public API.
1259 
1260   /**
1261    * Return the depth of the tree representing this {@code ByteString}, if any, whose root is this
1262    * node. If this is a leaf node, return 0.
1263    *
1264    * @return tree depth or zero
1265    */
getTreeDepth()1266   protected abstract int getTreeDepth();
1267 
1268   /**
1269    * Return {@code true} if this ByteString is literal (a leaf node) or a flat-enough tree in the
1270    * sense of {@link RopeByteString}.
1271    *
1272    * @return true if the tree is flat enough
1273    */
isBalanced()1274   protected abstract boolean isBalanced();
1275 
1276   /**
1277    * Return the cached hash code if available.
1278    *
1279    * @return value of cached hash code or 0 if not computed yet
1280    */
peekCachedHashCode()1281   protected final int peekCachedHashCode() {
1282     return hash;
1283   }
1284 
1285   /**
1286    * Compute the hash across the value bytes starting with the given hash, and return the result.
1287    * This is used to compute the hash across strings represented as a set of pieces by allowing the
1288    * hash computation to be continued from piece to piece.
1289    *
1290    * @param h starting hash value
1291    * @param offset offset into this value to start looking at data values
1292    * @param length number of data values to include in the hash computation
1293    * @return ending hash value
1294    */
partialHash(int h, int offset, int length)1295   protected abstract int partialHash(int h, int offset, int length);
1296 
1297   /**
1298    * Checks that the given index falls within the specified array size.
1299    *
1300    * @param index the index position to be tested
1301    * @param size the length of the array
1302    * @throws IndexOutOfBoundsException if the index does not fall within the array.
1303    */
checkIndex(int index, int size)1304   static void checkIndex(int index, int size) {
1305     if ((index | (size - (index + 1))) < 0) {
1306       if (index < 0) {
1307         throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
1308       }
1309       throw new ArrayIndexOutOfBoundsException("Index > length: " + index + ", " + size);
1310     }
1311   }
1312 
1313   /**
1314    * Checks that the given range falls within the bounds of an array
1315    *
1316    * @param startIndex the start index of the range (inclusive)
1317    * @param endIndex the end index of the range (exclusive)
1318    * @param size the size of the array.
1319    * @return the length of the range.
1320    * @throws IndexOutOfBoundsException some or all of the range falls outside of the array.
1321    */
1322   @CanIgnoreReturnValue
checkRange(int startIndex, int endIndex, int size)1323   static int checkRange(int startIndex, int endIndex, int size) {
1324     final int length = endIndex - startIndex;
1325     if ((startIndex | endIndex | length | (size - endIndex)) < 0) {
1326       if (startIndex < 0) {
1327         throw new IndexOutOfBoundsException("Beginning index: " + startIndex + " < 0");
1328       }
1329       if (endIndex < startIndex) {
1330         throw new IndexOutOfBoundsException(
1331             "Beginning index larger than ending index: " + startIndex + ", " + endIndex);
1332       }
1333       // endIndex >= size
1334       throw new IndexOutOfBoundsException("End index: " + endIndex + " >= " + size);
1335     }
1336     return length;
1337   }
1338 
1339   @Override
toString()1340   public final String toString() {
1341     return String.format(
1342         Locale.ROOT,
1343         "<ByteString@%s size=%d contents=\"%s\">",
1344         toHexString(identityHashCode(this)),
1345         size(),
1346         truncateAndEscapeForDisplay());
1347   }
1348 
truncateAndEscapeForDisplay()1349   private String truncateAndEscapeForDisplay() {
1350     final int limit = 50;
1351 
1352     return size() <= limit ? escapeBytes(this) : escapeBytes(substring(0, limit - 3)) + "...";
1353   }
1354 
1355   /**
1356    * This class implements a {@link com.google.protobuf.ByteString} backed by a single array of
1357    * bytes, contiguous in memory. It supports substring by pointing to only a sub-range of the
1358    * underlying byte array, meaning that a substring will reference the full byte-array of the
1359    * string it's made from, exactly as with {@link String}.
1360    *
1361    * @author carlanton@google.com (Carl Haverl)
1362    */
1363   // Keep this class private to avoid deadlocks in classloading across threads as ByteString's
1364   // static initializer loads LiteralByteString and another thread loads LiteralByteString.
1365   private static class LiteralByteString extends ByteString.LeafByteString {
1366     private static final long serialVersionUID = 1L;
1367 
1368     protected final byte[] bytes;
1369 
1370     /**
1371      * Creates a {@code LiteralByteString} backed by the given array, without copying.
1372      *
1373      * @param bytes array to wrap
1374      */
LiteralByteString(byte[] bytes)1375     LiteralByteString(byte[] bytes) {
1376       if (bytes == null) {
1377         throw new NullPointerException();
1378       }
1379       this.bytes = bytes;
1380     }
1381 
1382     @Override
byteAt(int index)1383     public byte byteAt(int index) {
1384       // Unlike most methods in this class, this one is a direct implementation
1385       // ignoring the potential offset because we need to do range-checking in the
1386       // substring case anyway.
1387       return bytes[index];
1388     }
1389 
1390     @Override
internalByteAt(int index)1391     byte internalByteAt(int index) {
1392       return bytes[index];
1393     }
1394 
1395     @Override
size()1396     public int size() {
1397       return bytes.length;
1398     }
1399 
1400     // =================================================================
1401     // ByteString -> substring
1402 
1403     @Override
substring(int beginIndex, int endIndex)1404     public final ByteString substring(int beginIndex, int endIndex) {
1405       final int length = checkRange(beginIndex, endIndex, size());
1406 
1407       if (length == 0) {
1408         return ByteString.EMPTY;
1409       }
1410 
1411       return new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex, length);
1412     }
1413 
1414     // =================================================================
1415     // ByteString -> byte[]
1416 
1417     @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1418     protected void copyToInternal(
1419         byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
1420       // Optimized form, not for subclasses, since we don't call
1421       // getOffsetIntoBytes() or check the 'numberToCopy' parameter.
1422       // TODO: Is not calling getOffsetIntoBytes really saving that much?
1423       System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy);
1424     }
1425 
1426     @Override
copyTo(ByteBuffer target)1427     public final void copyTo(ByteBuffer target) {
1428       target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes
1429     }
1430 
1431     @Override
asReadOnlyByteBuffer()1432     public final ByteBuffer asReadOnlyByteBuffer() {
1433       return ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size()).asReadOnlyBuffer();
1434     }
1435 
1436     @Override
asReadOnlyByteBufferList()1437     public final List<ByteBuffer> asReadOnlyByteBufferList() {
1438       return Collections.singletonList(asReadOnlyByteBuffer());
1439     }
1440 
1441     @Override
writeTo(OutputStream outputStream)1442     public final void writeTo(OutputStream outputStream) throws IOException {
1443       outputStream.write(toByteArray());
1444     }
1445 
1446     @Override
writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)1447     final void writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)
1448         throws IOException {
1449       outputStream.write(bytes, getOffsetIntoBytes() + sourceOffset, numberToWrite);
1450     }
1451 
1452     @Override
writeTo(ByteOutput output)1453     final void writeTo(ByteOutput output) throws IOException {
1454       output.writeLazy(bytes, getOffsetIntoBytes(), size());
1455     }
1456 
1457     @Override
toStringInternal(Charset charset)1458     protected final String toStringInternal(Charset charset) {
1459       return new String(bytes, getOffsetIntoBytes(), size(), charset);
1460     }
1461 
1462     // =================================================================
1463     // UTF-8 decoding
1464 
1465     @Override
isValidUtf8()1466     public final boolean isValidUtf8() {
1467       int offset = getOffsetIntoBytes();
1468       return Utf8.isValidUtf8(bytes, offset, offset + size());
1469     }
1470 
1471     @Override
partialIsValidUtf8(int state, int offset, int length)1472     protected final int partialIsValidUtf8(int state, int offset, int length) {
1473       int index = getOffsetIntoBytes() + offset;
1474       return Utf8.partialIsValidUtf8(state, bytes, index, index + length);
1475     }
1476 
1477     // =================================================================
1478     // equals() and hashCode()
1479 
1480     @Override
equals(Object other)1481     public final boolean equals(Object other) {
1482       if (other == this) {
1483         return true;
1484       }
1485       if (!(other instanceof ByteString)) {
1486         return false;
1487       }
1488 
1489       if (size() != ((ByteString) other).size()) {
1490         return false;
1491       }
1492       if (size() == 0) {
1493         return true;
1494       }
1495 
1496       if (other instanceof LiteralByteString) {
1497         LiteralByteString otherAsLiteral = (LiteralByteString) other;
1498         // If we know the hash codes and they are not equal, we know the byte
1499         // strings are not equal.
1500         int thisHash = peekCachedHashCode();
1501         int thatHash = otherAsLiteral.peekCachedHashCode();
1502         if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) {
1503           return false;
1504         }
1505 
1506         return equalsRange((LiteralByteString) other, 0, size());
1507       } else {
1508         // RopeByteString and NioByteString.
1509         return other.equals(this);
1510       }
1511     }
1512 
1513     /**
1514      * Check equality of the substring of given length of this object starting at zero with another
1515      * {@code LiteralByteString} substring starting at offset.
1516      *
1517      * @param other what to compare a substring in
1518      * @param offset offset into other
1519      * @param length number of bytes to compare
1520      * @return true for equality of substrings, else false.
1521      */
1522     @Override
equalsRange(ByteString other, int offset, int length)1523     final boolean equalsRange(ByteString other, int offset, int length) {
1524       if (length > other.size()) {
1525         throw new IllegalArgumentException("Length too large: " + length + size());
1526       }
1527       if (offset + length > other.size()) {
1528         throw new IllegalArgumentException(
1529             "Ran off end of other: " + offset + ", " + length + ", " + other.size());
1530       }
1531 
1532       if (other instanceof LiteralByteString) {
1533         LiteralByteString lbsOther = (LiteralByteString) other;
1534         byte[] thisBytes = bytes;
1535         byte[] otherBytes = lbsOther.bytes;
1536         int thisLimit = getOffsetIntoBytes() + length;
1537         for (int thisIndex = getOffsetIntoBytes(),
1538                 otherIndex = lbsOther.getOffsetIntoBytes() + offset;
1539             (thisIndex < thisLimit);
1540             ++thisIndex, ++otherIndex) {
1541           if (thisBytes[thisIndex] != otherBytes[otherIndex]) {
1542             return false;
1543           }
1544         }
1545         return true;
1546       }
1547 
1548       return other.substring(offset, offset + length).equals(substring(0, length));
1549     }
1550 
1551     @Override
partialHash(int h, int offset, int length)1552     protected final int partialHash(int h, int offset, int length) {
1553       return Internal.partialHash(h, bytes, getOffsetIntoBytes() + offset, length);
1554     }
1555 
1556     // =================================================================
1557     // Input stream
1558 
1559     @Override
newInput()1560     public final InputStream newInput() {
1561       return new ByteArrayInputStream(bytes, getOffsetIntoBytes(), size()); // No copy
1562     }
1563 
1564     @Override
newCodedInput()1565     public final CodedInputStream newCodedInput() {
1566       // We trust CodedInputStream not to modify the bytes, or to give anyone
1567       // else access to them.
1568       return CodedInputStream.newInstance(
1569           bytes, getOffsetIntoBytes(), size(), /* bufferIsImmutable= */ true);
1570     }
1571 
1572     // =================================================================
1573     // Internal methods
1574 
1575     /**
1576      * Offset into {@code bytes[]} to use, non-zero for substrings.
1577      *
1578      * @return always 0 for this class
1579      */
getOffsetIntoBytes()1580     protected int getOffsetIntoBytes() {
1581       return 0;
1582     }
1583   }
1584 
1585   /**
1586    * This class is used to represent the substring of a {@link ByteString} over a single byte array.
1587    * In terms of the public API of {@link ByteString}, you end up here by calling {@link
1588    * ByteString#copyFrom(byte[])} followed by {@link ByteString#substring(int, int)}.
1589    *
1590    * <p>This class contains most of the overhead involved in creating a substring from a {@link
1591    * LiteralByteString}. The overhead involves some range-checking and two extra fields.
1592    *
1593    * @author carlanton@google.com (Carl Haverl)
1594    */
1595   // Keep this class private to avoid deadlocks in classloading across threads as ByteString's
1596   // static initializer loads LiteralByteString and another thread loads BoundedByteString.
1597   private static final class BoundedByteString extends LiteralByteString {
1598     private final int bytesOffset;
1599     private final int bytesLength;
1600 
1601     /**
1602      * Creates a {@code BoundedByteString} backed by the sub-range of given array, without copying.
1603      *
1604      * @param bytes array to wrap
1605      * @param offset index to first byte to use in bytes
1606      * @param length number of bytes to use from bytes
1607      * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0}, or if {@code
1608      *     offset + length > bytes.length}.
1609      */
BoundedByteString(byte[] bytes, int offset, int length)1610     BoundedByteString(byte[] bytes, int offset, int length) {
1611       super(bytes);
1612       checkRange(offset, offset + length, bytes.length);
1613 
1614       this.bytesOffset = offset;
1615       this.bytesLength = length;
1616     }
1617 
1618     /**
1619      * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for
1620      * backwards-compatibility reasons although it would more properly be {@link
1621      * IndexOutOfBoundsException}.
1622      *
1623      * @param index index of byte
1624      * @return the value
1625      * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
1626      */
1627     @Override
byteAt(int index)1628     public byte byteAt(int index) {
1629       // We must check the index ourselves as we cannot rely on Java array index
1630       // checking for substrings.
1631       checkIndex(index, size());
1632       return bytes[bytesOffset + index];
1633     }
1634 
1635     @Override
internalByteAt(int index)1636     byte internalByteAt(int index) {
1637       return bytes[bytesOffset + index];
1638     }
1639 
1640     @Override
size()1641     public int size() {
1642       return bytesLength;
1643     }
1644 
1645     @Override
getOffsetIntoBytes()1646     protected int getOffsetIntoBytes() {
1647       return bytesOffset;
1648     }
1649 
1650     // =================================================================
1651     // ByteString -> byte[]
1652 
1653     @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1654     protected void copyToInternal(
1655         byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
1656       System.arraycopy(
1657           bytes, getOffsetIntoBytes() + sourceOffset, target, targetOffset, numberToCopy);
1658     }
1659 
1660     // =================================================================
1661     // Serializable
1662 
1663     private static final long serialVersionUID = 1L;
1664 
writeReplace()1665     Object writeReplace() {
1666       return ByteString.wrap(toByteArray());
1667     }
1668 
readObject(@uppressWarnings"unused") ObjectInputStream in)1669     private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
1670       throw new InvalidObjectException(
1671           "BoundedByteStream instances are not to be serialized directly");
1672     }
1673   }
1674 
1675   /** A {@link ByteString} that wraps around a {@link ByteBuffer}. */
1676   // Keep this class private to avoid deadlocks in classloading across threads as ByteString's
1677   // static initializer loads LiteralByteString and another thread loads BoundedByteString.
1678   private static final class NioByteString extends ByteString.LeafByteString {
1679     private final ByteBuffer buffer;
1680 
NioByteString(ByteBuffer buffer)1681     NioByteString(ByteBuffer buffer) {
1682       checkNotNull(buffer, "buffer");
1683 
1684       // Use native byte order for fast fixed32/64 operations.
1685       this.buffer = buffer.slice().order(ByteOrder.nativeOrder());
1686     }
1687 
1688     // =================================================================
1689     // Serializable
1690 
1691     /** Magic method that lets us override serialization behavior. */
writeReplace()1692     private Object writeReplace() {
1693       return ByteString.copyFrom(buffer.slice());
1694     }
1695 
1696     /** Magic method that lets us override deserialization behavior. */
readObject(@uppressWarnings"unused") ObjectInputStream in)1697     private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
1698       throw new InvalidObjectException("NioByteString instances are not to be serialized directly");
1699     }
1700 
1701     // =================================================================
1702 
1703     @Override
byteAt(int index)1704     public byte byteAt(int index) {
1705       try {
1706         return buffer.get(index);
1707       } catch (ArrayIndexOutOfBoundsException e) {
1708         throw e;
1709       } catch (IndexOutOfBoundsException e) {
1710         throw new ArrayIndexOutOfBoundsException(e.getMessage());
1711       }
1712     }
1713 
1714     @Override
internalByteAt(int index)1715     public byte internalByteAt(int index) {
1716       // it isn't possible to avoid the bounds checking inside of ByteBuffer, so just use the
1717       // default
1718       // implementation.
1719       return byteAt(index);
1720     }
1721 
1722     @Override
size()1723     public int size() {
1724       return buffer.remaining();
1725     }
1726 
1727     @Override
substring(int beginIndex, int endIndex)1728     public ByteString substring(int beginIndex, int endIndex) {
1729       try {
1730         ByteBuffer slice = slice(beginIndex, endIndex);
1731         return new NioByteString(slice);
1732       } catch (ArrayIndexOutOfBoundsException e) {
1733         throw e;
1734       } catch (IndexOutOfBoundsException e) {
1735         throw new ArrayIndexOutOfBoundsException(e.getMessage());
1736       }
1737     }
1738 
1739     @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1740     protected void copyToInternal(
1741         byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
1742       ByteBuffer slice = buffer.slice();
1743       Java8Compatibility.position(slice, sourceOffset);
1744       slice.get(target, targetOffset, numberToCopy);
1745     }
1746 
1747     @Override
copyTo(ByteBuffer target)1748     public void copyTo(ByteBuffer target) {
1749       target.put(buffer.slice());
1750     }
1751 
1752     @Override
writeTo(OutputStream out)1753     public void writeTo(OutputStream out) throws IOException {
1754       out.write(toByteArray());
1755     }
1756 
1757     @Override
equalsRange(ByteString other, int offset, int length)1758     boolean equalsRange(ByteString other, int offset, int length) {
1759       return substring(0, length).equals(other.substring(offset, offset + length));
1760     }
1761 
1762     @Override
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)1763     void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
1764       if (buffer.hasArray()) {
1765         // Optimized write for array-backed buffers.
1766         // Note that we're taking the risk that a malicious OutputStream could modify the array.
1767         int bufferOffset = buffer.arrayOffset() + buffer.position() + sourceOffset;
1768         out.write(buffer.array(), bufferOffset, numberToWrite);
1769         return;
1770       }
1771 
1772       ByteBufferWriter.write(slice(sourceOffset, sourceOffset + numberToWrite), out);
1773     }
1774 
1775     @Override
writeTo(ByteOutput output)1776     void writeTo(ByteOutput output) throws IOException {
1777       output.writeLazy(buffer.slice());
1778     }
1779 
1780     @Override
asReadOnlyByteBuffer()1781     public ByteBuffer asReadOnlyByteBuffer() {
1782       return buffer.asReadOnlyBuffer();
1783     }
1784 
1785     @Override
asReadOnlyByteBufferList()1786     public List<ByteBuffer> asReadOnlyByteBufferList() {
1787       return Collections.singletonList(asReadOnlyByteBuffer());
1788     }
1789 
1790     @Override
toStringInternal(Charset charset)1791     protected String toStringInternal(Charset charset) {
1792       final byte[] bytes;
1793       final int offset;
1794       final int length;
1795       if (buffer.hasArray()) {
1796         bytes = buffer.array();
1797         offset = buffer.arrayOffset() + buffer.position();
1798         length = buffer.remaining();
1799       } else {
1800         // TODO: Can we optimize this?
1801         bytes = toByteArray();
1802         offset = 0;
1803         length = bytes.length;
1804       }
1805       return new String(bytes, offset, length, charset);
1806     }
1807 
1808     @Override
isValidUtf8()1809     public boolean isValidUtf8() {
1810       return Utf8.isValidUtf8(buffer);
1811     }
1812 
1813     @Override
partialIsValidUtf8(int state, int offset, int length)1814     protected int partialIsValidUtf8(int state, int offset, int length) {
1815       return Utf8.partialIsValidUtf8(state, buffer, offset, offset + length);
1816     }
1817 
1818     @Override
equals(Object other)1819     public boolean equals(Object other) {
1820       if (other == this) {
1821         return true;
1822       }
1823       if (!(other instanceof ByteString)) {
1824         return false;
1825       }
1826       ByteString otherString = ((ByteString) other);
1827       if (size() != otherString.size()) {
1828         return false;
1829       }
1830       if (size() == 0) {
1831         return true;
1832       }
1833       if (other instanceof NioByteString) {
1834         return buffer.equals(((NioByteString) other).buffer);
1835       }
1836       if (other instanceof RopeByteString) {
1837         return other.equals(this);
1838       }
1839       return buffer.equals(otherString.asReadOnlyByteBuffer());
1840     }
1841 
1842     @Override
partialHash(int h, int offset, int length)1843     protected int partialHash(int h, int offset, int length) {
1844       for (int i = offset; i < offset + length; i++) {
1845         h = h * 31 + buffer.get(i);
1846       }
1847       return h;
1848     }
1849 
1850     @Override
newInput()1851     public InputStream newInput() {
1852       return new InputStream() {
1853         private final ByteBuffer buf = buffer.slice();
1854 
1855         @Override
1856         public void mark(int readlimit) {
1857           Java8Compatibility.mark(buf);
1858         }
1859 
1860         @Override
1861         public boolean markSupported() {
1862           return true;
1863         }
1864 
1865         @Override
1866         public void reset() throws IOException {
1867           try {
1868             Java8Compatibility.reset(buf);
1869           } catch (InvalidMarkException e) {
1870             throw new IOException(e);
1871           }
1872         }
1873 
1874         @Override
1875         public int available() throws IOException {
1876           return buf.remaining();
1877         }
1878 
1879         @Override
1880         public int read() throws IOException {
1881           if (!buf.hasRemaining()) {
1882             return -1;
1883           }
1884           return buf.get() & 0xFF;
1885         }
1886 
1887         @Override
1888         public int read(byte[] bytes, int off, int len) throws IOException {
1889           if (!buf.hasRemaining()) {
1890             return -1;
1891           }
1892 
1893           len = Math.min(len, buf.remaining());
1894           buf.get(bytes, off, len);
1895           return len;
1896         }
1897       };
1898     }
1899 
1900     @Override
newCodedInput()1901     public CodedInputStream newCodedInput() {
1902       return CodedInputStream.newInstance(buffer, true);
1903     }
1904 
1905     /**
1906      * Creates a slice of a range of this buffer.
1907      *
1908      * @param beginIndex the beginning index of the slice (inclusive).
1909      * @param endIndex the end index of the slice (exclusive).
1910      * @return the requested slice.
1911      */
slice(int beginIndex, int endIndex)1912     private ByteBuffer slice(int beginIndex, int endIndex) {
1913       if (beginIndex < buffer.position() || endIndex > buffer.limit() || beginIndex > endIndex) {
1914         throw new IllegalArgumentException(
1915             String.format("Invalid indices [%d, %d]", beginIndex, endIndex));
1916       }
1917 
1918       ByteBuffer slice = buffer.slice();
1919       Java8Compatibility.position(slice, beginIndex - buffer.position());
1920       Java8Compatibility.limit(slice, endIndex - buffer.position());
1921       return slice;
1922     }
1923   }
1924 }
1925