1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2013 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf.nano; 32 33 import java.io.IOException; 34 import java.nio.BufferOverflowException; 35 import java.nio.ByteBuffer; 36 import java.nio.ByteOrder; 37 import java.nio.ReadOnlyBufferException; 38 39 /** 40 * Encodes and writes protocol message fields. 
41 * 42 * <p>This class contains two kinds of methods: methods that write specific 43 * protocol message constructs and field types (e.g. {@link #writeTag} and 44 * {@link #writeInt32}) and methods that write low-level values (e.g. 45 * {@link #writeRawVarint32} and {@link #writeRawBytes}). If you are 46 * writing encoded protocol messages, you should use the former methods, but if 47 * you are writing some other format of your own design, use the latter. 48 * 49 * <p>This class is totally unsynchronized. 50 * 51 * @author kneton@google.com Kenton Varda 52 */ 53 public final class CodedOutputByteBufferNano { 54 /* max bytes per java UTF-16 char in UTF-8 */ 55 private static final int MAX_UTF8_EXPANSION = 3; 56 private final ByteBuffer buffer; 57 CodedOutputByteBufferNano(final byte[] buffer, final int offset, final int length)58 private CodedOutputByteBufferNano(final byte[] buffer, final int offset, 59 final int length) { 60 this(ByteBuffer.wrap(buffer, offset, length)); 61 } 62 CodedOutputByteBufferNano(final ByteBuffer buffer)63 private CodedOutputByteBufferNano(final ByteBuffer buffer) { 64 this.buffer = buffer; 65 this.buffer.order(ByteOrder.LITTLE_ENDIAN); 66 } 67 68 /** 69 * Create a new {@code CodedOutputStream} that writes directly to the given 70 * byte array. If more bytes are written than fit in the array, 71 * {@link OutOfSpaceException} will be thrown. Writing directly to a flat 72 * array is faster than writing to an {@code OutputStream}. 73 */ newInstance(final byte[] flatArray)74 public static CodedOutputByteBufferNano newInstance(final byte[] flatArray) { 75 return newInstance(flatArray, 0, flatArray.length); 76 } 77 78 /** 79 * Create a new {@code CodedOutputStream} that writes directly to the given 80 * byte array slice. If more bytes are written than fit in the slice, 81 * {@link OutOfSpaceException} will be thrown. Writing directly to a flat 82 * array is faster than writing to an {@code OutputStream}. 
83 */ newInstance(final byte[] flatArray, final int offset, final int length)84 public static CodedOutputByteBufferNano newInstance(final byte[] flatArray, 85 final int offset, 86 final int length) { 87 return new CodedOutputByteBufferNano(flatArray, offset, length); 88 } 89 90 // ----------------------------------------------------------------- 91 92 /** Write a {@code double} field, including tag, to the stream. */ writeDouble(final int fieldNumber, final double value)93 public void writeDouble(final int fieldNumber, final double value) 94 throws IOException { 95 writeTag(fieldNumber, WireFormatNano.WIRETYPE_FIXED64); 96 writeDoubleNoTag(value); 97 } 98 99 /** Write a {@code float} field, including tag, to the stream. */ writeFloat(final int fieldNumber, final float value)100 public void writeFloat(final int fieldNumber, final float value) 101 throws IOException { 102 writeTag(fieldNumber, WireFormatNano.WIRETYPE_FIXED32); 103 writeFloatNoTag(value); 104 } 105 106 /** Write a {@code uint64} field, including tag, to the stream. */ writeUInt64(final int fieldNumber, final long value)107 public void writeUInt64(final int fieldNumber, final long value) 108 throws IOException { 109 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 110 writeUInt64NoTag(value); 111 } 112 113 /** Write an {@code int64} field, including tag, to the stream. */ writeInt64(final int fieldNumber, final long value)114 public void writeInt64(final int fieldNumber, final long value) 115 throws IOException { 116 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 117 writeInt64NoTag(value); 118 } 119 120 /** Write an {@code int32} field, including tag, to the stream. */ writeInt32(final int fieldNumber, final int value)121 public void writeInt32(final int fieldNumber, final int value) 122 throws IOException { 123 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 124 writeInt32NoTag(value); 125 } 126 127 /** Write a {@code fixed64} field, including tag, to the stream. 
*/ writeFixed64(final int fieldNumber, final long value)128 public void writeFixed64(final int fieldNumber, final long value) 129 throws IOException { 130 writeTag(fieldNumber, WireFormatNano.WIRETYPE_FIXED64); 131 writeFixed64NoTag(value); 132 } 133 134 /** Write a {@code fixed32} field, including tag, to the stream. */ writeFixed32(final int fieldNumber, final int value)135 public void writeFixed32(final int fieldNumber, final int value) 136 throws IOException { 137 writeTag(fieldNumber, WireFormatNano.WIRETYPE_FIXED32); 138 writeFixed32NoTag(value); 139 } 140 141 /** Write a {@code bool} field, including tag, to the stream. */ writeBool(final int fieldNumber, final boolean value)142 public void writeBool(final int fieldNumber, final boolean value) 143 throws IOException { 144 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 145 writeBoolNoTag(value); 146 } 147 148 /** Write a {@code string} field, including tag, to the stream. */ writeString(final int fieldNumber, final String value)149 public void writeString(final int fieldNumber, final String value) 150 throws IOException { 151 writeTag(fieldNumber, WireFormatNano.WIRETYPE_LENGTH_DELIMITED); 152 writeStringNoTag(value); 153 } 154 155 /** Write a {@code group} field, including tag, to the stream. */ writeGroup(final int fieldNumber, final MessageNano value)156 public void writeGroup(final int fieldNumber, final MessageNano value) 157 throws IOException { 158 writeTag(fieldNumber, WireFormatNano.WIRETYPE_START_GROUP); 159 writeGroupNoTag(value); 160 writeTag(fieldNumber, WireFormatNano.WIRETYPE_END_GROUP); 161 } 162 163 /** Write an embedded message field, including tag, to the stream. 
*/ writeMessage(final int fieldNumber, final MessageNano value)164 public void writeMessage(final int fieldNumber, final MessageNano value) 165 throws IOException { 166 writeTag(fieldNumber, WireFormatNano.WIRETYPE_LENGTH_DELIMITED); 167 writeMessageNoTag(value); 168 } 169 170 /** Write a {@code bytes} field, including tag, to the stream. */ writeBytes(final int fieldNumber, final byte[] value)171 public void writeBytes(final int fieldNumber, final byte[] value) 172 throws IOException { 173 writeTag(fieldNumber, WireFormatNano.WIRETYPE_LENGTH_DELIMITED); 174 writeBytesNoTag(value); 175 } 176 177 /** Write a {@code bytes} field, including tag, to the stream. */ writeBytes(final int fieldNumber, final byte[] value, final int offset, final int length)178 public void writeBytes(final int fieldNumber, final byte[] value, 179 final int offset, final int length) 180 throws IOException { 181 writeTag(fieldNumber, WireFormatNano.WIRETYPE_LENGTH_DELIMITED); 182 writeBytesNoTag(value, offset, length); 183 } 184 185 /** Write a {@code uint32} field, including tag, to the stream. */ writeUInt32(final int fieldNumber, final int value)186 public void writeUInt32(final int fieldNumber, final int value) 187 throws IOException { 188 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 189 writeUInt32NoTag(value); 190 } 191 192 /** 193 * Write an enum field, including tag, to the stream. Caller is responsible 194 * for converting the enum value to its numeric value. 195 */ writeEnum(final int fieldNumber, final int value)196 public void writeEnum(final int fieldNumber, final int value) 197 throws IOException { 198 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 199 writeEnumNoTag(value); 200 } 201 202 /** Write an {@code sfixed32} field, including tag, to the stream. 
*/ writeSFixed32(final int fieldNumber, final int value)203 public void writeSFixed32(final int fieldNumber, final int value) 204 throws IOException { 205 writeTag(fieldNumber, WireFormatNano.WIRETYPE_FIXED32); 206 writeSFixed32NoTag(value); 207 } 208 209 /** Write an {@code sfixed64} field, including tag, to the stream. */ writeSFixed64(final int fieldNumber, final long value)210 public void writeSFixed64(final int fieldNumber, final long value) 211 throws IOException { 212 writeTag(fieldNumber, WireFormatNano.WIRETYPE_FIXED64); 213 writeSFixed64NoTag(value); 214 } 215 216 /** Write an {@code sint32} field, including tag, to the stream. */ writeSInt32(final int fieldNumber, final int value)217 public void writeSInt32(final int fieldNumber, final int value) 218 throws IOException { 219 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 220 writeSInt32NoTag(value); 221 } 222 223 /** Write an {@code sint64} field, including tag, to the stream. */ writeSInt64(final int fieldNumber, final long value)224 public void writeSInt64(final int fieldNumber, final long value) 225 throws IOException { 226 writeTag(fieldNumber, WireFormatNano.WIRETYPE_VARINT); 227 writeSInt64NoTag(value); 228 } 229 230 /** 231 * Write a MessageSet extension field to the stream. For historical reasons, 232 * the wire format differs from normal fields. 233 */ 234 // public void writeMessageSetExtension(final int fieldNumber, 235 // final MessageMicro value) 236 // throws IOException { 237 // writeTag(WireFormatMicro.MESSAGE_SET_ITEM, WireFormatMicro.WIRETYPE_START_GROUP); 238 // writeUInt32(WireFormatMicro.MESSAGE_SET_TYPE_ID, fieldNumber); 239 // writeMessage(WireFormatMicro.MESSAGE_SET_MESSAGE, value); 240 // writeTag(WireFormatMicro.MESSAGE_SET_ITEM, WireFormatMicro.WIRETYPE_END_GROUP); 241 // } 242 243 /** 244 * Write an unparsed MessageSet extension field to the stream. For 245 * historical reasons, the wire format differs from normal fields. 
246 */ 247 // public void writeRawMessageSetExtension(final int fieldNumber, 248 // final ByteStringMicro value) 249 // throws IOException { 250 // writeTag(WireFormatMicro.MESSAGE_SET_ITEM, WireFormatMicro.WIRETYPE_START_GROUP); 251 // writeUInt32(WireFormatMicro.MESSAGE_SET_TYPE_ID, fieldNumber); 252 // writeBytes(WireFormatMicro.MESSAGE_SET_MESSAGE, value); 253 // writeTag(WireFormatMicro.MESSAGE_SET_ITEM, WireFormatMicro.WIRETYPE_END_GROUP); 254 // } 255 256 // ----------------------------------------------------------------- 257 258 /** Write a {@code double} field to the stream. */ writeDoubleNoTag(final double value)259 public void writeDoubleNoTag(final double value) throws IOException { 260 writeRawLittleEndian64(Double.doubleToLongBits(value)); 261 } 262 263 /** Write a {@code float} field to the stream. */ writeFloatNoTag(final float value)264 public void writeFloatNoTag(final float value) throws IOException { 265 writeRawLittleEndian32(Float.floatToIntBits(value)); 266 } 267 268 /** Write a {@code uint64} field to the stream. */ writeUInt64NoTag(final long value)269 public void writeUInt64NoTag(final long value) throws IOException { 270 writeRawVarint64(value); 271 } 272 273 /** Write an {@code int64} field to the stream. */ writeInt64NoTag(final long value)274 public void writeInt64NoTag(final long value) throws IOException { 275 writeRawVarint64(value); 276 } 277 278 /** Write an {@code int32} field to the stream. */ writeInt32NoTag(final int value)279 public void writeInt32NoTag(final int value) throws IOException { 280 if (value >= 0) { 281 writeRawVarint32(value); 282 } else { 283 // Must sign-extend. 284 writeRawVarint64(value); 285 } 286 } 287 288 /** Write a {@code fixed64} field to the stream. */ writeFixed64NoTag(final long value)289 public void writeFixed64NoTag(final long value) throws IOException { 290 writeRawLittleEndian64(value); 291 } 292 293 /** Write a {@code fixed32} field to the stream. 
*/ writeFixed32NoTag(final int value)294 public void writeFixed32NoTag(final int value) throws IOException { 295 writeRawLittleEndian32(value); 296 } 297 298 /** Write a {@code bool} field to the stream. */ writeBoolNoTag(final boolean value)299 public void writeBoolNoTag(final boolean value) throws IOException { 300 writeRawByte(value ? 1 : 0); 301 } 302 303 /** Write a {@code string} field to the stream. */ writeStringNoTag(final String value)304 public void writeStringNoTag(final String value) throws IOException { 305 // UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()), 306 // and at most 3 times of it. Optimize for the case where we know this length results in a 307 // constant varint length - saves measuring length of the string. 308 try { 309 final int minLengthVarIntSize = computeRawVarint32Size(value.length()); 310 final int maxLengthVarIntSize = computeRawVarint32Size(value.length() * MAX_UTF8_EXPANSION); 311 if (minLengthVarIntSize == maxLengthVarIntSize) { 312 int oldPosition = buffer.position(); 313 // Buffer.position, when passed a position that is past its limit, throws 314 // IllegalArgumentException, and this class is documented to throw 315 // OutOfSpaceException instead. 
316 if (buffer.remaining() < minLengthVarIntSize) { 317 throw new OutOfSpaceException(oldPosition + minLengthVarIntSize, buffer.limit()); 318 } 319 buffer.position(oldPosition + minLengthVarIntSize); 320 encode(value, buffer); 321 int newPosition = buffer.position(); 322 buffer.position(oldPosition); 323 writeRawVarint32(newPosition - oldPosition - minLengthVarIntSize); 324 buffer.position(newPosition); 325 } else { 326 writeRawVarint32(encodedLength(value)); 327 encode(value, buffer); 328 } 329 } catch (BufferOverflowException e) { 330 final OutOfSpaceException outOfSpaceException = new OutOfSpaceException(buffer.position(), 331 buffer.limit()); 332 outOfSpaceException.initCause(e); 333 throw outOfSpaceException; 334 } 335 } 336 337 // These UTF-8 handling methods are copied from Guava's Utf8 class. 338 /** 339 * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string, 340 * this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in 341 * both time and space. 342 * 343 * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired 344 * surrogates) 345 */ encodedLength(CharSequence sequence)346 private static int encodedLength(CharSequence sequence) { 347 // Warning to maintainers: this implementation is highly optimized. 348 int utf16Length = sequence.length(); 349 int utf8Length = utf16Length; 350 int i = 0; 351 352 // This loop optimizes for pure ASCII. 353 while (i < utf16Length && sequence.charAt(i) < 0x80) { 354 i++; 355 } 356 357 // This loop optimizes for chars less than 0x800. 358 for (; i < utf16Length; i++) { 359 char c = sequence.charAt(i); 360 if (c < 0x800) { 361 utf8Length += ((0x7f - c) >>> 31); // branch free! 
362 } else { 363 utf8Length += encodedLengthGeneral(sequence, i); 364 break; 365 } 366 } 367 368 if (utf8Length < utf16Length) { 369 // Necessary and sufficient condition for overflow because of maximum 3x expansion 370 throw new IllegalArgumentException("UTF-8 length does not fit in int: " 371 + (utf8Length + (1L << 32))); 372 } 373 return utf8Length; 374 } 375 encodedLengthGeneral(CharSequence sequence, int start)376 private static int encodedLengthGeneral(CharSequence sequence, int start) { 377 int utf16Length = sequence.length(); 378 int utf8Length = 0; 379 for (int i = start; i < utf16Length; i++) { 380 char c = sequence.charAt(i); 381 if (c < 0x800) { 382 utf8Length += (0x7f - c) >>> 31; // branch free! 383 } else { 384 utf8Length += 2; 385 // jdk7+: if (Character.isSurrogate(c)) { 386 if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) { 387 // Check that we have a well-formed surrogate pair. 388 int cp = Character.codePointAt(sequence, i); 389 if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 390 throw new IllegalArgumentException("Unpaired surrogate at index " + i); 391 } 392 i++; 393 } 394 } 395 } 396 return utf8Length; 397 } 398 399 /** 400 * Encodes {@code sequence} into UTF-8, in {@code byteBuffer}. For a string, this method is 401 * equivalent to {@code buffer.put(string.getBytes(UTF_8))}, but is more efficient in both time 402 * and space. Bytes are written starting at the current position. This method requires paired 403 * surrogates, and therefore does not support chunking. 404 * 405 * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to 406 * compute the exact amount needed, or leave room for {@code 3 * sequence.length()}, which is the 407 * largest possible number of bytes that any input can be encoded to. 
408 * 409 * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired 410 * surrogates) 411 * @throws BufferOverflowException if {@code sequence} encoded in UTF-8 does not fit in 412 * {@code byteBuffer}'s remaining space. 413 * @throws ReadOnlyBufferException if {@code byteBuffer} is a read-only buffer. 414 */ encode(CharSequence sequence, ByteBuffer byteBuffer)415 private static void encode(CharSequence sequence, ByteBuffer byteBuffer) { 416 if (byteBuffer.isReadOnly()) { 417 throw new ReadOnlyBufferException(); 418 } else if (byteBuffer.hasArray()) { 419 try { 420 int encoded = encode(sequence, 421 byteBuffer.array(), 422 byteBuffer.arrayOffset() + byteBuffer.position(), 423 byteBuffer.remaining()); 424 byteBuffer.position(encoded - byteBuffer.arrayOffset()); 425 } catch (ArrayIndexOutOfBoundsException e) { 426 BufferOverflowException boe = new BufferOverflowException(); 427 boe.initCause(e); 428 throw boe; 429 } 430 } else { 431 encodeDirect(sequence, byteBuffer); 432 } 433 } 434 encodeDirect(CharSequence sequence, ByteBuffer byteBuffer)435 private static void encodeDirect(CharSequence sequence, ByteBuffer byteBuffer) { 436 int utf16Length = sequence.length(); 437 for (int i = 0; i < utf16Length; i++) { 438 final char c = sequence.charAt(i); 439 if (c < 0x80) { // ASCII 440 byteBuffer.put((byte) c); 441 } else if (c < 0x800) { // 11 bits, two UTF-8 bytes 442 byteBuffer.put((byte) ((0xF << 6) | (c >>> 6))); 443 byteBuffer.put((byte) (0x80 | (0x3F & c))); 444 } else if (c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) { 445 // Maximium single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes 446 byteBuffer.put((byte) ((0xF << 5) | (c >>> 12))); 447 byteBuffer.put((byte) (0x80 | (0x3F & (c >>> 6)))); 448 byteBuffer.put((byte) (0x80 | (0x3F & c))); 449 } else { 450 final char low; 451 if (i + 1 == sequence.length() 452 || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) { 453 throw new 
IllegalArgumentException("Unpaired surrogate at index " + (i - 1)); 454 } 455 int codePoint = Character.toCodePoint(c, low); 456 byteBuffer.put((byte) ((0xF << 4) | (codePoint >>> 18))); 457 byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 12)))); 458 byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 6)))); 459 byteBuffer.put((byte) (0x80 | (0x3F & codePoint))); 460 } 461 } 462 } 463 encode(CharSequence sequence, byte[] bytes, int offset, int length)464 private static int encode(CharSequence sequence, byte[] bytes, int offset, int length) { 465 int utf16Length = sequence.length(); 466 int j = offset; 467 int i = 0; 468 int limit = offset + length; 469 // Designed to take advantage of 470 // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination 471 for (char c; i < utf16Length && i + j < limit && (c = sequence.charAt(i)) < 0x80; i++) { 472 bytes[j + i] = (byte) c; 473 } 474 if (i == utf16Length) { 475 return j + utf16Length; 476 } 477 j += i; 478 for (char c; i < utf16Length; i++) { 479 c = sequence.charAt(i); 480 if (c < 0x80 && j < limit) { 481 bytes[j++] = (byte) c; 482 } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes 483 bytes[j++] = (byte) ((0xF << 6) | (c >>> 6)); 484 bytes[j++] = (byte) (0x80 | (0x3F & c)); 485 } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) { 486 // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes 487 bytes[j++] = (byte) ((0xF << 5) | (c >>> 12)); 488 bytes[j++] = (byte) (0x80 | (0x3F & (c >>> 6))); 489 bytes[j++] = (byte) (0x80 | (0x3F & c)); 490 } else if (j <= limit - 4) { 491 // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 bytes 492 final char low; 493 if (i + 1 == sequence.length() 494 || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) { 495 throw new IllegalArgumentException("Unpaired surrogate at index " + (i - 1)); 496 } 497 int codePoint = Character.toCodePoint(c, low); 498 
bytes[j++] = (byte) ((0xF << 4) | (codePoint >>> 18)); 499 bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12))); 500 bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6))); 501 bytes[j++] = (byte) (0x80 | (0x3F & codePoint)); 502 } else { 503 // If we are surrogates and we're not a surrogate pair, always throw an 504 // IllegalArgumentException instead of an ArrayOutOfBoundsException. 505 if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) 506 && (i + 1 == sequence.length() 507 || !Character.isSurrogatePair(c, sequence.charAt(i + 1)))) { 508 throw new IllegalArgumentException("Unpaired surrogate at index " + i); 509 } 510 throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j); 511 } 512 } 513 return j; 514 } 515 516 // End guava UTF-8 methods 517 518 /** Write a {@code group} field to the stream. */ writeGroupNoTag(final MessageNano value)519 public void writeGroupNoTag(final MessageNano value) throws IOException { 520 value.writeTo(this); 521 } 522 523 /** Write an embedded message field to the stream. */ writeMessageNoTag(final MessageNano value)524 public void writeMessageNoTag(final MessageNano value) throws IOException { 525 writeRawVarint32(value.getCachedSize()); 526 value.writeTo(this); 527 } 528 529 /** Write a {@code bytes} field to the stream. */ writeBytesNoTag(final byte[] value)530 public void writeBytesNoTag(final byte[] value) throws IOException { 531 writeRawVarint32(value.length); 532 writeRawBytes(value); 533 } 534 535 /** Write a {@code bytes} field to the stream. */ writeBytesNoTag(final byte[] value, final int offset, final int length)536 public void writeBytesNoTag(final byte[] value, final int offset, final int length) 537 throws IOException { 538 writeRawVarint32(length); 539 writeRawBytes(value, offset, length); 540 } 541 542 /** Write a {@code uint32} field to the stream. 
*/ writeUInt32NoTag(final int value)543 public void writeUInt32NoTag(final int value) throws IOException { 544 writeRawVarint32(value); 545 } 546 547 /** 548 * Write an enum field to the stream. Caller is responsible 549 * for converting the enum value to its numeric value. 550 */ writeEnumNoTag(final int value)551 public void writeEnumNoTag(final int value) throws IOException { 552 writeRawVarint32(value); 553 } 554 555 /** Write an {@code sfixed32} field to the stream. */ writeSFixed32NoTag(final int value)556 public void writeSFixed32NoTag(final int value) throws IOException { 557 writeRawLittleEndian32(value); 558 } 559 560 /** Write an {@code sfixed64} field to the stream. */ writeSFixed64NoTag(final long value)561 public void writeSFixed64NoTag(final long value) throws IOException { 562 writeRawLittleEndian64(value); 563 } 564 565 /** Write an {@code sint32} field to the stream. */ writeSInt32NoTag(final int value)566 public void writeSInt32NoTag(final int value) throws IOException { 567 writeRawVarint32(encodeZigZag32(value)); 568 } 569 570 /** Write an {@code sint64} field to the stream. */ writeSInt64NoTag(final long value)571 public void writeSInt64NoTag(final long value) throws IOException { 572 writeRawVarint64(encodeZigZag64(value)); 573 } 574 575 // ================================================================= 576 577 /** 578 * Compute the number of bytes that would be needed to encode a 579 * {@code double} field, including tag. 580 */ computeDoubleSize(final int fieldNumber, final double value)581 public static int computeDoubleSize(final int fieldNumber, 582 final double value) { 583 return computeTagSize(fieldNumber) + computeDoubleSizeNoTag(value); 584 } 585 586 /** 587 * Compute the number of bytes that would be needed to encode a 588 * {@code float} field, including tag. 
589 */ computeFloatSize(final int fieldNumber, final float value)590 public static int computeFloatSize(final int fieldNumber, final float value) { 591 return computeTagSize(fieldNumber) + computeFloatSizeNoTag(value); 592 } 593 594 /** 595 * Compute the number of bytes that would be needed to encode a 596 * {@code uint64} field, including tag. 597 */ computeUInt64Size(final int fieldNumber, final long value)598 public static int computeUInt64Size(final int fieldNumber, final long value) { 599 return computeTagSize(fieldNumber) + computeUInt64SizeNoTag(value); 600 } 601 602 /** 603 * Compute the number of bytes that would be needed to encode an 604 * {@code int64} field, including tag. 605 */ computeInt64Size(final int fieldNumber, final long value)606 public static int computeInt64Size(final int fieldNumber, final long value) { 607 return computeTagSize(fieldNumber) + computeInt64SizeNoTag(value); 608 } 609 610 /** 611 * Compute the number of bytes that would be needed to encode an 612 * {@code int32} field, including tag. 613 */ computeInt32Size(final int fieldNumber, final int value)614 public static int computeInt32Size(final int fieldNumber, final int value) { 615 return computeTagSize(fieldNumber) + computeInt32SizeNoTag(value); 616 } 617 618 /** 619 * Compute the number of bytes that would be needed to encode a 620 * {@code fixed64} field, including tag. 621 */ computeFixed64Size(final int fieldNumber, final long value)622 public static int computeFixed64Size(final int fieldNumber, 623 final long value) { 624 return computeTagSize(fieldNumber) + computeFixed64SizeNoTag(value); 625 } 626 627 /** 628 * Compute the number of bytes that would be needed to encode a 629 * {@code fixed32} field, including tag. 
630 */ computeFixed32Size(final int fieldNumber, final int value)631 public static int computeFixed32Size(final int fieldNumber, 632 final int value) { 633 return computeTagSize(fieldNumber) + computeFixed32SizeNoTag(value); 634 } 635 636 /** 637 * Compute the number of bytes that would be needed to encode a 638 * {@code bool} field, including tag. 639 */ computeBoolSize(final int fieldNumber, final boolean value)640 public static int computeBoolSize(final int fieldNumber, 641 final boolean value) { 642 return computeTagSize(fieldNumber) + computeBoolSizeNoTag(value); 643 } 644 645 /** 646 * Compute the number of bytes that would be needed to encode a 647 * {@code string} field, including tag. 648 */ computeStringSize(final int fieldNumber, final String value)649 public static int computeStringSize(final int fieldNumber, 650 final String value) { 651 return computeTagSize(fieldNumber) + computeStringSizeNoTag(value); 652 } 653 654 /** 655 * Compute the number of bytes that would be needed to encode a 656 * {@code group} field, including tag. 657 */ computeGroupSize(final int fieldNumber, final MessageNano value)658 public static int computeGroupSize(final int fieldNumber, 659 final MessageNano value) { 660 return computeTagSize(fieldNumber) * 2 + computeGroupSizeNoTag(value); 661 } 662 663 /** 664 * Compute the number of bytes that would be needed to encode an 665 * embedded message field, including tag. 666 */ computeMessageSize(final int fieldNumber, final MessageNano value)667 public static int computeMessageSize(final int fieldNumber, 668 final MessageNano value) { 669 return computeTagSize(fieldNumber) + computeMessageSizeNoTag(value); 670 } 671 672 /** 673 * Compute the number of bytes that would be needed to encode a 674 * {@code bytes} field, including tag. 
675 */ computeBytesSize(final int fieldNumber, final byte[] value)676 public static int computeBytesSize(final int fieldNumber, 677 final byte[] value) { 678 return computeTagSize(fieldNumber) + computeBytesSizeNoTag(value); 679 } 680 681 /** 682 * Compute the number of bytes that would be needed to encode a 683 * {@code bytes} field of the given length, including tag. 684 */ computeBytesSize(final int fieldNumber, final int length)685 public static int computeBytesSize(final int fieldNumber, 686 final int length) { 687 return computeTagSize(fieldNumber) + computeBytesSizeNoTag(length); 688 } 689 690 /** 691 * Compute the number of bytes that would be needed to encode a 692 * {@code uint32} field, including tag. 693 */ computeUInt32Size(final int fieldNumber, final int value)694 public static int computeUInt32Size(final int fieldNumber, final int value) { 695 return computeTagSize(fieldNumber) + computeUInt32SizeNoTag(value); 696 } 697 698 /** 699 * Compute the number of bytes that would be needed to encode an 700 * enum field, including tag. Caller is responsible for converting the 701 * enum value to its numeric value. 702 */ computeEnumSize(final int fieldNumber, final int value)703 public static int computeEnumSize(final int fieldNumber, final int value) { 704 return computeTagSize(fieldNumber) + computeEnumSizeNoTag(value); 705 } 706 707 /** 708 * Compute the number of bytes that would be needed to encode an 709 * {@code sfixed32} field, including tag. 710 */ computeSFixed32Size(final int fieldNumber, final int value)711 public static int computeSFixed32Size(final int fieldNumber, 712 final int value) { 713 return computeTagSize(fieldNumber) + computeSFixed32SizeNoTag(value); 714 } 715 716 /** 717 * Compute the number of bytes that would be needed to encode an 718 * {@code sfixed64} field, including tag. 
719 */ computeSFixed64Size(final int fieldNumber, final long value)720 public static int computeSFixed64Size(final int fieldNumber, 721 final long value) { 722 return computeTagSize(fieldNumber) + computeSFixed64SizeNoTag(value); 723 } 724 725 /** 726 * Compute the number of bytes that would be needed to encode an 727 * {@code sint32} field, including tag. 728 */ computeSInt32Size(final int fieldNumber, final int value)729 public static int computeSInt32Size(final int fieldNumber, final int value) { 730 return computeTagSize(fieldNumber) + computeSInt32SizeNoTag(value); 731 } 732 733 /** 734 * Compute the number of bytes that would be needed to encode an 735 * {@code sint64} field, including tag. 736 */ computeSInt64Size(final int fieldNumber, final long value)737 public static int computeSInt64Size(final int fieldNumber, final long value) { 738 return computeTagSize(fieldNumber) + computeSInt64SizeNoTag(value); 739 } 740 741 /** 742 * Compute the number of bytes that would be needed to encode a 743 * MessageSet extension to the stream. For historical reasons, 744 * the wire format differs from normal fields. 745 */ 746 // public static int computeMessageSetExtensionSize( 747 // final int fieldNumber, final MessageMicro value) { 748 // return computeTagSize(WireFormatMicro.MESSAGE_SET_ITEM) * 2 + 749 // computeUInt32Size(WireFormatMicro.MESSAGE_SET_TYPE_ID, fieldNumber) + 750 // computeMessageSize(WireFormatMicro.MESSAGE_SET_MESSAGE, value); 751 // } 752 753 /** 754 * Compute the number of bytes that would be needed to encode an 755 * unparsed MessageSet extension field to the stream. For 756 * historical reasons, the wire format differs from normal fields. 
757 */ 758 // public static int computeRawMessageSetExtensionSize( 759 // final int fieldNumber, final ByteStringMicro value) { 760 // return computeTagSize(WireFormatMicro.MESSAGE_SET_ITEM) * 2 + 761 // computeUInt32Size(WireFormatMicro.MESSAGE_SET_TYPE_ID, fieldNumber) + 762 // computeBytesSize(WireFormatMicro.MESSAGE_SET_MESSAGE, value); 763 // } 764 765 // ----------------------------------------------------------------- 766 767 /** 768 * Compute the number of bytes that would be needed to encode a 769 * {@code double} field, including tag. 770 */ computeDoubleSizeNoTag(final double value)771 public static int computeDoubleSizeNoTag(final double value) { 772 return LITTLE_ENDIAN_64_SIZE; 773 } 774 775 /** 776 * Compute the number of bytes that would be needed to encode a 777 * {@code float} field, including tag. 778 */ computeFloatSizeNoTag(final float value)779 public static int computeFloatSizeNoTag(final float value) { 780 return LITTLE_ENDIAN_32_SIZE; 781 } 782 783 /** 784 * Compute the number of bytes that would be needed to encode a 785 * {@code uint64} field, including tag. 786 */ computeUInt64SizeNoTag(final long value)787 public static int computeUInt64SizeNoTag(final long value) { 788 return computeRawVarint64Size(value); 789 } 790 791 /** 792 * Compute the number of bytes that would be needed to encode an 793 * {@code int64} field, including tag. 794 */ computeInt64SizeNoTag(final long value)795 public static int computeInt64SizeNoTag(final long value) { 796 return computeRawVarint64Size(value); 797 } 798 799 /** 800 * Compute the number of bytes that would be needed to encode an 801 * {@code int32} field, including tag. 802 */ computeInt32SizeNoTag(final int value)803 public static int computeInt32SizeNoTag(final int value) { 804 if (value >= 0) { 805 return computeRawVarint32Size(value); 806 } else { 807 // Must sign-extend. 
808 return 10; 809 } 810 } 811 812 /** 813 * Compute the number of bytes that would be needed to encode a 814 * {@code fixed64} field. 815 */ computeFixed64SizeNoTag(final long value)816 public static int computeFixed64SizeNoTag(final long value) { 817 return LITTLE_ENDIAN_64_SIZE; 818 } 819 820 /** 821 * Compute the number of bytes that would be needed to encode a 822 * {@code fixed32} field. 823 */ computeFixed32SizeNoTag(final int value)824 public static int computeFixed32SizeNoTag(final int value) { 825 return LITTLE_ENDIAN_32_SIZE; 826 } 827 828 /** 829 * Compute the number of bytes that would be needed to encode a 830 * {@code bool} field. 831 */ computeBoolSizeNoTag(final boolean value)832 public static int computeBoolSizeNoTag(final boolean value) { 833 return 1; 834 } 835 836 /** 837 * Compute the number of bytes that would be needed to encode a 838 * {@code string} field. 839 */ computeStringSizeNoTag(final String value)840 public static int computeStringSizeNoTag(final String value) { 841 final int length = encodedLength(value); 842 return computeRawVarint32Size(length) + length; 843 } 844 845 /** 846 * Compute the number of bytes that would be needed to encode a 847 * {@code group} field. 848 */ computeGroupSizeNoTag(final MessageNano value)849 public static int computeGroupSizeNoTag(final MessageNano value) { 850 return value.getSerializedSize(); 851 } 852 853 /** 854 * Compute the number of bytes that would be needed to encode an embedded 855 * message field. 856 */ computeMessageSizeNoTag(final MessageNano value)857 public static int computeMessageSizeNoTag(final MessageNano value) { 858 final int size = value.getSerializedSize(); 859 return computeRawVarint32Size(size) + size; 860 } 861 862 /** 863 * Compute the number of bytes that would be needed to encode a 864 * {@code bytes} field. 
865 */ computeBytesSizeNoTag(final byte[] value)866 public static int computeBytesSizeNoTag(final byte[] value) { 867 return computeRawVarint32Size(value.length) + value.length; 868 } 869 870 /** 871 * Compute the number of bytes that would be needed to encode a 872 * {@code bytes} field of the given length. 873 */ computeBytesSizeNoTag(final int length)874 public static int computeBytesSizeNoTag(final int length) { 875 return computeRawVarint32Size(length) + length; 876 } 877 878 /** 879 * Compute the number of bytes that would be needed to encode a 880 * {@code uint32} field. 881 */ computeUInt32SizeNoTag(final int value)882 public static int computeUInt32SizeNoTag(final int value) { 883 return computeRawVarint32Size(value); 884 } 885 886 /** 887 * Compute the number of bytes that would be needed to encode an enum field. 888 * Caller is responsible for converting the enum value to its numeric value. 889 */ computeEnumSizeNoTag(final int value)890 public static int computeEnumSizeNoTag(final int value) { 891 return computeRawVarint32Size(value); 892 } 893 894 /** 895 * Compute the number of bytes that would be needed to encode an 896 * {@code sfixed32} field. 897 */ computeSFixed32SizeNoTag(final int value)898 public static int computeSFixed32SizeNoTag(final int value) { 899 return LITTLE_ENDIAN_32_SIZE; 900 } 901 902 /** 903 * Compute the number of bytes that would be needed to encode an 904 * {@code sfixed64} field. 905 */ computeSFixed64SizeNoTag(final long value)906 public static int computeSFixed64SizeNoTag(final long value) { 907 return LITTLE_ENDIAN_64_SIZE; 908 } 909 910 /** 911 * Compute the number of bytes that would be needed to encode an 912 * {@code sint32} field. 913 */ computeSInt32SizeNoTag(final int value)914 public static int computeSInt32SizeNoTag(final int value) { 915 return computeRawVarint32Size(encodeZigZag32(value)); 916 } 917 918 /** 919 * Compute the number of bytes that would be needed to encode an 920 * {@code sint64} field. 
921 */ computeSInt64SizeNoTag(final long value)922 public static int computeSInt64SizeNoTag(final long value) { 923 return computeRawVarint64Size(encodeZigZag64(value)); 924 } 925 926 // ================================================================= 927 928 /** 929 * If writing to a flat array, return the space left in the array. 930 * Otherwise, throws {@code UnsupportedOperationException}. 931 */ spaceLeft()932 public int spaceLeft() { 933 return buffer.remaining(); 934 } 935 936 /** 937 * Verifies that {@link #spaceLeft()} returns zero. It's common to create 938 * a byte array that is exactly big enough to hold a message, then write to 939 * it with a {@code CodedOutputStream}. Calling {@code checkNoSpaceLeft()} 940 * after writing verifies that the message was actually as big as expected, 941 * which can help catch bugs. 942 */ checkNoSpaceLeft()943 public void checkNoSpaceLeft() { 944 if (spaceLeft() != 0) { 945 throw new IllegalStateException( 946 "Did not write as much data as expected."); 947 } 948 } 949 950 /** 951 * Returns the position within the internal buffer. 952 */ position()953 public int position() { 954 return buffer.position(); 955 } 956 957 /** 958 * Resets the position within the internal buffer to zero. 959 * 960 * @see #position 961 * @see #spaceLeft 962 */ reset()963 public void reset() { 964 buffer.clear(); 965 } 966 967 /** 968 * If you create a CodedOutputStream around a simple flat array, you must 969 * not attempt to write more bytes than the array has space. Otherwise, 970 * this exception will be thrown. 971 */ 972 public static class OutOfSpaceException extends IOException { 973 private static final long serialVersionUID = -6947486886997889499L; 974 OutOfSpaceException(int position, int limit)975 OutOfSpaceException(int position, int limit) { 976 super("CodedOutputStream was writing to a flat byte array and ran " + 977 "out of space (pos " + position + " limit " + limit + ")."); 978 } 979 } 980 981 /** Write a single byte. 
*/ writeRawByte(final byte value)982 public void writeRawByte(final byte value) throws IOException { 983 if (!buffer.hasRemaining()) { 984 // We're writing to a single buffer. 985 throw new OutOfSpaceException(buffer.position(), buffer.limit()); 986 } 987 988 buffer.put(value); 989 } 990 991 /** Write a single byte, represented by an integer value. */ writeRawByte(final int value)992 public void writeRawByte(final int value) throws IOException { 993 writeRawByte((byte) value); 994 } 995 996 /** Write an array of bytes. */ writeRawBytes(final byte[] value)997 public void writeRawBytes(final byte[] value) throws IOException { 998 writeRawBytes(value, 0, value.length); 999 } 1000 1001 /** Write part of an array of bytes. */ writeRawBytes(final byte[] value, int offset, int length)1002 public void writeRawBytes(final byte[] value, int offset, int length) 1003 throws IOException { 1004 if (buffer.remaining() >= length) { 1005 buffer.put(value, offset, length); 1006 } else { 1007 // We're writing to a single buffer. 1008 throw new OutOfSpaceException(buffer.position(), buffer.limit()); 1009 } 1010 } 1011 1012 /** Encode and write a tag. */ writeTag(final int fieldNumber, final int wireType)1013 public void writeTag(final int fieldNumber, final int wireType) 1014 throws IOException { 1015 writeRawVarint32(WireFormatNano.makeTag(fieldNumber, wireType)); 1016 } 1017 1018 /** Compute the number of bytes that would be needed to encode a tag. */ computeTagSize(final int fieldNumber)1019 public static int computeTagSize(final int fieldNumber) { 1020 return computeRawVarint32Size(WireFormatNano.makeTag(fieldNumber, 0)); 1021 } 1022 1023 /** 1024 * Encode and write a varint. {@code value} is treated as 1025 * unsigned, so it won't be sign-extended if negative. 
1026 */ writeRawVarint32(int value)1027 public void writeRawVarint32(int value) throws IOException { 1028 while (true) { 1029 if ((value & ~0x7F) == 0) { 1030 writeRawByte(value); 1031 return; 1032 } else { 1033 writeRawByte((value & 0x7F) | 0x80); 1034 value >>>= 7; 1035 } 1036 } 1037 } 1038 1039 /** 1040 * Compute the number of bytes that would be needed to encode a varint. 1041 * {@code value} is treated as unsigned, so it won't be sign-extended if 1042 * negative. 1043 */ computeRawVarint32Size(final int value)1044 public static int computeRawVarint32Size(final int value) { 1045 if ((value & (0xffffffff << 7)) == 0) return 1; 1046 if ((value & (0xffffffff << 14)) == 0) return 2; 1047 if ((value & (0xffffffff << 21)) == 0) return 3; 1048 if ((value & (0xffffffff << 28)) == 0) return 4; 1049 return 5; 1050 } 1051 1052 /** Encode and write a varint. */ writeRawVarint64(long value)1053 public void writeRawVarint64(long value) throws IOException { 1054 while (true) { 1055 if ((value & ~0x7FL) == 0) { 1056 writeRawByte((int)value); 1057 return; 1058 } else { 1059 writeRawByte(((int)value & 0x7F) | 0x80); 1060 value >>>= 7; 1061 } 1062 } 1063 } 1064 1065 /** Compute the number of bytes that would be needed to encode a varint. */ computeRawVarint64Size(final long value)1066 public static int computeRawVarint64Size(final long value) { 1067 if ((value & (0xffffffffffffffffL << 7)) == 0) return 1; 1068 if ((value & (0xffffffffffffffffL << 14)) == 0) return 2; 1069 if ((value & (0xffffffffffffffffL << 21)) == 0) return 3; 1070 if ((value & (0xffffffffffffffffL << 28)) == 0) return 4; 1071 if ((value & (0xffffffffffffffffL << 35)) == 0) return 5; 1072 if ((value & (0xffffffffffffffffL << 42)) == 0) return 6; 1073 if ((value & (0xffffffffffffffffL << 49)) == 0) return 7; 1074 if ((value & (0xffffffffffffffffL << 56)) == 0) return 8; 1075 if ((value & (0xffffffffffffffffL << 63)) == 0) return 9; 1076 return 10; 1077 } 1078 1079 /** Write a little-endian 32-bit integer. 
*/ writeRawLittleEndian32(final int value)1080 public void writeRawLittleEndian32(final int value) throws IOException { 1081 if (buffer.remaining() < 4) { 1082 throw new OutOfSpaceException(buffer.position(), buffer.limit()); 1083 } 1084 buffer.putInt(value); 1085 } 1086 1087 public static final int LITTLE_ENDIAN_32_SIZE = 4; 1088 1089 /** Write a little-endian 64-bit integer. */ writeRawLittleEndian64(final long value)1090 public void writeRawLittleEndian64(final long value) throws IOException { 1091 if (buffer.remaining() < 8) { 1092 throw new OutOfSpaceException(buffer.position(), buffer.limit()); 1093 } 1094 buffer.putLong(value); 1095 } 1096 1097 public static final int LITTLE_ENDIAN_64_SIZE = 8; 1098 1099 /** 1100 * Encode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers 1101 * into values that can be efficiently encoded with varint. (Otherwise, 1102 * negative values must be sign-extended to 64 bits to be varint encoded, 1103 * thus always taking 10 bytes on the wire.) 1104 * 1105 * @param n A signed 32-bit integer. 1106 * @return An unsigned 32-bit integer, stored in a signed int because 1107 * Java has no explicit unsigned support. 1108 */ encodeZigZag32(final int n)1109 public static int encodeZigZag32(final int n) { 1110 // Note: the right-shift must be arithmetic 1111 return (n << 1) ^ (n >> 31); 1112 } 1113 1114 /** 1115 * Encode a ZigZag-encoded 64-bit value. ZigZag encodes signed integers 1116 * into values that can be efficiently encoded with varint. (Otherwise, 1117 * negative values must be sign-extended to 64 bits to be varint encoded, 1118 * thus always taking 10 bytes on the wire.) 1119 * 1120 * @param n A signed 64-bit integer. 1121 * @return An unsigned 64-bit integer, stored in a signed int because 1122 * Java has no explicit unsigned support. 
*/
  public static long encodeZigZag64(final long n) {
    final long doubled = n << 1;
    // Arithmetic shift on purpose: yields 0 for n >= 0 and -1 for n < 0.
    final long signMask = n >> 63;
    return doubled ^ signMask;
  }
}