// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

package com.google.protobuf;

import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;

import java.nio.ByteBuffer;
import java.util.Random;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests UTF-8 validation and encoding through both {@link Utf8.SafeProcessor} and {@link
 * Utf8.UnsafeProcessor}, using {@code byte[]}, heap {@link ByteBuffer} and direct {@link
 * ByteBuffer} inputs/outputs.
 */
@RunWith(JUnit4.class)
public class Utf8Test {
  /** Number of code points generated for each random test string. */
  private static final int NUM_CHARS = 16384;

  private static final Utf8.Processor safeProcessor = new Utf8.SafeProcessor();
  private static final Utf8.Processor unsafeProcessor = new Utf8.UnsafeProcessor();

  /** Encodes random strings with increasing code point ceilings and compares with the JDK. */
  @Test
  public void testEncode() {
    assertEncoding(randomString(0x80));
    assertEncoding(randomString(0x90));
    assertEncoding(randomString(0x800));
    assertEncoding(randomString(0x10000));
    assertEncoding(randomString(0x10ffff));
  }

  /** Encodes the same random strings into outputs that are too small and expects a failure. */
  @Test
  public void testEncode_insufficientSpace() {
    assertEncoding_insufficientSpace(randomString(0x80));
    assertEncoding_insufficientSpace(randomString(0x90));
    assertEncoding_insufficientSpace(randomString(0x800));
    assertEncoding_insufficientSpace(randomString(0x10000));
    assertEncoding_insufficientSpace(randomString(0x10ffff));
  }

  @Test
  public void testValid() {
    assertIsValid(new byte[] {(byte) 0xE0, (byte) 0xB9, (byte) 0x96}, true);
    assertIsValid(new byte[] {(byte) 0xF0, (byte) 0xB2, (byte) 0x83, (byte) 0xBC}, true);
  }

  @Test
  public void testOverlongIsInvalid() {
    assertIsValid(new byte[] {(byte) 0xC0, (byte) 0x81}, false);
    assertIsValid(new byte[] {(byte) 0xE0, (byte) 0x81, (byte) 0x81}, false);
    assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
  }

  @Test
  public void testMaxCodepointExceeded() {
    // byte1 > 0xF4
    assertIsValid(new byte[] {(byte) 0xF5, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
  }

  @Test
  public void testInvalidSurrogateCodepoint() {
    assertIsValid(new byte[] {(byte) 0xED, (byte) 0xA1, (byte) 0x81}, false);

    // byte1 == 0xF0 && byte2 < 0x90
    assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
    // byte1 == 0xF4 && byte2 > 0x8F
    assertIsValid(new byte[] {(byte) 0xF4, (byte) 0x90, (byte) 0x81, (byte) 0x81}, false);
  }

  /**
   * Builds a reproducible pseudo-random string of {@link #NUM_CHARS} code points.
   *
   * <p>The generator is seeded with a fixed value, so the same {@code maxCodePoint} always yields
   * the same string. Code points in the surrogate range are skipped because they cannot be
   * appended as stand-alone code points of a well-formed string.
   *
   * @param maxCodePoint exclusive upper bound of the generated code points
   * @return the generated string
   */
  private static String randomString(int maxCodePoint) {
    final long seed = 99;
    final Random rnd = new Random(seed);
    StringBuilder sb = new StringBuilder();
    for (int j = 0; j < NUM_CHARS; j++) {
      int codePoint;
      do {
        codePoint = rnd.nextInt(maxCodePoint);
        // Compare the full int value: narrowing to char would keep only the low 16 bits and
        // wrongly reject valid supplementary code points such as U+1D800.
      } while (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE);
      sb.appendCodePoint(codePoint);
    }
    return sb.toString();
  }

  /**
   * Asserts that {@code isValidUtf8} returns {@code valid} for {@code data} when it is supplied as
   * a byte array (safe and unsafe processors), as a heap buffer (safe processor) and as a direct
   * buffer (safe and unsafe processors).
   *
   * @param data the bytes to validate
   * @param valid the expected validation result
   */
  private static void assertIsValid(byte[] data, boolean valid) {
    assertWithMessage("isValidUtf8[ARRAY]")
        .that(safeProcessor.isValidUtf8(data, 0, data.length))
        .isEqualTo(valid);
    assertWithMessage("isValidUtf8[ARRAY_UNSAFE]")
        .that(unsafeProcessor.isValidUtf8(data, 0, data.length))
        .isEqualTo(valid);

    ByteBuffer buffer = ByteBuffer.wrap(data);
    assertWithMessage("isValidUtf8[NIO_HEAP]")
        .that(safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()))
        .isEqualTo(valid);

    // Direct buffers.
    buffer = ByteBuffer.allocateDirect(data.length);
    buffer.put(data);
    buffer.flip();
    assertWithMessage("isValidUtf8[NIO_DEFAULT]")
        .that(safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()))
        .isEqualTo(valid);
    assertWithMessage("isValidUtf8[NIO_UNSAFE]")
        .that(unsafeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()))
        .isEqualTo(valid);
  }

  /**
   * Asserts that every processor/output combination encodes {@code message} to exactly the bytes
   * produced by {@code String.getBytes(Internal.UTF_8)}.
   *
   * @param message the string to encode
   */
  private static void assertEncoding(String message) {
    byte[] expected = message.getBytes(Internal.UTF_8);
    byte[] output = encodeToByteArray(message, expected.length, safeProcessor);
    assertWithMessage("encodeUtf8[ARRAY]").that(output).isEqualTo(expected);

    output = encodeToByteArray(message, expected.length, unsafeProcessor);
    assertWithMessage("encodeUtf8[ARRAY_UNSAFE]").that(output).isEqualTo(expected);

    output = encodeToByteBuffer(message, expected.length, false, safeProcessor);
    assertWithMessage("encodeUtf8[NIO_HEAP]").that(output).isEqualTo(expected);

    output = encodeToByteBuffer(message, expected.length, true, safeProcessor);
    assertWithMessage("encodeUtf8[NIO_DEFAULT]").that(output).isEqualTo(expected);

    output = encodeToByteBuffer(message, expected.length, true, unsafeProcessor);
    assertWithMessage("encodeUtf8[NIO_UNSAFE]").that(output).isEqualTo(expected);
  }

  /**
   * Asserts that encoding {@code message} into an output one element shorter than its {@code
   * char} count fails with {@link ArrayIndexOutOfBoundsException} for every processor/output
   * combination.
   *
   * <p>Only {@link ArrayIndexOutOfBoundsException} is caught, so the {@code fail()} raised when no
   * exception is thrown (and any unexpected exception type) propagates unchanged instead of being
   * re-reported as a type mismatch.
   *
   * @param message the string to encode
   */
  private void assertEncoding_insufficientSpace(String message) {
    final int length = message.length() - 1;
    final String expectedName = ArrayIndexOutOfBoundsException.class.getSimpleName();
    // (?s) lets '.' match a line terminator as well, so the check does not depend on which
    // character the random input happens to report.
    final String pattern = "(?s)Failed writing (.) at index " + length;

    try {
      encodeToByteArray(message, length, safeProcessor);
      assertWithMessage("Expected " + expectedName).fail();
    } catch (ArrayIndexOutOfBoundsException expected) {
      // Expected.
      // byte[] + safeProcessor will not exit early. We can't match the message since we don't
      // know which char/index due to random input.
    }

    try {
      encodeToByteArray(message, length, unsafeProcessor);
      assertWithMessage("Expected " + expectedName).fail();
    } catch (ArrayIndexOutOfBoundsException expected) {
      // byte[] + unsafeProcessor will exit early, so we can match the message.
      assertThat(expected).hasMessageThat().matches(pattern);
    }

    try {
      encodeToByteBuffer(message, length, false, safeProcessor);
      assertWithMessage("Expected " + expectedName).fail();
    } catch (ArrayIndexOutOfBoundsException expected) {
      // Expected.
      // ByteBuffer + safeProcessor will not exit early. We can't match the message since we don't
      // know which char/index due to random input.
    }

    try {
      encodeToByteBuffer(message, length, true, safeProcessor);
      assertWithMessage("Expected " + expectedName).fail();
    } catch (ArrayIndexOutOfBoundsException expected) {
      // Expected.
      // ByteBuffer + safeProcessor will not exit early. We can't match the message since we don't
      // know which char/index due to random input.
    }

    try {
      encodeToByteBuffer(message, length, true, unsafeProcessor);
      assertWithMessage("Expected " + expectedName).fail();
    } catch (ArrayIndexOutOfBoundsException expected) {
      // Direct ByteBuffer + unsafeProcessor will exit early if it's not on Android, so we can
      // match the message. On Android, a direct ByteBuffer will have hasArray() being true and
      // it will take a different code path and produces a different message.
      if (!Android.isOnAndroidDevice()) {
        assertThat(expected).hasMessageThat().matches(pattern);
      }
    }
  }

  /**
   * Encodes {@code message} into a freshly allocated byte array of {@code length} bytes.
   *
   * @param message the string to encode
   * @param length the size of the output array
   * @param processor the processor under test
   * @return the output array
   */
  private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
    byte[] output = new byte[length];
    // The return value is not needed here; it is assigned only to make that explicit.
    int unused = processor.encodeUtf8(message, output, 0, output.length);
    return output;
  }

  /**
   * Encodes {@code message} into a freshly allocated {@link ByteBuffer} of capacity {@code
   * length} and returns the bytes that were written.
   *
   * @param message the string to encode
   * @param length the capacity of the output buffer
   * @param direct whether to allocate a direct buffer rather than a heap buffer
   * @param processor the processor under test
   * @return the bytes between the buffer's start and the position reached by the encoder
   */
  private static byte[] encodeToByteBuffer(
      String message, int length, boolean direct, Utf8.Processor processor) {
    ByteBuffer buffer = direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);

    processor.encodeUtf8(message, buffer);
    buffer.flip();

    byte[] output = new byte[buffer.remaining()];
    buffer.get(output);
    return output;
  }
}