// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

package com.google.protobuf;

import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;

import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

/**
 * Shared testing code for {@link IsValidUtf8Test} and {@link IsValidUtf8FourByteTest}.
 *
 * <p>Provides factories that build {@link ByteString} instances of different flavours from a byte
 * array, the expected number of round-trippable byte sequences for each length from one to four
 * bytes, and the brute-force loop ({@link #testBytes}) that checks every byte sequence of a given
 * length.
 *
 * @author jonp@google.com (Jon Perlow)
 * @author martinrb@google.com (Martin Buchholz)
 */
final class IsValidUtf8TestUtil {
  private IsValidUtf8TestUtil() {}

  /** Creates the {@link ByteString} under test from a raw byte array. */
  static interface ByteStringFactory {
    ByteString newByteString(byte[] bytes);
  }

  /** Factory that wraps the given array directly via {@link ByteString#wrap(byte[])}. */
  static final ByteStringFactory LITERAL_FACTORY =
      new ByteStringFactory() {
        @Override
        public ByteString newByteString(byte[] bytes) {
          return ByteString.wrap(bytes);
        }
      };

  /** Factory that wraps the given array in a heap {@link ByteBuffer}. */
  static final ByteStringFactory HEAP_NIO_FACTORY =
      new ByteStringFactory() {
        @Override
        public ByteString newByteString(byte[] bytes) {
          return ByteString.nioByteString(ByteBuffer.wrap(bytes));
        }
      };

  // Per-thread cache of the direct buffer used by DIRECT_NIO_FACTORY. Held through a SoftReference
  // so that the cached buffer does not have to be kept alive by this class alone.
  private static final ThreadLocal<SoftReference<ByteBuffer>> directBuffer = new ThreadLocal<>();

  /**
   * Factory for direct {@link ByteBuffer} instances. To reduce direct memory usage, this uses a
   * thread local direct buffer. This means that each call will overwrite the buffer's contents from
   * the previous call, so the calling code must be careful not to continue using a buffer returned
   * from a previous invocation.
   */
  static final ByteStringFactory DIRECT_NIO_FACTORY =
      new ByteStringFactory() {
        @Override
        public ByteString newByteString(byte[] bytes) {
          SoftReference<ByteBuffer> ref = directBuffer.get();
          ByteBuffer buffer = ref == null ? null : ref.get();
          // (Re)allocate when there is no cached buffer or it is too small for this input.
          if (buffer == null || buffer.capacity() < bytes.length) {
            buffer = ByteBuffer.allocateDirect(bytes.length);
            directBuffer.set(new SoftReference<>(buffer));
          }
          // Reset position/limit, copy the input in, then expose exactly bytes.length bytes.
          buffer.clear();
          buffer.put(bytes);
          buffer.flip();
          return ByteString.nioByteString(buffer);
        }
      };

  /**
   * Factory that splits the input into three consecutive pieces and joins them as a nested rope.
   * The split points are pseudo-random but reproducible because the generator is seeded with 0.
   */
  static final ByteStringFactory ROPE_FACTORY =
      new ByteStringFactory() {
        // Seed the random number generator with 0 so that the tests are deterministic.
        private final Random random = new Random(0);

        @Override
        public ByteString newByteString(byte[] bytes) {
          // We split the byte array into three pieces (some possibly empty) by choosing two random
          // cut points i and j.
          //
          // Random.nextInt(bound) rejects a non-positive bound, so an empty array uses a bound of
          // 1, which yields i == j == 0 and three empty pieces. For non-empty arrays the bound,
          // and therefore the sequence of cut points, is unchanged.
          int bound = bytes.length == 0 ? 1 : bytes.length;
          int i = random.nextInt(bound);
          int j = random.nextInt(bound);
          if (j < i) {
            int tmp = i;
            i = j;
            j = tmp;
          }
          return RopeByteString.newInstanceForTest(
              ByteString.wrap(bytes, 0, i),
              RopeByteString.newInstanceForTest(
                  ByteString.wrap(bytes, i, j - i), ByteString.wrap(bytes, j, bytes.length - j)));
        }
      };

  // 128 - [chars 0x0000 to 0x007f]
  static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;

  // 128
  static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 1920 [chars 0x0080 to 0x07FF]
  static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;

  // 18,304
  static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
      // Both bytes are one byte characters
      EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT * EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
          +
          // The possible number of two byte characters
          TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 2048
  static final long THREE_BYTE_SURROGATES = 2 * 1024;

  // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
  static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
      0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;

  // 2,650,112
  static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
      // All one byte characters
      EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
              * EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
              * EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
          +
          // One two byte character and a one byte character
          2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Three byte characters
          THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 1,048,576 [chars 0x10000L to 0x10FFFF]
  static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;

  // 383,270,912 (this is also the sum of FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES)
  static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
      // All one byte characters
      EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
              * EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
              * EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
              * EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT
          +
          // One and three byte characters
          2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Two two byte characters
          TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Permutations of one and two byte characters
          3
              * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS
              * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
              * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Four byte characters
          FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  /**
   * One slice of the four-byte search space: the half-open range {@code [start, lim)} of big-endian
   * encoded byte sequences, plus the number of round-trippable sequences expected inside it.
   */
  static final class Shard {
    final long index;
    final long start;
    final long lim;
    final long expected;

    public Shard(long index, long start, long lim, long expected) {
      assertThat(start).isLessThan(lim);
      this.index = index;
      this.start = start;
      this.lim = lim;
      this.expected = expected;
    }
  }

  // NOTE: the misspelling "ROUNTRIPPABLES" is kept because this constant is visible to other
  // classes in the package.
  static final long[] FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES =
      generateFourByteShardsExpectedRoundtrippables();

  /**
   * Builds the table of expected round-trippable counts for each of the 128 four-byte shards.
   * Entries that are not explicitly assigned keep the array default of 0.
   */
  private static long[] generateFourByteShardsExpectedRoundtrippables() {
    long[] expected = new long[128];

    // 0-63 are all 5300224
    for (int i = 0; i <= 63; i++) {
      expected[i] = 5300224;
    }

    // 97-111 are all 2342912
    for (int i = 97; i <= 111; i++) {
      expected[i] = 2342912;
    }

    // 113-117 are all 1048576
    for (int i = 113; i <= 117; i++) {
      expected[i] = 1048576;
    }

    // One offs
    expected[112] = 786432;
    expected[118] = 786432;
    expected[119] = 1048576;
    expected[120] = 458752;
    expected[121] = 524288;
    expected[122] = 65536;

    // Anything not assigned was the default 0.
    return expected;
  }

  static final List<Shard> FOUR_BYTE_SHARDS =
      generateFourByteShards(128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);

  /**
   * Splits the range {@code [0, 2^32)} into {@code numShards} equally sized, consecutive shards.
   *
   * @param numShards the number of shards; must evenly divide 2^32
   * @param expected the expected round-trippable count per shard; its length must equal {@code
   *     numShards}
   * @return the shards in ascending order of their start value
   */
  private static List<Shard> generateFourByteShards(int numShards, long[] expected) {
    assertThat(expected).hasLength(numShards);
    List<Shard> shards = new ArrayList<>(numShards);
    long lim = 1L << 32;
    long increment = lim / numShards;
    assertThat(lim % numShards).isEqualTo(0);
    for (int i = 0; i < numShards; i++) {
      shards.add(new Shard(i, increment * i, increment * (i + 1), expected[i]));
    }
    return shards;
  }

  /**
   * Helper to run the loop to test all the permutations for the number of bytes specified.
   *
   * @param factory the factory for {@link ByteString} instances.
   * @param numBytes the number of bytes in the byte array
   * @param expectedCount the expected number of roundtrippable permutations
   */
  static void testBytes(ByteStringFactory factory, int numBytes, long expectedCount) {
    testBytes(factory, numBytes, expectedCount, 0, -1);
  }

  /**
   * Helper to run the loop to test all the permutations for the number of bytes specified. This
   * overload is useful for debugging to get the loop to start at a certain character.
   *
   * <p>For every value in {@code [start, lim)} the value is written big-endian into a {@code
   * numBytes}-long array, and three things are compared: {@link ByteString#isValidUtf8()} on the
   * factory-built string, whether decoding and re-encoding the bytes as UTF-8 reproduces them, and
   * {@code Utf8.isValidUtf8(bytes)}. All three must agree.
   *
   * @param factory the factory for {@link ByteString} instances.
   * @param numBytes the number of bytes in the byte array; the default limit is computed as {@code
   *     1L << (numBytes * 8)}, so it is only meaningful for {@code numBytes < 8}
   * @param expectedCount the expected number of roundtrippable permutations
   * @param start the starting bytes encoded as a long as big-endian
   * @param lim the limit (exclusive) of bytes to process encoded as a long as big-endian, or -1 to
   *     mean the max limit for numBytes
   */
  static void testBytes(
      ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
    byte[] bytes = new byte[numBytes];

    if (lim == -1) {
      lim = 1L << (numBytes * 8);
    }
    long countRoundTripped = 0;
    for (long byteChar = start; byteChar < lim; byteChar++) {
      // Unpack the counter into the array, least significant byte last (big-endian).
      long tmpByteChar = byteChar;
      for (int i = 0; i < numBytes; i++) {
        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
        tmpByteChar = tmpByteChar >> 8;
      }
      ByteString bs = factory.newByteString(bytes);
      boolean isRoundTrippable = bs.isValidUtf8();
      String s = new String(bytes, Internal.UTF_8);
      byte[] bytesReencoded = s.getBytes(Internal.UTF_8);
      boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);

      if (bytesEqual != isRoundTrippable) {
        outputFailure(byteChar, bytes, bytesReencoded);
      }

      // Check agreement with Utf8.isValidUtf8.
      assertThat(Utf8.isValidUtf8(bytes)).isEqualTo(isRoundTrippable);

      if (isRoundTrippable) {
        countRoundTripped++;
      }
    }
    assertThat(countRoundTripped).isEqualTo(expectedCount);
  }

  /** Fails the test, reporting the input bytes and the complete re-encoded bytes in hex. */
  private static void outputFailure(long byteChar, byte[] bytes, byte[] after) {
    outputFailure(byteChar, bytes, after, after.length);
  }

  /** Fails the test, reporting the input bytes and the first {@code len} re-encoded bytes. */
  private static void outputFailure(long byteChar, byte[] bytes, byte[] after, int len) {
    assertWithMessage("Failure: (%s) %s => %s",
        Long.toHexString(byteChar), toHexString(bytes), toHexString(after, len)).fail();
  }

  /** Formats the whole array as a quoted, space-separated list of two-digit hex values. */
  private static String toHexString(byte[] b) {
    return toHexString(b, b.length);
  }

  /** Formats the first {@code len} bytes as a quoted, space-separated list of two-digit hex. */
  private static String toHexString(byte[] b, int len) {
    StringBuilder s = new StringBuilder();
    s.append("\"");
    for (int i = 0; i < len; i++) {
      if (i > 0) {
        s.append(" ");
      }
      s.append(String.format("%02x", b[i] & 0xFF));
    }
    s.append("\"");
    return s.toString();
  }
}