• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 package com.google.protobuf;
9 
10 import static com.google.common.truth.Truth.assertThat;
11 import static com.google.common.truth.Truth.assertWithMessage;
12 
13 import java.lang.ref.SoftReference;
14 import java.nio.ByteBuffer;
15 import java.util.ArrayList;
16 import java.util.Arrays;
17 import java.util.List;
18 import java.util.Random;
19 
20 /**
21  * Shared testing code for {@link IsValidUtf8Test} and {@link IsValidUtf8FourByteTest}.
22  *
23  * @author jonp@google.com (Jon Perlow)
24  * @author martinrb@google.com (Martin Buchholz)
25  */
26 final class IsValidUtf8TestUtil {
IsValidUtf8TestUtil()27   private IsValidUtf8TestUtil() {}
28 
29   static interface ByteStringFactory {
newByteString(byte[] bytes)30     ByteString newByteString(byte[] bytes);
31   }
32 
33   static final ByteStringFactory LITERAL_FACTORY =
34       new ByteStringFactory() {
35         @Override
36         public ByteString newByteString(byte[] bytes) {
37           return ByteString.wrap(bytes);
38         }
39       };
40 
41   static final ByteStringFactory HEAP_NIO_FACTORY =
42       new ByteStringFactory() {
43         @Override
44         public ByteString newByteString(byte[] bytes) {
45           return ByteString.nioByteString(ByteBuffer.wrap(bytes));
46         }
47       };
48 
49   private static final ThreadLocal<SoftReference<ByteBuffer>> directBuffer = new ThreadLocal<>();
50 
51   /**
52    * Factory for direct {@link ByteBuffer} instances. To reduce direct memory usage, this uses a
53    * thread local direct buffer. This means that each call will overwrite the buffer's contents from
54    * the previous call, so the calling code must be careful not to continue using a buffer returned
55    * from a previous invocation.
56    */
57   static final ByteStringFactory DIRECT_NIO_FACTORY =
58       new ByteStringFactory() {
59         @Override
60         public ByteString newByteString(byte[] bytes) {
61           SoftReference<ByteBuffer> ref = directBuffer.get();
62           ByteBuffer buffer = ref == null ? null : ref.get();
63           if (buffer == null || buffer.capacity() < bytes.length) {
64             buffer = ByteBuffer.allocateDirect(bytes.length);
65             directBuffer.set(new SoftReference<ByteBuffer>(buffer));
66           }
67           buffer.clear();
68           buffer.put(bytes);
69           buffer.flip();
70           return ByteString.nioByteString(buffer);
71         }
72       };
73 
74   static final ByteStringFactory ROPE_FACTORY =
75       new ByteStringFactory() {
76         // Seed the random number generator with 0 so that the tests are deterministic.
77         private final Random random = new Random(0);
78 
79         @Override
80         public ByteString newByteString(byte[] bytes) {
81           // We split the byte array into three pieces (some possibly empty) by choosing two random
82           // cut points i and j.
83           int i = random.nextInt(bytes.length);
84           int j = random.nextInt(bytes.length);
85           if (j < i) {
86             int tmp = i;
87             i = j;
88             j = tmp;
89           }
90           return RopeByteString.newInstanceForTest(
91               ByteString.wrap(bytes, 0, i),
92               RopeByteString.newInstanceForTest(
93                   ByteString.wrap(bytes, i, j - i), ByteString.wrap(bytes, j, bytes.length - j)));
94         }
95       };
96 
97   // 128 - [chars 0x0000 to 0x007f]
98   static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
99 
100   // 128
101   static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
102 
103   // 1920 [chars 0x0080 to 0x07FF]
104   static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
105 
106   // 18,304
107   static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
108       // Both bytes are one byte characters
109       (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2)
110           +
111           // The possible number of two byte characters
112           TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
113 
114   // 2048
115   static final long THREE_BYTE_SURROGATES = 2 * 1024;
116 
117   // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
118   static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
119       0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
120 
121   // 2,650,112
122   static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
123       // All one byte characters
124       (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3)
125           +
126           // One two byte character and a one byte character
127           2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
128           +
129           // Three byte characters
130           THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
131 
132   // 1,048,576 [chars 0x10000L to 0x10FFFF]
133   static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
134 
135   // 289,571,839
136   static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
137       // All one byte characters
138       (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4)
139           +
140           // One and three byte characters
141           2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
142           +
143           // Two two byte characters
144           TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS
145           +
146           // Permutations of one and two byte characters
147           3
148               * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS
149               * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
150               * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
151           +
152           // Four byte characters
153           FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
154 
155   static final class Shard {
156     final long index;
157     final long start;
158     final long lim;
159     final long expected;
160 
Shard(long index, long start, long lim, long expected)161     public Shard(long index, long start, long lim, long expected) {
162       assertThat(start).isLessThan(lim);
163       this.index = index;
164       this.start = start;
165       this.lim = lim;
166       this.expected = expected;
167     }
168   }
169 
170   static final long[] FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES =
171       generateFourByteShardsExpectedRunnables();
172 
generateFourByteShardsExpectedRunnables()173   private static long[] generateFourByteShardsExpectedRunnables() {
174     long[] expected = new long[128];
175 
176     // 0-63 are all 5300224
177     for (int i = 0; i <= 63; i++) {
178       expected[i] = 5300224;
179     }
180 
181     // 97-111 are all 2342912
182     for (int i = 97; i <= 111; i++) {
183       expected[i] = 2342912;
184     }
185 
186     // 113-117 are all 1048576
187     for (int i = 113; i <= 117; i++) {
188       expected[i] = 1048576;
189     }
190 
191     // One offs
192     expected[112] = 786432;
193     expected[118] = 786432;
194     expected[119] = 1048576;
195     expected[120] = 458752;
196     expected[121] = 524288;
197     expected[122] = 65536;
198 
199     // Anything not assigned was the default 0.
200     return expected;
201   }
202 
203   static final List<Shard> FOUR_BYTE_SHARDS =
204       generateFourByteShards(128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
205 
generateFourByteShards(int numShards, long[] expected)206   private static List<Shard> generateFourByteShards(int numShards, long[] expected) {
207     assertThat(expected).hasLength(numShards);
208     List<Shard> shards = new ArrayList<>(numShards);
209     long lim = 1L << 32;
210     long increment = lim / numShards;
211     assertThat(lim % numShards).isEqualTo(0);
212     for (int i = 0; i < numShards; i++) {
213       shards.add(new Shard(i, increment * i, increment * (i + 1), expected[i]));
214     }
215     return shards;
216   }
217 
218   /**
219    * Helper to run the loop to test all the permutations for the number of bytes specified.
220    *
221    * @param factory the factory for {@link ByteString} instances.
222    * @param numBytes the number of bytes in the byte array
223    * @param expectedCount the expected number of roundtrippable permutations
224    */
testBytes(ByteStringFactory factory, int numBytes, long expectedCount)225   static void testBytes(ByteStringFactory factory, int numBytes, long expectedCount) {
226     testBytes(factory, numBytes, expectedCount, 0, -1);
227   }
228 
229   /**
230    * Helper to run the loop to test all the permutations for the number of bytes specified. This
231    * overload is useful for debugging to get the loop to start at a certain character.
232    *
233    * @param factory the factory for {@link ByteString} instances.
234    * @param numBytes the number of bytes in the byte array
235    * @param expectedCount the expected number of roundtrippable permutations
236    * @param start the starting bytes encoded as a long as big-endian
237    * @param lim the limit of bytes to process encoded as a long as big-endian, or -1 to mean the max
238    *     limit for numBytes
239    */
testBytes( ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim)240   static void testBytes(
241       ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
242     byte[] bytes = new byte[numBytes];
243 
244     if (lim == -1) {
245       lim = 1L << (numBytes * 8);
246     }
247     long countRoundTripped = 0;
248     for (long byteChar = start; byteChar < lim; byteChar++) {
249       long tmpByteChar = byteChar;
250       for (int i = 0; i < numBytes; i++) {
251         bytes[bytes.length - i - 1] = (byte) tmpByteChar;
252         tmpByteChar = tmpByteChar >> 8;
253       }
254       ByteString bs = factory.newByteString(bytes);
255       boolean isRoundTrippable = bs.isValidUtf8();
256       String s = new String(bytes, Internal.UTF_8);
257       byte[] bytesReencoded = s.getBytes(Internal.UTF_8);
258       boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);
259 
260       if (bytesEqual != isRoundTrippable) {
261         outputFailure(byteChar, bytes, bytesReencoded);
262       }
263 
264       // Check agreement with Utf8.isValidUtf8.
265       assertThat(Utf8.isValidUtf8(bytes)).isEqualTo(isRoundTrippable);
266 
267       if (isRoundTrippable) {
268         countRoundTripped++;
269       }
270     }
271     assertThat(countRoundTripped).isEqualTo(expectedCount);
272   }
273 
outputFailure(long byteChar, byte[] bytes, byte[] after)274   private static void outputFailure(long byteChar, byte[] bytes, byte[] after) {
275     outputFailure(byteChar, bytes, after, after.length);
276   }
277 
outputFailure(long byteChar, byte[] bytes, byte[] after, int len)278   private static void outputFailure(long byteChar, byte[] bytes, byte[] after, int len) {
279     assertWithMessage("Failure: (%s) %s => %s",
280             Long.toHexString(byteChar), toHexString(bytes), toHexString(after, len)).fail();
281   }
282 
toHexString(byte[] b)283   private static String toHexString(byte[] b) {
284     return toHexString(b, b.length);
285   }
286 
toHexString(byte[] b, int len)287   private static String toHexString(byte[] b, int len) {
288     StringBuilder s = new StringBuilder();
289     s.append("\"");
290     for (int i = 0; i < len; i++) {
291       if (i > 0) {
292         s.append(" ");
293       }
294       s.append(String.format("%02x", b[i] & 0xFF));
295     }
296     s.append("\"");
297     return s.toString();
298   }
299 }
300