• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import java.nio.ByteBuffer;
34 import java.util.Arrays;
35 import java.util.Random;
36 import java.util.regex.Pattern;
37 import junit.framework.TestCase;
38 
39 /** Unit tests for {@link Utf8}. */
40 public class Utf8Test extends TestCase {
41   private static final int NUM_CHARS = 16384;
42 
43   private static final Utf8.Processor safeProcessor = new Utf8.SafeProcessor();
44   private static final Utf8.Processor unsafeProcessor = new Utf8.UnsafeProcessor();
45 
testEncode()46   public void testEncode() {
47     assertEncoding(randomString(0x80));
48     assertEncoding(randomString(0x90));
49     assertEncoding(randomString(0x800));
50     assertEncoding(randomString(0x10000));
51     assertEncoding(randomString(0x10ffff));
52   }
53 
testEncode_insufficientSpace()54   public void testEncode_insufficientSpace() {
55     assertEncoding_insufficientSpace(randomString(0x80));
56     assertEncoding_insufficientSpace(randomString(0x90));
57     assertEncoding_insufficientSpace(randomString(0x800));
58     assertEncoding_insufficientSpace(randomString(0x10000));
59     assertEncoding_insufficientSpace(randomString(0x10ffff));
60   }
61 
testValid()62   public void testValid() {
63     assertIsValid(new byte[] {(byte) 0xE0, (byte) 0xB9, (byte) 0x96}, true);
64     assertIsValid(new byte[] {(byte) 0xF0, (byte) 0xB2, (byte) 0x83, (byte) 0xBC}, true);
65   }
66 
testOverlongIsInvalid()67   public void testOverlongIsInvalid() {
68     assertIsValid(new byte[] {(byte) 0xC0, (byte) 0x81}, false);
69     assertIsValid(new byte[] {(byte) 0xE0, (byte) 0x81, (byte) 0x81}, false);
70     assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
71   }
72 
testMaxCodepointExceeded()73   public void testMaxCodepointExceeded() {
74     // byte1 > 0xF4
75     assertIsValid(new byte[] {(byte) 0xF5, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
76   }
77 
testInvalidSurrogateCodepoint()78   public void testInvalidSurrogateCodepoint() {
79     assertIsValid(new byte[] {(byte) 0xED, (byte) 0xA1, (byte) 0x81}, false);
80 
81     // byte1 == 0xF0 && byte2 < 0x90
82     assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
83     // byte1 == 0xF4 && byte2 > 0x8F
84     assertIsValid(new byte[] {(byte) 0xF4, (byte) 0x90, (byte) 0x81, (byte) 0x81}, false);
85   }
86 
randomString(int maxCodePoint)87   private static String randomString(int maxCodePoint) {
88     final long seed = 99;
89     final Random rnd = new Random(seed);
90     StringBuilder sb = new StringBuilder();
91     for (int j = 0; j < NUM_CHARS; j++) {
92       int codePoint;
93       do {
94         codePoint = rnd.nextInt(maxCodePoint);
95       } while (Utf8Utils.isSurrogate(codePoint));
96       sb.appendCodePoint(codePoint);
97     }
98     return sb.toString();
99   }
100 
assertIsValid(byte[] data, boolean valid)101   private static void assertIsValid(byte[] data, boolean valid) {
102     assertEquals("isValidUtf8[ARRAY]", valid, safeProcessor.isValidUtf8(data, 0, data.length));
103     assertEquals(
104         "isValidUtf8[ARRAY_UNSAFE]", valid, unsafeProcessor.isValidUtf8(data, 0, data.length));
105 
106     ByteBuffer buffer = ByteBuffer.wrap(data);
107     assertEquals(
108         "isValidUtf8[NIO_HEAP]",
109         valid,
110         safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()));
111 
112     // Direct buffers.
113     buffer = ByteBuffer.allocateDirect(data.length);
114     buffer.put(data);
115     buffer.flip();
116     assertEquals(
117         "isValidUtf8[NIO_DEFAULT]",
118         valid,
119         safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()));
120     assertEquals(
121         "isValidUtf8[NIO_UNSAFE]",
122         valid,
123         unsafeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()));
124   }
125 
assertEncoding(String message)126   private static void assertEncoding(String message) {
127     byte[] expected = message.getBytes(Internal.UTF_8);
128     byte[] output = encodeToByteArray(message, expected.length, safeProcessor);
129     assertTrue("encodeUtf8[ARRAY]", Arrays.equals(expected, output));
130 
131     output = encodeToByteArray(message, expected.length, unsafeProcessor);
132     assertTrue("encodeUtf8[ARRAY_UNSAFE]", Arrays.equals(expected, output));
133 
134     output = encodeToByteBuffer(message, expected.length, false, safeProcessor);
135     assertTrue("encodeUtf8[NIO_HEAP]", Arrays.equals(expected, output));
136 
137     output = encodeToByteBuffer(message, expected.length, true, safeProcessor);
138     assertTrue("encodeUtf8[NIO_DEFAULT]", Arrays.equals(expected, output));
139 
140     output = encodeToByteBuffer(message, expected.length, true, unsafeProcessor);
141     assertTrue("encodeUtf8[NIO_UNSAFE]", Arrays.equals(expected, output));
142   }
143 
assertEncoding_insufficientSpace(String message)144   private void assertEncoding_insufficientSpace(String message) {
145     final int length = message.length() - 1;
146     Class<ArrayIndexOutOfBoundsException> clazz = ArrayIndexOutOfBoundsException.class;
147 
148     try {
149       encodeToByteArray(message, length, safeProcessor);
150       fail("Expected " + clazz.getSimpleName());
151     } catch (Throwable t) {
152       // Expected
153       assertExceptionType(t, clazz);
154       // byte[] + safeProcessor will not exit early. We can't match the message since we don't
155       // know which char/index due to random input.
156     }
157 
158     try {
159       encodeToByteArray(message, length, unsafeProcessor);
160       fail("Expected " + clazz.getSimpleName());
161     } catch (Throwable t) {
162       assertExceptionType(t, clazz);
163       // byte[] + unsafeProcessor will exit early, so we have can match the message.
164       assertExceptionMessage(t, length);
165     }
166 
167     try {
168       encodeToByteBuffer(message, length, false, safeProcessor);
169       fail("Expected " + clazz.getSimpleName());
170     } catch (Throwable t) {
171       // Expected
172       assertExceptionType(t, clazz);
173       // ByteBuffer + safeProcessor will not exit early. We can't match the message since we don't
174       // know which char/index due to random input.
175     }
176 
177     try {
178       encodeToByteBuffer(message, length, true, safeProcessor);
179       fail("Expected " + clazz.getSimpleName());
180     } catch (Throwable t) {
181       // Expected
182       assertExceptionType(t, clazz);
183       // ByteBuffer + safeProcessor will not exit early. We can't match the message since we don't
184       // know which char/index due to random input.
185     }
186 
187     try {
188       encodeToByteBuffer(message, length, true, unsafeProcessor);
189       fail("Expected " + clazz.getSimpleName());
190     } catch (Throwable t) {
191       // Expected
192       assertExceptionType(t, clazz);
193       // Direct ByteBuffer + unsafeProcessor will exit early if it's not on Android, so we can
194       // match the message. On Android, a direct ByteBuffer will have hasArray() being true and
195       // it will take a different code path and produces a different message.
196       if (!Android.isOnAndroidDevice()) {
197         assertExceptionMessage(t, length);
198       }
199     }
200   }
201 
encodeToByteArray(String message, int length, Utf8.Processor processor)202   private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
203     byte[] output = new byte[length];
204     processor.encodeUtf8(message, output, 0, output.length);
205     return output;
206   }
207 
encodeToByteBuffer( String message, int length, boolean direct, Utf8.Processor processor)208   private static byte[] encodeToByteBuffer(
209       String message, int length, boolean direct, Utf8.Processor processor) {
210     ByteBuffer buffer = direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);
211 
212     processor.encodeUtf8(message, buffer);
213     buffer.flip();
214 
215     byte[] output = new byte[buffer.remaining()];
216     buffer.get(output);
217     return output;
218   }
219 
assertExceptionType(Throwable t, Class<T> expected)220   private <T extends Throwable> void assertExceptionType(Throwable t, Class<T> expected) {
221     if (!expected.isAssignableFrom(t.getClass())) {
222       fail("Expected " + expected.getSimpleName() + ", but found " + t.getClass().getSimpleName());
223     }
224   }
225 
assertExceptionMessage(Throwable t, int index)226   private void assertExceptionMessage(Throwable t, int index) {
227     String pattern = "Failed writing (.) at index " + index;
228     assertTrue(
229         t.getMessage() + " does not match pattern " + pattern,
230         Pattern.matches(pattern, t.getMessage()));
231   }
232 }
233