• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 package com.google.protobuf;
9 
10 import static com.google.common.truth.Truth.assertThat;
11 import static com.google.common.truth.Truth.assertWithMessage;
12 
13 import java.nio.ByteBuffer;
14 import java.util.Random;
15 import org.junit.Test;
16 import org.junit.runner.RunWith;
17 import org.junit.runners.JUnit4;
18 
19 @RunWith(JUnit4.class)
20 public class Utf8Test {
21   private static final int NUM_CHARS = 16384;
22 
23   private static final Utf8.Processor safeProcessor = new Utf8.SafeProcessor();
24   private static final Utf8.Processor unsafeProcessor = new Utf8.UnsafeProcessor();
25 
26   @Test
testEncode()27   public void testEncode() {
28     assertEncoding(randomString(0x80));
29     assertEncoding(randomString(0x90));
30     assertEncoding(randomString(0x800));
31     assertEncoding(randomString(0x10000));
32     assertEncoding(randomString(0x10ffff));
33   }
34 
35   @Test
testEncode_insufficientSpace()36   public void testEncode_insufficientSpace() {
37     assertEncoding_insufficientSpace(randomString(0x80));
38     assertEncoding_insufficientSpace(randomString(0x90));
39     assertEncoding_insufficientSpace(randomString(0x800));
40     assertEncoding_insufficientSpace(randomString(0x10000));
41     assertEncoding_insufficientSpace(randomString(0x10ffff));
42   }
43 
44   @Test
testValid()45   public void testValid() {
46     assertIsValid(new byte[] {(byte) 0xE0, (byte) 0xB9, (byte) 0x96}, true);
47     assertIsValid(new byte[] {(byte) 0xF0, (byte) 0xB2, (byte) 0x83, (byte) 0xBC}, true);
48   }
49 
50   @Test
testOverlongIsInvalid()51   public void testOverlongIsInvalid() {
52     assertIsValid(new byte[] {(byte) 0xC0, (byte) 0x81}, false);
53     assertIsValid(new byte[] {(byte) 0xE0, (byte) 0x81, (byte) 0x81}, false);
54     assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
55   }
56 
57   @Test
testMaxCodepointExceeded()58   public void testMaxCodepointExceeded() {
59     // byte1 > 0xF4
60     assertIsValid(new byte[] {(byte) 0xF5, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
61   }
62 
63   @Test
testInvalidSurrogateCodepoint()64   public void testInvalidSurrogateCodepoint() {
65     assertIsValid(new byte[] {(byte) 0xED, (byte) 0xA1, (byte) 0x81}, false);
66 
67     // byte1 == 0xF0 && byte2 < 0x90
68     assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
69     // byte1 == 0xF4 && byte2 > 0x8F
70     assertIsValid(new byte[] {(byte) 0xF4, (byte) 0x90, (byte) 0x81, (byte) 0x81}, false);
71   }
72 
randomString(int maxCodePoint)73   private static String randomString(int maxCodePoint) {
74     final long seed = 99;
75     final Random rnd = new Random(seed);
76     StringBuilder sb = new StringBuilder();
77     for (int j = 0; j < NUM_CHARS; j++) {
78       int codePoint;
79       do {
80         codePoint = rnd.nextInt(maxCodePoint);
81       } while (Character.isSurrogate((char) codePoint));
82       sb.appendCodePoint(codePoint);
83     }
84     return sb.toString();
85   }
86 
assertIsValid(byte[] data, boolean valid)87   private static void assertIsValid(byte[] data, boolean valid) {
88     assertWithMessage("isValidUtf8[ARRAY]")
89         .that(safeProcessor.isValidUtf8(data, 0, data.length))
90         .isEqualTo(valid);
91     assertWithMessage("isValidUtf8[ARRAY_UNSAFE]")
92         .that(unsafeProcessor.isValidUtf8(data, 0, data.length))
93         .isEqualTo(valid);
94 
95     ByteBuffer buffer = ByteBuffer.wrap(data);
96     assertWithMessage("isValidUtf8[NIO_HEAP]")
97         .that(safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()))
98         .isEqualTo(valid);
99 
100     // Direct buffers.
101     buffer = ByteBuffer.allocateDirect(data.length);
102     buffer.put(data);
103     buffer.flip();
104     assertWithMessage("isValidUtf8[NIO_DEFAULT]")
105         .that(safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()))
106         .isEqualTo(valid);
107     assertWithMessage("isValidUtf8[NIO_UNSAFE]")
108         .that(unsafeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()))
109         .isEqualTo(valid);
110   }
111 
assertEncoding(String message)112   private static void assertEncoding(String message) {
113     byte[] expected = message.getBytes(Internal.UTF_8);
114     byte[] output = encodeToByteArray(message, expected.length, safeProcessor);
115     assertWithMessage("encodeUtf8[ARRAY]")
116         .that(output).isEqualTo(expected);
117 
118     output = encodeToByteArray(message, expected.length, unsafeProcessor);
119     assertWithMessage("encodeUtf8[ARRAY_UNSAFE]")
120         .that(output).isEqualTo(expected);
121 
122     output = encodeToByteBuffer(message, expected.length, false, safeProcessor);
123     assertWithMessage("encodeUtf8[NIO_HEAP]")
124         .that(output).isEqualTo(expected);
125 
126     output = encodeToByteBuffer(message, expected.length, true, safeProcessor);
127     assertWithMessage("encodeUtf8[NIO_DEFAULT]")
128         .that(output).isEqualTo(expected);
129 
130     output = encodeToByteBuffer(message, expected.length, true, unsafeProcessor);
131     assertWithMessage("encodeUtf8[NIO_UNSAFE]")
132         .that(output).isEqualTo(expected);
133   }
134 
assertEncoding_insufficientSpace(String message)135   private void assertEncoding_insufficientSpace(String message) {
136     final int length = message.length() - 1;
137     Class<ArrayIndexOutOfBoundsException> clazz = ArrayIndexOutOfBoundsException.class;
138 
139     try {
140       encodeToByteArray(message, length, safeProcessor);
141       assertWithMessage("Expected " + clazz.getSimpleName()).fail();
142     } catch (Throwable t) {
143       // Expected
144       assertThat(t).isInstanceOf(clazz);
145       // byte[] + safeProcessor will not exit early. We can't match the message since we don't
146       // know which char/index due to random input.
147     }
148 
149     try {
150       encodeToByteArray(message, length, unsafeProcessor);
151       assertWithMessage("Expected " + clazz.getSimpleName()).fail();
152     } catch (Throwable t) {
153       assertThat(t).isInstanceOf(clazz);
154       // byte[] + unsafeProcessor will exit early, so we have can match the message.
155       String pattern = "Failed writing (.) at index " + length;
156       assertThat(t).hasMessageThat().matches(pattern);
157     }
158 
159     try {
160       encodeToByteBuffer(message, length, false, safeProcessor);
161       assertWithMessage("Expected " + clazz.getSimpleName()).fail();
162     } catch (Throwable t) {
163       // Expected
164       assertThat(t).isInstanceOf(clazz);
165       // ByteBuffer + safeProcessor will not exit early. We can't match the message since we don't
166       // know which char/index due to random input.
167     }
168 
169     try {
170       encodeToByteBuffer(message, length, true, safeProcessor);
171       assertWithMessage("Expected " + clazz.getSimpleName()).fail();
172     } catch (Throwable t) {
173       // Expected
174       assertThat(t).isInstanceOf(clazz);
175       // ByteBuffer + safeProcessor will not exit early. We can't match the message since we don't
176       // know which char/index due to random input.
177     }
178 
179     try {
180       encodeToByteBuffer(message, length, true, unsafeProcessor);
181       assertWithMessage("Expected " + clazz.getSimpleName()).fail();
182     } catch (Throwable t) {
183       // Expected
184       assertThat(t).isInstanceOf(clazz);
185       // Direct ByteBuffer + unsafeProcessor will exit early if it's not on Android, so we can
186       // match the message. On Android, a direct ByteBuffer will have hasArray() being true and
187       // it will take a different code path and produces a different message.
188       if (!Android.isOnAndroidDevice()) {
189         String pattern = "Failed writing (.) at index " + length;
190         assertThat(t).hasMessageThat().matches(pattern);
191       }
192     }
193   }
194 
encodeToByteArray(String message, int length, Utf8.Processor processor)195   private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
196     byte[] output = new byte[length];
197     int unused = processor.encodeUtf8(message, output, 0, output.length);
198     return output;
199   }
200 
encodeToByteBuffer( String message, int length, boolean direct, Utf8.Processor processor)201   private static byte[] encodeToByteBuffer(
202       String message, int length, boolean direct, Utf8.Processor processor) {
203     ByteBuffer buffer = direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);
204 
205     processor.encodeUtf8(message, buffer);
206     buffer.flip();
207 
208     byte[] output = new byte[buffer.remaining()];
209     buffer.get(output);
210     return output;
211   }
212 
213 }
214