• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 package com.google.protobuf;
9 
10 import com.google.protobuf.Utf8.Processor;
11 import com.google.protobuf.Utf8.SafeProcessor;
12 import com.google.protobuf.Utf8.UnsafeProcessor;
13 import java.nio.ByteBuffer;
14 import java.util.ArrayList;
15 import java.util.List;
16 import java.util.logging.Logger;
17 import junit.framework.TestCase;
18 
19 public class DecodeUtf8Test extends TestCase {
20   private static Logger logger = Logger.getLogger(DecodeUtf8Test.class.getName());
21 
22   private static final Processor SAFE_PROCESSOR = new SafeProcessor();
23   private static final Processor UNSAFE_PROCESSOR = new UnsafeProcessor();
24 
testRoundTripAllValidChars()25   public void testRoundTripAllValidChars() throws Exception {
26     for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) {
27       if (i < Character.MIN_SURROGATE || i > Character.MAX_SURROGATE) {
28         String str = new String(Character.toChars(i));
29         assertRoundTrips(str);
30       }
31     }
32   }
33 
  // Test all invalid 1-, 2-, and 3-byte combinations. Valid ones are covered by the round-trip test above.
35 
testOneByte()36   public void testOneByte() throws Exception {
37     int valid = 0;
38     ByteBuffer buffer = ByteBuffer.allocateDirect(1);
39     for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
40       ByteString bs = ByteString.copyFrom(new byte[] {(byte) i});
41       if (bs.isValidUtf8()) {
42         valid++;
43       } else {
44         assertInvalid(bs.toByteArray(), buffer);
45       }
46     }
47     assertEquals(IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
48   }
49 
testTwoBytes()50   public void testTwoBytes() throws Exception {
51     int valid = 0;
52     ByteBuffer buffer = ByteBuffer.allocateDirect(2);
53     for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
54       for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
55         ByteString bs = ByteString.copyFrom(new byte[] {(byte) i, (byte) j});
56         if (bs.isValidUtf8()) {
57           valid++;
58         } else {
59           assertInvalid(bs.toByteArray(), buffer);
60         }
61       }
62     }
63     assertEquals(IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT, valid);
64   }
65 
testThreeBytes()66   public void testThreeBytes() throws Exception {
67     int count = 0;
68     int valid = 0;
69     ByteBuffer buffer = ByteBuffer.allocateDirect(3);
70     for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
71       for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
72         for (int k = Byte.MIN_VALUE; k <= Byte.MAX_VALUE; k++) {
73           byte[] bytes = new byte[] {(byte) i, (byte) j, (byte) k};
74           ByteString bs = ByteString.copyFrom(bytes);
75           if (bs.isValidUtf8()) {
76             valid++;
77           } else {
78             assertInvalid(bytes, buffer);
79           }
80           count++;
81           if (count % 1000000L == 0) {
82             logger.info("Processed " + (count / 1000000L) + " million characters");
83           }
84         }
85       }
86     }
87     assertEquals(IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
88   }
89 
90   /** Tests that round tripping of a sample of four byte permutations work. */
testInvalid_4BytesSamples()91   public void testInvalid_4BytesSamples() throws Exception {
92     // Bad trailing bytes
93     assertInvalid(0xF0, 0xA4, 0xAD, 0x7F);
94     assertInvalid(0xF0, 0xA4, 0xAD, 0xC0);
95 
96     // Special cases for byte2
97     assertInvalid(0xF0, 0x8F, 0xAD, 0xA2);
98     assertInvalid(0xF4, 0x90, 0xAD, 0xA2);
99   }
100 
testRealStrings()101   public void testRealStrings() throws Exception {
102     // English
103     assertRoundTrips("The quick brown fox jumps over the lazy dog");
104     // German
105     assertRoundTrips("Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens cirkusklovnen");
106     // Japanese
107     assertRoundTrips("\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c\u308b\u3092");
108     // Hebrew
109     assertRoundTrips(
110         "\u05d3\u05d2 \u05e1\u05e7\u05e8\u05df \u05e9\u05d8 \u05d1\u05d9\u05dd "
111             + "\u05de\u05d0\u05d5\u05db\u05d6\u05d1 \u05d5\u05dc\u05e4\u05ea\u05e2"
112             + " \u05de\u05e6\u05d0 \u05dc\u05d5 \u05d7\u05d1\u05e8\u05d4 "
113             + "\u05d0\u05d9\u05da \u05d4\u05e7\u05dc\u05d9\u05d8\u05d4");
114     // Thai
115     assertRoundTrips(
116         " \u0e08\u0e07\u0e1d\u0e48\u0e32\u0e1f\u0e31\u0e19\u0e1e\u0e31\u0e12"
117             + "\u0e19\u0e32\u0e27\u0e34\u0e0a\u0e32\u0e01\u0e32\u0e23");
118     // Chinese
119     assertRoundTrips(
120         "\u8fd4\u56de\u94fe\u4e2d\u7684\u4e0b\u4e00\u4e2a\u4ee3\u7406\u9879\u9009\u62e9\u5668");
121     // Chinese with 4-byte chars
122     assertRoundTrips(
123         "\uD841\uDF0E\uD841\uDF31\uD841\uDF79\uD843\uDC53\uD843\uDC78"
124             + "\uD843\uDC96\uD843\uDCCF\uD843\uDCD5\uD843\uDD15\uD843\uDD7C\uD843\uDD7F"
125             + "\uD843\uDE0E\uD843\uDE0F\uD843\uDE77\uD843\uDE9D\uD843\uDEA2");
126     // Mixed
127     assertRoundTrips(
128         "The quick brown \u3044\u308d\u306f\u306b\u307b\u3078\u8fd4\u56de\u94fe"
129             + "\u4e2d\u7684\u4e0b\u4e00");
130   }
131 
testOverlong()132   public void testOverlong() throws Exception {
133     assertInvalid(0xc0, 0xaf);
134     assertInvalid(0xe0, 0x80, 0xaf);
135     assertInvalid(0xf0, 0x80, 0x80, 0xaf);
136 
137     // Max overlong
138     assertInvalid(0xc1, 0xbf);
139     assertInvalid(0xe0, 0x9f, 0xbf);
140     assertInvalid(0xf0, 0x8f, 0xbf, 0xbf);
141 
142     // null overlong
143     assertInvalid(0xc0, 0x80);
144     assertInvalid(0xe0, 0x80, 0x80);
145     assertInvalid(0xf0, 0x80, 0x80, 0x80);
146   }
147 
testIllegalCodepoints()148   public void testIllegalCodepoints() throws Exception {
149     // Single surrogate
150     assertInvalid(0xed, 0xa0, 0x80);
151     assertInvalid(0xed, 0xad, 0xbf);
152     assertInvalid(0xed, 0xae, 0x80);
153     assertInvalid(0xed, 0xaf, 0xbf);
154     assertInvalid(0xed, 0xb0, 0x80);
155     assertInvalid(0xed, 0xbe, 0x80);
156     assertInvalid(0xed, 0xbf, 0xbf);
157 
158     // Paired surrogates
159     assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80);
160     assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf);
161     assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80);
162     assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf);
163     assertInvalid(0xed, 0xae, 0x80, 0xed, 0xb0, 0x80);
164     assertInvalid(0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf);
165     assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80);
166     assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf);
167   }
168 
testBufferSlice()169   public void testBufferSlice() throws Exception {
170     String str = "The quick brown fox jumps over the lazy dog";
171     assertRoundTrips(str, 10, 4);
172     assertRoundTrips(str, str.length(), 0);
173   }
174 
testInvalidBufferSlice()175   public void testInvalidBufferSlice() throws Exception {
176     byte[] bytes = "The quick brown fox jumps over the lazy dog".getBytes(Internal.UTF_8);
177     assertInvalidSlice(bytes, bytes.length - 3, 4);
178     assertInvalidSlice(bytes, bytes.length, 1);
179     assertInvalidSlice(bytes, bytes.length + 1, 0);
180     assertInvalidSlice(bytes, 0, bytes.length + 1);
181   }
182 
assertInvalid(int... bytesAsInt)183   private void assertInvalid(int... bytesAsInt) throws Exception {
184     byte[] bytes = new byte[bytesAsInt.length];
185     for (int i = 0; i < bytesAsInt.length; i++) {
186       bytes[i] = (byte) bytesAsInt[i];
187     }
188     assertInvalid(bytes, null);
189   }
190 
191   // Attempts to decode the byte array in several ways and asserts that it always generates an
192   // exception. Allocating a direct ByteBuffer is slow, so the caller can optionally provide a
193   // buffer to reuse. If buffer is non-null, it must be a direct-allocated ByteBuffer of the
194   // appropriate size.
assertInvalid(byte[] bytes, ByteBuffer buffer)195   private void assertInvalid(byte[] bytes, ByteBuffer buffer) throws Exception {
196     try {
197       UNSAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
198       fail();
199     } catch (InvalidProtocolBufferException e) {
200       // Expected.
201     }
202     try {
203       SAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
204       fail();
205     } catch (InvalidProtocolBufferException e) {
206       // Expected.
207     }
208 
209     if (buffer == null) {
210       buffer = ByteBuffer.allocateDirect(bytes.length);
211     }
212     buffer.put(bytes);
213     buffer.flip();
214     try {
215       UNSAFE_PROCESSOR.decodeUtf8(buffer, 0, bytes.length);
216       fail();
217     } catch (InvalidProtocolBufferException e) {
218       // Expected.
219     }
220     try {
221       SAFE_PROCESSOR.decodeUtf8(buffer, 0, bytes.length);
222       fail();
223     } catch (InvalidProtocolBufferException e) {
224       // Expected.
225     }
226     buffer.clear();
227   }
228 
assertInvalidSlice(byte[] bytes, int index, int size)229   private void assertInvalidSlice(byte[] bytes, int index, int size) throws Exception {
230     try {
231       UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size);
232       fail();
233     } catch (IndexOutOfBoundsException e) {
234       // Expected.
235     }
236     try {
237       SAFE_PROCESSOR.decodeUtf8(bytes, index, size);
238       fail();
239     } catch (IndexOutOfBoundsException e) {
240       // Expected.
241     }
242 
243     ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
244     direct.put(bytes);
245     direct.flip();
246     try {
247       UNSAFE_PROCESSOR.decodeUtf8(direct, index, size);
248       fail();
249     } catch (IndexOutOfBoundsException e) {
250       // Expected.
251     }
252     try {
253       SAFE_PROCESSOR.decodeUtf8(direct, index, size);
254       fail();
255     } catch (IndexOutOfBoundsException e) {
256       // Expected.
257     }
258 
259     ByteBuffer heap = ByteBuffer.allocate(bytes.length);
260     heap.put(bytes);
261     heap.flip();
262     try {
263       UNSAFE_PROCESSOR.decodeUtf8(heap, index, size);
264       fail();
265     } catch (IndexOutOfBoundsException e) {
266       // Expected.
267     }
268     try {
269       SAFE_PROCESSOR.decodeUtf8(heap, index, size);
270       fail();
271     } catch (IndexOutOfBoundsException e) {
272       // Expected.
273     }
274   }
275 
assertRoundTrips(String str)276   private void assertRoundTrips(String str) throws Exception {
277     assertRoundTrips(str, 0, -1);
278   }
279 
assertRoundTrips(String str, int index, int size)280   private void assertRoundTrips(String str, int index, int size) throws Exception {
281     byte[] bytes = str.getBytes(Internal.UTF_8);
282     if (size == -1) {
283       size = bytes.length;
284     }
285     assertDecode(
286         new String(bytes, index, size, Internal.UTF_8),
287         UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size));
288     assertDecode(
289         new String(bytes, index, size, Internal.UTF_8),
290         SAFE_PROCESSOR.decodeUtf8(bytes, index, size));
291 
292     ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
293     direct.put(bytes);
294     direct.flip();
295     assertDecode(
296         new String(bytes, index, size, Internal.UTF_8),
297         UNSAFE_PROCESSOR.decodeUtf8(direct, index, size));
298     assertDecode(
299         new String(bytes, index, size, Internal.UTF_8),
300         SAFE_PROCESSOR.decodeUtf8(direct, index, size));
301 
302     ByteBuffer heap = ByteBuffer.allocate(bytes.length);
303     heap.put(bytes);
304     heap.flip();
305     assertDecode(
306         new String(bytes, index, size, Internal.UTF_8),
307         UNSAFE_PROCESSOR.decodeUtf8(heap, index, size));
308     assertDecode(
309         new String(bytes, index, size, Internal.UTF_8),
310         SAFE_PROCESSOR.decodeUtf8(heap, index, size));
311   }
312 
assertDecode(String expected, String actual)313   private void assertDecode(String expected, String actual) {
314     if (!expected.equals(actual)) {
315       fail("Failure: Expected (" + codepoints(expected) + ") Actual (" + codepoints(actual) + ")");
316     }
317   }
318 
codepoints(String str)319   private List<String> codepoints(String str) {
320     List<String> codepoints = new ArrayList<String>();
321     for (int i = 0; i < str.length(); i++) {
322       codepoints.add(Long.toHexString(str.charAt(i)));
323     }
324     return codepoints;
325   }
326 }
327