• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package com.google.protobuf;
2 
3 import com.google.protobuf.Utf8.Processor;
4 import com.google.protobuf.Utf8.SafeProcessor;
5 import com.google.protobuf.Utf8.UnsafeProcessor;
6 import java.nio.ByteBuffer;
7 import java.util.ArrayList;
8 import java.util.List;
9 import java.util.logging.Logger;
10 import junit.framework.TestCase;
11 
12 public class DecodeUtf8Test extends TestCase {
13   private static Logger logger = Logger.getLogger(DecodeUtf8Test.class.getName());
14 
15   private static final Processor SAFE_PROCESSOR = new SafeProcessor();
16   private static final Processor UNSAFE_PROCESSOR = new UnsafeProcessor();
17 
testRoundTripAllValidChars()18   public void testRoundTripAllValidChars() throws Exception {
19     for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) {
20       if (i < Character.MIN_SURROGATE || i > Character.MAX_SURROGATE) {
21         String str = new String(Character.toChars(i));
22         assertRoundTrips(str);
23       }
24     }
25   }
26 
27   // Test all 1, 2, 3 invalid byte combinations. Valid ones would have been covered above.
28 
testOneByte()29   public void testOneByte() throws Exception {
30     int valid = 0;
31     for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
32       ByteString bs = ByteString.copyFrom(new byte[] { (byte) i });
33       if (!bs.isValidUtf8()) {
34         assertInvalid(bs.toByteArray());
35       } else {
36         valid++;
37       }
38     }
39     assertEquals(IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
40   }
41 
testTwoBytes()42   public void testTwoBytes() throws Exception {
43     int valid = 0;
44     for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
45       for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
46         ByteString bs = ByteString.copyFrom(new byte[]{(byte) i, (byte) j});
47         if (!bs.isValidUtf8()) {
48           assertInvalid(bs.toByteArray());
49         } else {
50           valid++;
51         }
52       }
53     }
54     assertEquals(IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT, valid);
55   }
56 
testThreeBytes()57   public void testThreeBytes() throws Exception {
58     // Travis' OOM killer doesn't like this test
59     if (System.getenv("TRAVIS") == null) {
60       int count = 0;
61       int valid = 0;
62       for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
63         for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
64           for (int k = Byte.MIN_VALUE; k <= Byte.MAX_VALUE; k++) {
65             byte[] bytes = new byte[]{(byte) i, (byte) j, (byte) k};
66             ByteString bs = ByteString.copyFrom(bytes);
67             if (!bs.isValidUtf8()) {
68               assertInvalid(bytes);
69             } else {
70               valid++;
71             }
72             count++;
73             if (count % 1000000L == 0) {
74               logger.info("Processed " + (count / 1000000L) + " million characters");
75             }
76           }
77         }
78       }
79       assertEquals(IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
80     }
81   }
82 
83   /**
84    * Tests that round tripping of a sample of four byte permutations work.
85    */
testInvalid_4BytesSamples()86   public void testInvalid_4BytesSamples() throws Exception {
87     // Bad trailing bytes
88     assertInvalid(0xF0, 0xA4, 0xAD, 0x7F);
89     assertInvalid(0xF0, 0xA4, 0xAD, 0xC0);
90 
91     // Special cases for byte2
92     assertInvalid(0xF0, 0x8F, 0xAD, 0xA2);
93     assertInvalid(0xF4, 0x90, 0xAD, 0xA2);
94   }
95 
testRealStrings()96   public void testRealStrings() throws Exception {
97     // English
98     assertRoundTrips("The quick brown fox jumps over the lazy dog");
99     // German
100     assertRoundTrips("Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens cirkusklovnen");
101     // Japanese
102     assertRoundTrips(
103         "\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c\u308b\u3092");
104     // Hebrew
105     assertRoundTrips(
106         "\u05d3\u05d2 \u05e1\u05e7\u05e8\u05df \u05e9\u05d8 \u05d1\u05d9\u05dd "
107             + "\u05de\u05d0\u05d5\u05db\u05d6\u05d1 \u05d5\u05dc\u05e4\u05ea\u05e2"
108             + " \u05de\u05e6\u05d0 \u05dc\u05d5 \u05d7\u05d1\u05e8\u05d4 "
109             + "\u05d0\u05d9\u05da \u05d4\u05e7\u05dc\u05d9\u05d8\u05d4");
110     // Thai
111     assertRoundTrips(
112         " \u0e08\u0e07\u0e1d\u0e48\u0e32\u0e1f\u0e31\u0e19\u0e1e\u0e31\u0e12"
113             + "\u0e19\u0e32\u0e27\u0e34\u0e0a\u0e32\u0e01\u0e32\u0e23");
114     // Chinese
115     assertRoundTrips(
116         "\u8fd4\u56de\u94fe\u4e2d\u7684\u4e0b\u4e00\u4e2a\u4ee3\u7406\u9879\u9009\u62e9\u5668");
117     // Chinese with 4-byte chars
118     assertRoundTrips("\uD841\uDF0E\uD841\uDF31\uD841\uDF79\uD843\uDC53\uD843\uDC78"
119         + "\uD843\uDC96\uD843\uDCCF\uD843\uDCD5\uD843\uDD15\uD843\uDD7C\uD843\uDD7F"
120         + "\uD843\uDE0E\uD843\uDE0F\uD843\uDE77\uD843\uDE9D\uD843\uDEA2");
121     // Mixed
122     assertRoundTrips(
123         "The quick brown \u3044\u308d\u306f\u306b\u307b\u3078\u8fd4\u56de\u94fe"
124             + "\u4e2d\u7684\u4e0b\u4e00");
125   }
126 
testOverlong()127   public void testOverlong() throws Exception {
128     assertInvalid(0xc0, 0xaf);
129     assertInvalid(0xe0, 0x80, 0xaf);
130     assertInvalid(0xf0, 0x80, 0x80, 0xaf);
131 
132     // Max overlong
133     assertInvalid(0xc1, 0xbf);
134     assertInvalid(0xe0, 0x9f, 0xbf);
135     assertInvalid(0xf0 ,0x8f, 0xbf, 0xbf);
136 
137     // null overlong
138     assertInvalid(0xc0, 0x80);
139     assertInvalid(0xe0, 0x80, 0x80);
140     assertInvalid(0xf0, 0x80, 0x80, 0x80);
141   }
142 
testIllegalCodepoints()143   public void testIllegalCodepoints() throws Exception {
144     // Single surrogate
145     assertInvalid(0xed, 0xa0, 0x80);
146     assertInvalid(0xed, 0xad, 0xbf);
147     assertInvalid(0xed, 0xae, 0x80);
148     assertInvalid(0xed, 0xaf, 0xbf);
149     assertInvalid(0xed, 0xb0, 0x80);
150     assertInvalid(0xed, 0xbe, 0x80);
151     assertInvalid(0xed, 0xbf, 0xbf);
152 
153     // Paired surrogates
154     assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80);
155     assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf);
156     assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80);
157     assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf);
158     assertInvalid(0xed, 0xae, 0x80, 0xed, 0xb0, 0x80);
159     assertInvalid(0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf);
160     assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80);
161     assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf);
162   }
163 
testBufferSlice()164   public void testBufferSlice() throws Exception {
165     String str = "The quick brown fox jumps over the lazy dog";
166     assertRoundTrips(str, 10, 4);
167     assertRoundTrips(str, str.length(), 0);
168   }
169 
testInvalidBufferSlice()170   public void testInvalidBufferSlice() throws Exception {
171     byte[] bytes  = "The quick brown fox jumps over the lazy dog".getBytes(Internal.UTF_8);
172     assertInvalidSlice(bytes, bytes.length - 3, 4);
173     assertInvalidSlice(bytes, bytes.length, 1);
174     assertInvalidSlice(bytes, bytes.length + 1, 0);
175     assertInvalidSlice(bytes, 0, bytes.length + 1);
176   }
177 
assertInvalid(int... bytesAsInt)178   private void assertInvalid(int... bytesAsInt) throws Exception {
179     byte[] bytes = new byte[bytesAsInt.length];
180     for (int i = 0; i < bytesAsInt.length; i++) {
181       bytes[i] = (byte) bytesAsInt[i];
182     }
183     assertInvalid(bytes);
184   }
185 
assertInvalid(byte[] bytes)186   private void assertInvalid(byte[] bytes) throws Exception {
187     try {
188       UNSAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
189       fail();
190     } catch (InvalidProtocolBufferException e) {
191       // Expected.
192     }
193     try {
194       SAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
195       fail();
196     } catch (InvalidProtocolBufferException e) {
197       // Expected.
198     }
199 
200     ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
201     direct.put(bytes);
202     direct.flip();
203     try {
204       UNSAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length);
205       fail();
206     } catch (InvalidProtocolBufferException e) {
207       // Expected.
208     }
209     try {
210       SAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length);
211       fail();
212     } catch (InvalidProtocolBufferException e) {
213       // Expected.
214     }
215 
216     ByteBuffer heap = ByteBuffer.allocate(bytes.length);
217     heap.put(bytes);
218     heap.flip();
219     try {
220       UNSAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length);
221       fail();
222     } catch (InvalidProtocolBufferException e) {
223       // Expected.
224     }
225     try {
226       SAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length);
227       fail();
228     } catch (InvalidProtocolBufferException e) {
229       // Expected.
230     }
231   }
232 
assertInvalidSlice(byte[] bytes, int index, int size)233   private void assertInvalidSlice(byte[] bytes, int index, int size) throws Exception {
234     try {
235       UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size);
236       fail();
237     } catch (ArrayIndexOutOfBoundsException e) {
238       // Expected.
239     }
240     try {
241       SAFE_PROCESSOR.decodeUtf8(bytes, index, size);
242       fail();
243     } catch (ArrayIndexOutOfBoundsException e) {
244       // Expected.
245     }
246 
247     ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
248     direct.put(bytes);
249     direct.flip();
250     try {
251       UNSAFE_PROCESSOR.decodeUtf8(direct, index, size);
252       fail();
253     } catch (ArrayIndexOutOfBoundsException e) {
254       // Expected.
255     }
256     try {
257       SAFE_PROCESSOR.decodeUtf8(direct, index, size);
258       fail();
259     } catch (ArrayIndexOutOfBoundsException e) {
260       // Expected.
261     }
262 
263     ByteBuffer heap = ByteBuffer.allocate(bytes.length);
264     heap.put(bytes);
265     heap.flip();
266     try {
267       UNSAFE_PROCESSOR.decodeUtf8(heap, index, size);
268       fail();
269     } catch (ArrayIndexOutOfBoundsException e) {
270       // Expected.
271     }
272     try {
273       SAFE_PROCESSOR.decodeUtf8(heap, index, size);
274       fail();
275     } catch (ArrayIndexOutOfBoundsException e) {
276       // Expected.
277     }
278   }
279 
assertRoundTrips(String str)280   private void assertRoundTrips(String str) throws Exception {
281     assertRoundTrips(str, 0, -1);
282   }
283 
assertRoundTrips(String str, int index, int size)284   private void assertRoundTrips(String str, int index, int size) throws Exception {
285     byte[] bytes = str.getBytes(Internal.UTF_8);
286     if (size == -1) {
287       size = bytes.length;
288     }
289     assertDecode(new String(bytes, index, size, Internal.UTF_8),
290         UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size));
291     assertDecode(new String(bytes, index, size, Internal.UTF_8),
292         SAFE_PROCESSOR.decodeUtf8(bytes, index, size));
293 
294     ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
295     direct.put(bytes);
296     direct.flip();
297     assertDecode(new String(bytes, index, size, Internal.UTF_8),
298         UNSAFE_PROCESSOR.decodeUtf8(direct, index, size));
299     assertDecode(new String(bytes, index, size, Internal.UTF_8),
300         SAFE_PROCESSOR.decodeUtf8(direct, index, size));
301 
302     ByteBuffer heap = ByteBuffer.allocate(bytes.length);
303     heap.put(bytes);
304     heap.flip();
305     assertDecode(new String(bytes, index, size, Internal.UTF_8),
306         UNSAFE_PROCESSOR.decodeUtf8(heap, index, size));
307     assertDecode(new String(bytes, index, size, Internal.UTF_8),
308         SAFE_PROCESSOR.decodeUtf8(heap, index, size));
309   }
310 
assertDecode(String expected, String actual)311   private void assertDecode(String expected, String actual) {
312     if (!expected.equals(actual)) {
313       fail("Failure: Expected (" + codepoints(expected) + ") Actual (" + codepoints(actual) + ")");
314     }
315   }
316 
codepoints(String str)317   private List<String> codepoints(String str) {
318     List<String> codepoints = new ArrayList<String>();
319     for (int i = 0; i < str.length(); i++) {
320       codepoints.add(Long.toHexString(str.charAt(i)));
321     }
322     return codepoints;
323   }
324 
325 }
326