1 package com.google.protobuf; 2 3 import com.google.protobuf.Utf8.Processor; 4 import com.google.protobuf.Utf8.SafeProcessor; 5 import com.google.protobuf.Utf8.UnsafeProcessor; 6 import java.nio.ByteBuffer; 7 import java.util.ArrayList; 8 import java.util.List; 9 import java.util.logging.Logger; 10 import junit.framework.TestCase; 11 12 public class DecodeUtf8Test extends TestCase { 13 private static Logger logger = Logger.getLogger(DecodeUtf8Test.class.getName()); 14 15 private static final Processor SAFE_PROCESSOR = new SafeProcessor(); 16 private static final Processor UNSAFE_PROCESSOR = new UnsafeProcessor(); 17 testRoundTripAllValidChars()18 public void testRoundTripAllValidChars() throws Exception { 19 for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) { 20 if (i < Character.MIN_SURROGATE || i > Character.MAX_SURROGATE) { 21 String str = new String(Character.toChars(i)); 22 assertRoundTrips(str); 23 } 24 } 25 } 26 27 // Test all 1, 2, 3 invalid byte combinations. Valid ones would have been covered above. 28 testOneByte()29 public void testOneByte() throws Exception { 30 int valid = 0; 31 for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) { 32 ByteString bs = ByteString.copyFrom(new byte[] { (byte) i }); 33 if (!bs.isValidUtf8()) { 34 assertInvalid(bs.toByteArray()); 35 } else { 36 valid++; 37 } 38 } 39 assertEquals(IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, valid); 40 } 41 testTwoBytes()42 public void testTwoBytes() throws Exception { 43 int valid = 0; 44 for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) { 45 for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) { 46 ByteString bs = ByteString.copyFrom(new byte[]{(byte) i, (byte) j}); 47 if (!bs.isValidUtf8()) { 48 assertInvalid(bs.toByteArray()); 49 } else { 50 valid++; 51 } 52 } 53 } 54 assertEquals(IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT, valid); 55 } 56 testThreeBytes()57 public void testThreeBytes() throws Exception { 58 // Travis' OOM killer doesn't like this test 59 if (System.getenv("TRAVIS") == null) { 60 int count = 0; 61 int valid = 0; 62 for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) { 63 for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) { 64 for (int k = Byte.MIN_VALUE; k <= Byte.MAX_VALUE; k++) { 65 byte[] bytes = new byte[]{(byte) i, (byte) j, (byte) k}; 66 ByteString bs = ByteString.copyFrom(bytes); 67 if (!bs.isValidUtf8()) { 68 assertInvalid(bytes); 69 } else { 70 valid++; 71 } 72 count++; 73 if (count % 1000000L == 0) { 74 logger.info("Processed " + (count / 1000000L) + " million characters"); 75 } 76 } 77 } 78 } 79 assertEquals(IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT, valid); 80 } 81 } 82 83 /** 84 * Tests that round tripping of a sample of four byte permutations work. 85 */ testInvalid_4BytesSamples()86 public void testInvalid_4BytesSamples() throws Exception { 87 // Bad trailing bytes 88 assertInvalid(0xF0, 0xA4, 0xAD, 0x7F); 89 assertInvalid(0xF0, 0xA4, 0xAD, 0xC0); 90 91 // Special cases for byte2 92 assertInvalid(0xF0, 0x8F, 0xAD, 0xA2); 93 assertInvalid(0xF4, 0x90, 0xAD, 0xA2); 94 } 95 testRealStrings()96 public void testRealStrings() throws Exception { 97 // English 98 assertRoundTrips("The quick brown fox jumps over the lazy dog"); 99 // German 100 assertRoundTrips("Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens cirkusklovnen"); 101 // Japanese 102 assertRoundTrips( 103 "\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c\u308b\u3092"); 104 // Hebrew 105 assertRoundTrips( 106 "\u05d3\u05d2 \u05e1\u05e7\u05e8\u05df \u05e9\u05d8 \u05d1\u05d9\u05dd " 107 + "\u05de\u05d0\u05d5\u05db\u05d6\u05d1 \u05d5\u05dc\u05e4\u05ea\u05e2" 108 + " \u05de\u05e6\u05d0 \u05dc\u05d5 \u05d7\u05d1\u05e8\u05d4 " 109 + "\u05d0\u05d9\u05da \u05d4\u05e7\u05dc\u05d9\u05d8\u05d4"); 110 // Thai 111 assertRoundTrips( 112 " \u0e08\u0e07\u0e1d\u0e48\u0e32\u0e1f\u0e31\u0e19\u0e1e\u0e31\u0e12" 113 + "\u0e19\u0e32\u0e27\u0e34\u0e0a\u0e32\u0e01\u0e32\u0e23"); 114 // Chinese 115 assertRoundTrips( 116 "\u8fd4\u56de\u94fe\u4e2d\u7684\u4e0b\u4e00\u4e2a\u4ee3\u7406\u9879\u9009\u62e9\u5668"); 117 // Chinese with 4-byte chars 118 assertRoundTrips("\uD841\uDF0E\uD841\uDF31\uD841\uDF79\uD843\uDC53\uD843\uDC78" 119 + "\uD843\uDC96\uD843\uDCCF\uD843\uDCD5\uD843\uDD15\uD843\uDD7C\uD843\uDD7F" 120 + "\uD843\uDE0E\uD843\uDE0F\uD843\uDE77\uD843\uDE9D\uD843\uDEA2"); 121 // Mixed 122 assertRoundTrips( 123 "The quick brown \u3044\u308d\u306f\u306b\u307b\u3078\u8fd4\u56de\u94fe" 124 + "\u4e2d\u7684\u4e0b\u4e00"); 125 } 126 testOverlong()127 public void testOverlong() throws Exception { 128 assertInvalid(0xc0, 0xaf); 129 assertInvalid(0xe0, 0x80, 0xaf); 130 assertInvalid(0xf0, 0x80, 0x80, 0xaf); 131 132 // Max overlong 133 assertInvalid(0xc1, 0xbf); 134 assertInvalid(0xe0, 0x9f, 0xbf); 135 assertInvalid(0xf0 ,0x8f, 0xbf, 0xbf); 136 137 // null overlong 138 assertInvalid(0xc0, 0x80); 139 assertInvalid(0xe0, 0x80, 0x80); 140 assertInvalid(0xf0, 0x80, 0x80, 0x80); 141 } 142 testIllegalCodepoints()143 public void testIllegalCodepoints() throws Exception { 144 // Single surrogate 145 assertInvalid(0xed, 0xa0, 0x80); 146 assertInvalid(0xed, 0xad, 0xbf); 147 assertInvalid(0xed, 0xae, 0x80); 148 assertInvalid(0xed, 0xaf, 0xbf); 149 assertInvalid(0xed, 0xb0, 0x80); 150 assertInvalid(0xed, 0xbe, 0x80); 151 assertInvalid(0xed, 0xbf, 0xbf); 152 153 // Paired surrogates 154 assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80); 155 assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf); 156 assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80); 157 assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf); 158 assertInvalid(0xed, 0xae, 0x80, 0xed, 0xb0, 0x80); 159 assertInvalid(0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf); 160 assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80); 161 assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf); 162 } 163 testBufferSlice()164 public void testBufferSlice() throws Exception { 165 String str = "The quick brown fox jumps over the lazy dog"; 166 assertRoundTrips(str, 10, 4); 167 assertRoundTrips(str, str.length(), 0); 168 } 169 testInvalidBufferSlice()170 public void testInvalidBufferSlice() throws Exception { 171 byte[] bytes = "The quick brown fox jumps over the lazy dog".getBytes(Internal.UTF_8); 172 assertInvalidSlice(bytes, bytes.length - 3, 4); 173 assertInvalidSlice(bytes, bytes.length, 1); 174 assertInvalidSlice(bytes, bytes.length + 1, 0); 175 assertInvalidSlice(bytes, 0, bytes.length + 1); 176 } 177 assertInvalid(int... bytesAsInt)178 private void assertInvalid(int... bytesAsInt) throws Exception { 179 byte[] bytes = new byte[bytesAsInt.length]; 180 for (int i = 0; i < bytesAsInt.length; i++) { 181 bytes[i] = (byte) bytesAsInt[i]; 182 } 183 assertInvalid(bytes); 184 } 185 assertInvalid(byte[] bytes)186 private void assertInvalid(byte[] bytes) throws Exception { 187 try { 188 UNSAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length); 189 fail(); 190 } catch (InvalidProtocolBufferException e) { 191 // Expected. 192 } 193 try { 194 SAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length); 195 fail(); 196 } catch (InvalidProtocolBufferException e) { 197 // Expected. 198 } 199 200 ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length); 201 direct.put(bytes); 202 direct.flip(); 203 try { 204 UNSAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length); 205 fail(); 206 } catch (InvalidProtocolBufferException e) { 207 // Expected. 208 } 209 try { 210 SAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length); 211 fail(); 212 } catch (InvalidProtocolBufferException e) { 213 // Expected. 214 } 215 216 ByteBuffer heap = ByteBuffer.allocate(bytes.length); 217 heap.put(bytes); 218 heap.flip(); 219 try { 220 UNSAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length); 221 fail(); 222 } catch (InvalidProtocolBufferException e) { 223 // Expected. 224 } 225 try { 226 SAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length); 227 fail(); 228 } catch (InvalidProtocolBufferException e) { 229 // Expected. 230 } 231 } 232 assertInvalidSlice(byte[] bytes, int index, int size)233 private void assertInvalidSlice(byte[] bytes, int index, int size) throws Exception { 234 try { 235 UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size); 236 fail(); 237 } catch (ArrayIndexOutOfBoundsException e) { 238 // Expected. 239 } 240 try { 241 SAFE_PROCESSOR.decodeUtf8(bytes, index, size); 242 fail(); 243 } catch (ArrayIndexOutOfBoundsException e) { 244 // Expected. 245 } 246 247 ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length); 248 direct.put(bytes); 249 direct.flip(); 250 try { 251 UNSAFE_PROCESSOR.decodeUtf8(direct, index, size); 252 fail(); 253 } catch (ArrayIndexOutOfBoundsException e) { 254 // Expected. 255 } 256 try { 257 SAFE_PROCESSOR.decodeUtf8(direct, index, size); 258 fail(); 259 } catch (ArrayIndexOutOfBoundsException e) { 260 // Expected. 261 } 262 263 ByteBuffer heap = ByteBuffer.allocate(bytes.length); 264 heap.put(bytes); 265 heap.flip(); 266 try { 267 UNSAFE_PROCESSOR.decodeUtf8(heap, index, size); 268 fail(); 269 } catch (ArrayIndexOutOfBoundsException e) { 270 // Expected. 271 } 272 try { 273 SAFE_PROCESSOR.decodeUtf8(heap, index, size); 274 fail(); 275 } catch (ArrayIndexOutOfBoundsException e) { 276 // Expected. 277 } 278 } 279 assertRoundTrips(String str)280 private void assertRoundTrips(String str) throws Exception { 281 assertRoundTrips(str, 0, -1); 282 } 283 assertRoundTrips(String str, int index, int size)284 private void assertRoundTrips(String str, int index, int size) throws Exception { 285 byte[] bytes = str.getBytes(Internal.UTF_8); 286 if (size == -1) { 287 size = bytes.length; 288 } 289 assertDecode(new String(bytes, index, size, Internal.UTF_8), 290 UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size)); 291 assertDecode(new String(bytes, index, size, Internal.UTF_8), 292 SAFE_PROCESSOR.decodeUtf8(bytes, index, size)); 293 294 ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length); 295 direct.put(bytes); 296 direct.flip(); 297 assertDecode(new String(bytes, index, size, Internal.UTF_8), 298 UNSAFE_PROCESSOR.decodeUtf8(direct, index, size)); 299 assertDecode(new String(bytes, index, size, Internal.UTF_8), 300 SAFE_PROCESSOR.decodeUtf8(direct, index, size)); 301 302 ByteBuffer heap = ByteBuffer.allocate(bytes.length); 303 heap.put(bytes); 304 heap.flip(); 305 assertDecode(new String(bytes, index, size, Internal.UTF_8), 306 UNSAFE_PROCESSOR.decodeUtf8(heap, index, size)); 307 assertDecode(new String(bytes, index, size, Internal.UTF_8), 308 SAFE_PROCESSOR.decodeUtf8(heap, index, size)); 309 } 310 assertDecode(String expected, String actual)311 private void assertDecode(String expected, String actual) { 312 if (!expected.equals(actual)) { 313 fail("Failure: Expected (" + codepoints(expected) + ") Actual (" + codepoints(actual) + ")"); 314 } 315 } 316 codepoints(String str)317 private List<String> codepoints(String str) { 318 List<String> codepoints = new ArrayList<String>(); 319 for (int i = 0; i < str.length(); i++) { 320 codepoints.add(Long.toHexString(str.charAt(i))); 321 } 322 return codepoints; 323 } 324 325 } 326