// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package com.google.protobuf;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Random;
import java.util.regex.Pattern;
import junit.framework.TestCase;

/**
 * Unit tests for {@link Utf8}.
 *
 * <p>Validation and encoding are exercised through both a {@code Utf8.SafeProcessor} and a
 * {@code Utf8.UnsafeProcessor}, against {@code byte[]} targets as well as heap and direct
 * {@link ByteBuffer}s.
 */
public class Utf8Test extends TestCase {
  /** Number of code points appended to every generated test string. */
  private static final int NUM_CHARS = 16384;

  private static final Utf8.Processor safeProcessor = new Utf8.SafeProcessor();
  private static final Utf8.Processor unsafeProcessor = new Utf8.UnsafeProcessor();

  /** Encoding must agree with the JDK encoder for strings drawn from growing code point ranges. */
  public void testEncode() {
    assertEncoding(randomString(0x80));
    assertEncoding(randomString(0x90));
    assertEncoding(randomString(0x800));
    assertEncoding(randomString(0x10000));
    assertEncoding(randomString(0x10ffff));
  }

  /** Encoding into an output that is too small must be rejected for every code point range. */
  public void testEncode_insufficientSpace() {
    assertEncoding_insufficientSpace(randomString(0x80));
    assertEncoding_insufficientSpace(randomString(0x90));
    assertEncoding_insufficientSpace(randomString(0x800));
    assertEncoding_insufficientSpace(randomString(0x10000));
    assertEncoding_insufficientSpace(randomString(0x10ffff));
  }

  public void testValid() {
    assertIsValid(new byte[] {(byte) 0xE0, (byte) 0xB9, (byte) 0x96}, true);
    assertIsValid(new byte[] {(byte) 0xF0, (byte) 0xB2, (byte) 0x83, (byte) 0xBC}, true);
  }

  public void testOverlongIsInvalid() {
    assertIsValid(new byte[] {(byte) 0xC0, (byte) 0x81}, false);
    assertIsValid(new byte[] {(byte) 0xE0, (byte) 0x81, (byte) 0x81}, false);
    assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
  }

  public void testMaxCodepointExceeded() {
    // byte1 > 0xF4
    assertIsValid(new byte[] {(byte) 0xF5, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
  }

  public void testInvalidSurrogateCodepoint() {
    assertIsValid(new byte[] {(byte) 0xED, (byte) 0xA1, (byte) 0x81}, false);

    // byte1 == 0xF0 && byte2 < 0x90
    assertIsValid(new byte[] {(byte) 0xF0, (byte) 0x81, (byte) 0x81, (byte) 0x81}, false);
    // byte1 == 0xF4 && byte2 > 0x8F
    assertIsValid(new byte[] {(byte) 0xF4, (byte) 0x90, (byte) 0x81, (byte) 0x81}, false);
  }

  /**
   * Builds a reproducible pseudo-random string of {@link #NUM_CHARS} code points.
   *
   * <p>The generator is seeded with a fixed value, so the same bound always yields the same
   * string. Surrogate code points are skipped and re-drawn.
   *
   * @param maxCodePoint exclusive upper bound for the generated code points
   * @return the generated string
   */
  private static String randomString(int maxCodePoint) {
    final long seed = 99;
    final Random rnd = new Random(seed);
    StringBuilder sb = new StringBuilder();
    for (int j = 0; j < NUM_CHARS; j++) {
      int codePoint;
      do {
        codePoint = rnd.nextInt(maxCodePoint);
      } while (Utf8Utils.isSurrogate(codePoint));
      sb.appendCodePoint(codePoint);
    }
    return sb.toString();
  }

  /**
   * Asserts that every validation path reports {@code valid} for {@code data}.
   *
   * @param data the bytes to validate
   * @param valid the expected validation result
   */
  private static void assertIsValid(byte[] data, boolean valid) {
    assertEquals("isValidUtf8[ARRAY]", valid, safeProcessor.isValidUtf8(data, 0, data.length));
    assertEquals(
        "isValidUtf8[ARRAY_UNSAFE]", valid, unsafeProcessor.isValidUtf8(data, 0, data.length));

    ByteBuffer buffer = ByteBuffer.wrap(data);
    assertEquals(
        "isValidUtf8[NIO_HEAP]",
        valid,
        safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()));

    // Direct buffers.
    buffer = ByteBuffer.allocateDirect(data.length);
    buffer.put(data);
    buffer.flip();
    assertEquals(
        "isValidUtf8[NIO_DEFAULT]",
        valid,
        safeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()));
    assertEquals(
        "isValidUtf8[NIO_UNSAFE]",
        valid,
        unsafeProcessor.isValidUtf8(buffer, buffer.position(), buffer.remaining()));
  }

  /**
   * Asserts that every encoding path produces the same bytes as
   * {@code message.getBytes(Internal.UTF_8)}.
   *
   * @param message the string to encode
   */
  private static void assertEncoding(String message) {
    byte[] expected = message.getBytes(Internal.UTF_8);
    byte[] output = encodeToByteArray(message, expected.length, safeProcessor);
    assertTrue("encodeUtf8[ARRAY]", Arrays.equals(expected, output));

    output = encodeToByteArray(message, expected.length, unsafeProcessor);
    assertTrue("encodeUtf8[ARRAY_UNSAFE]", Arrays.equals(expected, output));

    output = encodeToByteBuffer(message, expected.length, false, safeProcessor);
    assertTrue("encodeUtf8[NIO_HEAP]", Arrays.equals(expected, output));

    output = encodeToByteBuffer(message, expected.length, true, safeProcessor);
    assertTrue("encodeUtf8[NIO_DEFAULT]", Arrays.equals(expected, output));

    output = encodeToByteBuffer(message, expected.length, true, unsafeProcessor);
    assertTrue("encodeUtf8[NIO_UNSAFE]", Arrays.equals(expected, output));
  }

  /**
   * Asserts that every encoding path throws {@link ArrayIndexOutOfBoundsException} when the
   * output is one byte shorter than the number of chars in {@code message}.
   *
   * <p>The thrown exception is captured by a helper instead of an inline
   * {@code try { ...; fail(); } catch (Throwable t)}: that form also catches the error raised by
   * {@code fail()} and then misreports it as an unexpected exception type.
   *
   * @param message the string to encode; must not be empty
   */
  private void assertEncoding_insufficientSpace(String message) {
    // UTF-8 needs at least one byte per UTF-16 char, so this capacity can never be enough.
    final int length = message.length() - 1;
    Class<ArrayIndexOutOfBoundsException> clazz = ArrayIndexOutOfBoundsException.class;

    // byte[] + safeProcessor will not exit early. We can't match the message since we don't
    // know which char/index due to random input.
    assertExceptionType(captureEncodeToByteArray(message, length, safeProcessor), clazz);

    // byte[] + unsafeProcessor will exit early, so we can match the message.
    Throwable thrown = captureEncodeToByteArray(message, length, unsafeProcessor);
    assertExceptionType(thrown, clazz);
    assertExceptionMessage(thrown, length);

    // ByteBuffer + safeProcessor will not exit early. We can't match the message since we don't
    // know which char/index due to random input.
    assertExceptionType(captureEncodeToByteBuffer(message, length, false, safeProcessor), clazz);
    assertExceptionType(captureEncodeToByteBuffer(message, length, true, safeProcessor), clazz);

    // Direct ByteBuffer + unsafeProcessor will exit early if it's not on Android, so we can
    // match the message. On Android, a direct ByteBuffer will have hasArray() being true and
    // it will take a different code path and produces a different message.
    thrown = captureEncodeToByteBuffer(message, length, true, unsafeProcessor);
    assertExceptionType(thrown, clazz);
    if (!Android.isOnAndroidDevice()) {
      assertExceptionMessage(thrown, length);
    }
  }

  /**
   * Runs {@link #encodeToByteArray} and returns whatever it throws.
   *
   * @return the thrown {@link Throwable}, or {@code null} if encoding completed normally
   */
  private static Throwable captureEncodeToByteArray(
      String message, int length, Utf8.Processor processor) {
    try {
      encodeToByteArray(message, length, processor);
      return null;
    } catch (Throwable t) {
      // Deliberately broad: the caller reports any unexpected type as a test failure.
      return t;
    }
  }

  /**
   * Runs {@link #encodeToByteBuffer} and returns whatever it throws.
   *
   * @return the thrown {@link Throwable}, or {@code null} if encoding completed normally
   */
  private static Throwable captureEncodeToByteBuffer(
      String message, int length, boolean direct, Utf8.Processor processor) {
    try {
      encodeToByteBuffer(message, length, direct, processor);
      return null;
    } catch (Throwable t) {
      // Deliberately broad: the caller reports any unexpected type as a test failure.
      return t;
    }
  }

  /**
   * Encodes {@code message} into a freshly allocated array of {@code length} bytes.
   *
   * @return the output array
   */
  private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
    byte[] output = new byte[length];
    processor.encodeUtf8(message, output, 0, output.length);
    return output;
  }

  /**
   * Encodes {@code message} into a freshly allocated buffer of {@code length} bytes.
   *
   * @param direct whether to allocate a direct buffer rather than a heap buffer
   * @return a copy of the bytes between the buffer's position and limit after flipping it
   */
  private static byte[] encodeToByteBuffer(
      String message, int length, boolean direct, Utf8.Processor processor) {
    ByteBuffer buffer = direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);

    processor.encodeUtf8(message, buffer);
    buffer.flip();

    byte[] output = new byte[buffer.remaining()];
    buffer.get(output);
    return output;
  }

  /**
   * Fails unless {@code t} is an instance of {@code expected}.
   *
   * @param t the captured throwable, or {@code null} if nothing was thrown
   * @param expected the required exception type
   */
  private <T extends Throwable> void assertExceptionType(Throwable t, Class<T> expected) {
    if (t == null) {
      fail("Expected " + expected.getSimpleName());
    }
    if (!expected.isInstance(t)) {
      fail("Expected " + expected.getSimpleName() + ", but found " + t.getClass().getSimpleName());
    }
  }

  /**
   * Fails unless the message of {@code t} has the form
   * {@code "Failed writing <char> at index <index>"}.
   *
   * @param t the thrown exception
   * @param index the index expected in the message
   */
  private void assertExceptionMessage(Throwable t, int index) {
    String pattern = "Failed writing (.) at index " + index;
    String actual = t.getMessage();
    // A null message would otherwise surface as a NullPointerException from the matcher.
    assertNotNull("Exception has no message; expected one matching pattern " + pattern, actual);
    // DOTALL: the reported char comes from random input and may be a line terminator.
    assertTrue(
        actual + " does not match pattern " + pattern,
        Pattern.compile(pattern, Pattern.DOTALL).matcher(actual).matches());
  }
}