// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31 package com.google.protobuf; 32 33 import static com.google.protobuf.IsValidUtf8TestUtil.DIRECT_NIO_FACTORY; 34 import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT; 35 import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT; 36 import static com.google.protobuf.IsValidUtf8TestUtil.HEAP_NIO_FACTORY; 37 import static com.google.protobuf.IsValidUtf8TestUtil.LITERAL_FACTORY; 38 import static com.google.protobuf.IsValidUtf8TestUtil.testBytes; 39 40 import com.google.protobuf.IsValidUtf8TestUtil.ByteStringFactory; 41 import com.google.protobuf.IsValidUtf8TestUtil.Shard; 42 import junit.framework.TestCase; 43 44 /** 45 * Tests cases for {@link ByteString#isValidUtf8()}. This includes three brute force tests that 46 * actually test every permutation of one byte, two byte, and three byte sequences to ensure that 47 * the method produces the right result for every possible byte encoding where "right" means it's 48 * consistent with java's UTF-8 string encoding/decoding such that the method returns true for any 49 * sequence that will round trip when converted to a String and then back to bytes and will return 50 * false for any sequence that will not round trip. See also {@link IsValidUtf8FourByteTest}. It 51 * also includes some other more targeted tests. 52 * 53 * @author jonp@google.com (Jon Perlow) 54 * @author martinrb@google.com (Martin Buchholz) 55 */ 56 public class IsValidUtf8Test extends TestCase { 57 /** Tests that round tripping of all two byte permutations work. */ testIsValidUtf8_1Byte()58 public void testIsValidUtf8_1Byte() { 59 testBytes(LITERAL_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT); 60 testBytes(HEAP_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT); 61 testBytes(DIRECT_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT); 62 } 63 64 /** Tests that round tripping of all two byte permutations work. 
*/ testIsValidUtf8_2Bytes()65 public void testIsValidUtf8_2Bytes() { 66 testBytes(LITERAL_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT); 67 testBytes(HEAP_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT); 68 testBytes(DIRECT_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT); 69 } 70 71 /** Tests that round tripping of all three byte permutations work. */ testIsValidUtf8_3Bytes()72 public void testIsValidUtf8_3Bytes() { 73 // Travis' OOM killer doesn't like this test 74 if (System.getenv("TRAVIS") == null) { 75 testBytes(LITERAL_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT); 76 testBytes(HEAP_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT); 77 testBytes(DIRECT_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT); 78 } 79 } 80 81 /** 82 * Tests that round tripping of a sample of four byte permutations work. All permutations are 83 * prohibitively expensive to test for automated runs; {@link IsValidUtf8FourByteTest} is used for 84 * full coverage. This method tests specific four-byte cases. 85 */ testIsValidUtf8_4BytesSamples()86 public void testIsValidUtf8_4BytesSamples() { 87 // Valid 4 byte. 88 assertValidUtf8(0xF0, 0xA4, 0xAD, 0xA2); 89 90 // Bad trailing bytes 91 assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0x7F); 92 assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0xC0); 93 94 // Special cases for byte2 95 assertInvalidUtf8(0xF0, 0x8F, 0xAD, 0xA2); 96 assertInvalidUtf8(0xF4, 0x90, 0xAD, 0xA2); 97 } 98 99 /** Tests some hard-coded test cases. 
*/ testSomeSequences()100 public void testSomeSequences() { 101 // Empty 102 assertTrue(asBytes("").isValidUtf8()); 103 104 // One-byte characters, including control characters 105 assertTrue(asBytes("\u0000abc\u007f").isValidUtf8()); 106 107 // Two-byte characters 108 assertTrue(asBytes("\u00a2\u00a2").isValidUtf8()); 109 110 // Three-byte characters 111 assertTrue(asBytes("\u020ac\u020ac").isValidUtf8()); 112 113 // Four-byte characters 114 assertTrue(asBytes("\u024B62\u024B62").isValidUtf8()); 115 116 // Mixed string 117 assertTrue(asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62").isValidUtf8()); 118 119 // Not a valid string 120 assertInvalidUtf8(-1, 0, -1, 0); 121 } 122 toByteArray(int... bytes)123 private byte[] toByteArray(int... bytes) { 124 byte[] realBytes = new byte[bytes.length]; 125 for (int i = 0; i < bytes.length; i++) { 126 realBytes[i] = (byte) bytes[i]; 127 } 128 return realBytes; 129 } 130 assertValidUtf8(ByteStringFactory factory, int[] bytes, boolean not)131 private void assertValidUtf8(ByteStringFactory factory, int[] bytes, boolean not) { 132 byte[] realBytes = toByteArray(bytes); 133 assertTrue(not ^ Utf8.isValidUtf8(realBytes)); 134 assertTrue(not ^ Utf8.isValidUtf8(realBytes, 0, bytes.length)); 135 ByteString leaf = factory.newByteString(realBytes); 136 ByteString sub = leaf.substring(0, bytes.length); 137 assertTrue(not ^ leaf.isValidUtf8()); 138 assertTrue(not ^ sub.isValidUtf8()); 139 ByteString[] ropes = { 140 RopeByteString.newInstanceForTest(ByteString.EMPTY, leaf), 141 RopeByteString.newInstanceForTest(ByteString.EMPTY, sub), 142 RopeByteString.newInstanceForTest(leaf, ByteString.EMPTY), 143 RopeByteString.newInstanceForTest(sub, ByteString.EMPTY), 144 RopeByteString.newInstanceForTest(sub, leaf) 145 }; 146 for (ByteString rope : ropes) { 147 assertTrue(not ^ rope.isValidUtf8()); 148 } 149 } 150 assertValidUtf8(int... bytes)151 private void assertValidUtf8(int... 
bytes) { 152 assertValidUtf8(LITERAL_FACTORY, bytes, false); 153 assertValidUtf8(HEAP_NIO_FACTORY, bytes, false); 154 assertValidUtf8(DIRECT_NIO_FACTORY, bytes, false); 155 } 156 assertInvalidUtf8(int... bytes)157 private void assertInvalidUtf8(int... bytes) { 158 assertValidUtf8(LITERAL_FACTORY, bytes, true); 159 assertValidUtf8(HEAP_NIO_FACTORY, bytes, true); 160 assertValidUtf8(DIRECT_NIO_FACTORY, bytes, true); 161 } 162 asBytes(String s)163 private static ByteString asBytes(String s) { 164 return ByteString.copyFromUtf8(s); 165 } 166 testShardsHaveExpectedRoundTrippables()167 public void testShardsHaveExpectedRoundTrippables() { 168 // A sanity check. 169 int actual = 0; 170 for (Shard shard : IsValidUtf8TestUtil.FOUR_BYTE_SHARDS) { 171 actual = (int) (actual + shard.expected); 172 } 173 assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual); 174 } 175 } 176