1 // Copyright 2011 Google Inc. All Rights Reserved. 2 3 package com.google.common.hash; 4 5 import static com.google.common.base.Charsets.ISO_8859_1; 6 import static com.google.common.base.Charsets.UTF_8; 7 import static com.google.common.truth.Truth.assertThat; 8 9 import com.google.common.base.Strings; 10 import com.google.common.collect.ImmutableSortedMap; 11 import com.google.common.collect.Ordering; 12 import com.google.common.primitives.UnsignedLong; 13 import java.util.Arrays; 14 import junit.framework.TestCase; 15 16 /** 17 * Unit test for Fingerprint2011. 18 * 19 * @author kylemaddison@google.com (Kyle Maddison) 20 */ 21 public class Fingerprint2011Test extends TestCase { 22 23 // Length of the sample string to produce 24 private static final int MAX_BYTES = 1000; 25 26 // Map from sample string lengths to the fingerprint 27 private static final ImmutableSortedMap<Integer, Long> LENGTH_FINGERPRINTS = 28 new ImmutableSortedMap.Builder<Integer, Long>(Ordering.natural()) 29 .put(1000, 0x433109b33e13e6edL) 30 .put(800, 0x5f2f123bfc815f81L) 31 .put(640, 0x6396fc6a67293cf4L) 32 .put(512, 0x45c01b4934ddbbbeL) 33 .put(409, 0xfcd19b617551db45L) 34 .put(327, 0x4eee69e12854871eL) 35 .put(261, 0xab753446a3bbd532L) 36 .put(208, 0x54242fe06a291c3fL) 37 .put(166, 0x4f7acff7703a635bL) 38 .put(132, 0xa784bd0a1f22cc7fL) 39 .put(105, 0xf19118e187456638L) 40 .put(84, 0x3e2e58f9196abfe5L) 41 .put(67, 0xd38ae3dec0107aeaL) 42 .put(53, 0xea3033885868e10eL) 43 .put(42, 0x1394a146d0d7e04bL) 44 .put(33, 0x9962499315d2e8daL) 45 .put(26, 0x0849f5cfa85489b5L) 46 .put(20, 0x83b395ff19bf2171L) 47 .put(16, 0x9d33dd141bd55d9aL) 48 .put(12, 0x196248eb0b02466aL) 49 .put(9, 0x1cf73a50ff120336L) 50 .put(7, 0xb451c339457dbf51L) 51 .put(5, 0x681982c5e7b74064L) 52 .put(4, 0xc5ce47450ca6c021L) 53 .put(3, 0x9fcc3c3fde4d5ff7L) 54 .put(2, 0x090966a836e5fa4bL) 55 .put(1, 0x8199675ecaa6fe64L) 56 .put(0, 0x23ad7c904aa665e3L) 57 .build(); 58 private static final HashFunction HASH_FN = Hashing.fingerprint2011(); 59 60 // If this test fails, all bets are off testReallySimpleFingerprints()61 public void testReallySimpleFingerprints() { 62 assertEquals(8473225671271759044L, fingerprint("test".getBytes(UTF_8))); 63 // 32 characters long 64 assertEquals(7345148637025587076L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8))); 65 // 256 characters long 66 assertEquals(4904844928629814570L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8))); 67 } 68 testStringsConsistency()69 public void testStringsConsistency() { 70 for (String s : Arrays.asList("", "some", "test", "strings", "to", "try")) { 71 assertEquals(HASH_FN.newHasher().putUnencodedChars(s).hash(), HASH_FN.hashUnencodedChars(s)); 72 } 73 } 74 testUtf8()75 public void testUtf8() { 76 char[] charsA = new char[128]; 77 char[] charsB = new char[128]; 78 79 for (int i = 0; i < charsA.length; i++) { 80 if (i < 100) { 81 charsA[i] = 'a'; 82 charsB[i] = 'a'; 83 } else { 84 // Both two-byte characters, but must be different 85 charsA[i] = (char) (0x0180 + i); 86 charsB[i] = (char) (0x0280 + i); 87 } 88 } 89 90 String stringA = new String(charsA); 91 String stringB = new String(charsB); 92 assertThat(stringA).isNotEqualTo(stringB); 93 assertThat(HASH_FN.hashUnencodedChars(stringA)) 94 .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB)); 95 assertThat(fingerprint(stringA.getBytes(UTF_8))) 96 .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8))); 97 98 // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters 99 // greater than 127 (ie with their top bit set). 100 // Don't attempt to do this in real code. 101 assertEquals( 102 fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1))); 103 } 104 testMumurHash64()105 public void testMumurHash64() { 106 byte[] bytes = "test".getBytes(UTF_8); 107 assertEquals( 108 1618900948208871284L, Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1)); 109 110 bytes = "test test test".getBytes(UTF_8); 111 assertEquals( 112 UnsignedLong.valueOf("12313169684067793560").longValue(), 113 Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1)); 114 } 115 testPutNonChars()116 public void testPutNonChars() { 117 Hasher hasher = HASH_FN.newHasher(); 118 // Expected data is 0x0100010100000000 119 hasher 120 .putBoolean(true) 121 .putBoolean(true) 122 .putBoolean(false) 123 .putBoolean(true) 124 .putBoolean(false) 125 .putBoolean(false) 126 .putBoolean(false) 127 .putBoolean(false); 128 final long hashCode = hasher.hash().asLong(); 129 130 hasher = HASH_FN.newHasher(); 131 hasher 132 .putByte((byte) 0x01) 133 .putByte((byte) 0x01) 134 .putByte((byte) 0x00) 135 .putByte((byte) 0x01) 136 .putByte((byte) 0x00) 137 .putByte((byte) 0x00) 138 .putByte((byte) 0x00) 139 .putByte((byte) 0x00); 140 assertEquals(hashCode, hasher.hash().asLong()); 141 142 hasher = HASH_FN.newHasher(); 143 hasher 144 .putChar((char) 0x0101) 145 .putChar((char) 0x0100) 146 .putChar((char) 0x0000) 147 .putChar((char) 0x0000); 148 assertEquals(hashCode, hasher.hash().asLong()); 149 150 hasher = HASH_FN.newHasher(); 151 hasher.putBytes(new byte[] {0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00}); 152 assertEquals(hashCode, hasher.hash().asLong()); 153 154 hasher = HASH_FN.newHasher(); 155 hasher.putLong(0x0000000001000101L); 156 assertEquals(hashCode, hasher.hash().asLong()); 157 158 hasher = HASH_FN.newHasher(); 159 hasher 160 .putShort((short) 0x0101) 161 .putShort((short) 0x0100) 162 .putShort((short) 0x0000) 163 .putShort((short) 0x0000); 164 assertEquals(hashCode, hasher.hash().asLong()); 165 } 166 testHashFloatIsStable()167 public void testHashFloatIsStable() { 168 // This is about the best we can do for floating-point 169 Hasher hasher = HASH_FN.newHasher(); 170 hasher.putFloat(0x01000101f).putFloat(0f); 171 assertEquals(0x96a4f8cc6ecbf16L, hasher.hash().asLong()); 172 173 hasher = HASH_FN.newHasher(); 174 hasher.putDouble(0x0000000001000101d); 175 assertEquals(0xcf54171253fdc198L, hasher.hash().asLong()); 176 } 177 178 /** Convenience method to compute a fingerprint on a full bytes array. */ fingerprint(byte[] bytes)179 private static long fingerprint(byte[] bytes) { 180 return fingerprint(bytes, bytes.length); 181 } 182 183 /** Convenience method to compute a fingerprint on a subset of a byte array. */ fingerprint(byte[] bytes, int length)184 private static long fingerprint(byte[] bytes, int length) { 185 return HASH_FN.hashBytes(bytes, 0, length).asLong(); 186 } 187 188 /** 189 * Tests that the Java port of Fingerprint2011 provides the same results on buffers up to 800 190 * bytes long as the original implementation in C++. See http://cl/106539598 191 */ testMultipleLengths()192 public void testMultipleLengths() { 193 int iterations = 800; 194 byte[] buf = new byte[iterations * 4]; 195 int bufLen = 0; 196 long h = 0; 197 for (int i = 0; i < iterations; ++i) { 198 h ^= fingerprint(buf, i); 199 h = remix(h); 200 buf[bufLen++] = getChar(h); 201 202 h ^= fingerprint(buf, i * i % bufLen); 203 h = remix(h); 204 buf[bufLen++] = getChar(h); 205 206 h ^= fingerprint(buf, i * i * i % bufLen); 207 h = remix(h); 208 buf[bufLen++] = getChar(h); 209 210 h ^= fingerprint(buf, bufLen); 211 h = remix(h); 212 buf[bufLen++] = getChar(h); 213 214 int x0 = buf[bufLen - 1] & 0xff; 215 int x1 = buf[bufLen - 2] & 0xff; 216 int x2 = buf[bufLen - 3] & 0xff; 217 int x3 = buf[bufLen / 2] & 0xff; 218 buf[((x0 << 16) + (x1 << 8) + x2) % bufLen] ^= x3; 219 buf[((x1 << 16) + (x2 << 8) + x3) % bufLen] ^= i % 256; 220 } 221 assertEquals(0xeaa3b1c985261632L, h); 222 } 223 remix(long h)224 private static long remix(long h) { 225 h ^= h >>> 41; 226 h *= 949921979; 227 return h; 228 } 229 getChar(long h)230 private static byte getChar(long h) { 231 return (byte) ('a' + ((h & 0xfffff) % 26)); 232 } 233 } 234