• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 
3 package com.google.common.hash;
4 
5 import static com.google.common.base.Charsets.ISO_8859_1;
6 import static com.google.common.base.Charsets.UTF_8;
7 import static com.google.common.truth.Truth.assertThat;
8 
9 import com.google.common.base.Strings;
10 import com.google.common.collect.ImmutableSortedMap;
11 import com.google.common.collect.Ordering;
12 import com.google.common.primitives.UnsignedLong;
13 import java.util.Arrays;
14 import junit.framework.TestCase;
15 
16 /**
17  * Unit test for Fingerprint2011.
18  *
19  * @author kylemaddison@google.com (Kyle Maddison)
20  */
21 public class Fingerprint2011Test extends TestCase {
22 
23   // Length of the sample string to produce
24   private static final int MAX_BYTES = 1000;
25 
26   // Map from sample string lengths to the fingerprint
27   private static final ImmutableSortedMap<Integer, Long> LENGTH_FINGERPRINTS =
28       new ImmutableSortedMap.Builder<Integer, Long>(Ordering.natural())
29           .put(1000, 0x433109b33e13e6edL)
30           .put(800, 0x5f2f123bfc815f81L)
31           .put(640, 0x6396fc6a67293cf4L)
32           .put(512, 0x45c01b4934ddbbbeL)
33           .put(409, 0xfcd19b617551db45L)
34           .put(327, 0x4eee69e12854871eL)
35           .put(261, 0xab753446a3bbd532L)
36           .put(208, 0x54242fe06a291c3fL)
37           .put(166, 0x4f7acff7703a635bL)
38           .put(132, 0xa784bd0a1f22cc7fL)
39           .put(105, 0xf19118e187456638L)
40           .put(84, 0x3e2e58f9196abfe5L)
41           .put(67, 0xd38ae3dec0107aeaL)
42           .put(53, 0xea3033885868e10eL)
43           .put(42, 0x1394a146d0d7e04bL)
44           .put(33, 0x9962499315d2e8daL)
45           .put(26, 0x0849f5cfa85489b5L)
46           .put(20, 0x83b395ff19bf2171L)
47           .put(16, 0x9d33dd141bd55d9aL)
48           .put(12, 0x196248eb0b02466aL)
49           .put(9, 0x1cf73a50ff120336L)
50           .put(7, 0xb451c339457dbf51L)
51           .put(5, 0x681982c5e7b74064L)
52           .put(4, 0xc5ce47450ca6c021L)
53           .put(3, 0x9fcc3c3fde4d5ff7L)
54           .put(2, 0x090966a836e5fa4bL)
55           .put(1, 0x8199675ecaa6fe64L)
56           .put(0, 0x23ad7c904aa665e3L)
57           .build();
58   private static final HashFunction HASH_FN = Hashing.fingerprint2011();
59 
60   // If this test fails, all bets are off
testReallySimpleFingerprints()61   public void testReallySimpleFingerprints() {
62     assertEquals(8473225671271759044L, fingerprint("test".getBytes(UTF_8)));
63     // 32 characters long
64     assertEquals(7345148637025587076L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8)));
65     // 256 characters long
66     assertEquals(4904844928629814570L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8)));
67   }
68 
testStringsConsistency()69   public void testStringsConsistency() {
70     for (String s : Arrays.asList("", "some", "test", "strings", "to", "try")) {
71       assertEquals(HASH_FN.newHasher().putUnencodedChars(s).hash(), HASH_FN.hashUnencodedChars(s));
72     }
73   }
74 
testUtf8()75   public void testUtf8() {
76     char[] charsA = new char[128];
77     char[] charsB = new char[128];
78 
79     for (int i = 0; i < charsA.length; i++) {
80       if (i < 100) {
81         charsA[i] = 'a';
82         charsB[i] = 'a';
83       } else {
84         // Both two-byte characters, but must be different
85         charsA[i] = (char) (0x0180 + i);
86         charsB[i] = (char) (0x0280 + i);
87       }
88     }
89 
90     String stringA = new String(charsA);
91     String stringB = new String(charsB);
92     assertThat(stringA).isNotEqualTo(stringB);
93     assertThat(HASH_FN.hashUnencodedChars(stringA))
94         .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB));
95     assertThat(fingerprint(stringA.getBytes(UTF_8)))
96         .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8)));
97 
98     // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters
99     // greater than 127 (ie with their top bit set).
100     // Don't attempt to do this in real code.
101     assertEquals(
102         fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1)));
103   }
104 
testMumurHash64()105   public void testMumurHash64() {
106     byte[] bytes = "test".getBytes(UTF_8);
107     assertEquals(
108         1618900948208871284L, Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1));
109 
110     bytes = "test test test".getBytes(UTF_8);
111     assertEquals(
112         UnsignedLong.valueOf("12313169684067793560").longValue(),
113         Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1));
114   }
115 
testPutNonChars()116   public void testPutNonChars() {
117     Hasher hasher = HASH_FN.newHasher();
118     // Expected data is 0x0100010100000000
119     hasher
120         .putBoolean(true)
121         .putBoolean(true)
122         .putBoolean(false)
123         .putBoolean(true)
124         .putBoolean(false)
125         .putBoolean(false)
126         .putBoolean(false)
127         .putBoolean(false);
128     final long hashCode = hasher.hash().asLong();
129 
130     hasher = HASH_FN.newHasher();
131     hasher
132         .putByte((byte) 0x01)
133         .putByte((byte) 0x01)
134         .putByte((byte) 0x00)
135         .putByte((byte) 0x01)
136         .putByte((byte) 0x00)
137         .putByte((byte) 0x00)
138         .putByte((byte) 0x00)
139         .putByte((byte) 0x00);
140     assertEquals(hashCode, hasher.hash().asLong());
141 
142     hasher = HASH_FN.newHasher();
143     hasher
144         .putChar((char) 0x0101)
145         .putChar((char) 0x0100)
146         .putChar((char) 0x0000)
147         .putChar((char) 0x0000);
148     assertEquals(hashCode, hasher.hash().asLong());
149 
150     hasher = HASH_FN.newHasher();
151     hasher.putBytes(new byte[] {0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00});
152     assertEquals(hashCode, hasher.hash().asLong());
153 
154     hasher = HASH_FN.newHasher();
155     hasher.putLong(0x0000000001000101L);
156     assertEquals(hashCode, hasher.hash().asLong());
157 
158     hasher = HASH_FN.newHasher();
159     hasher
160         .putShort((short) 0x0101)
161         .putShort((short) 0x0100)
162         .putShort((short) 0x0000)
163         .putShort((short) 0x0000);
164     assertEquals(hashCode, hasher.hash().asLong());
165   }
166 
testHashFloatIsStable()167   public void testHashFloatIsStable() {
168     // This is about the best we can do for floating-point
169     Hasher hasher = HASH_FN.newHasher();
170     hasher.putFloat(0x01000101f).putFloat(0f);
171     assertEquals(0x96a4f8cc6ecbf16L, hasher.hash().asLong());
172 
173     hasher = HASH_FN.newHasher();
174     hasher.putDouble(0x0000000001000101d);
175     assertEquals(0xcf54171253fdc198L, hasher.hash().asLong());
176   }
177 
178   /** Convenience method to compute a fingerprint on a full bytes array. */
fingerprint(byte[] bytes)179   private static long fingerprint(byte[] bytes) {
180     return fingerprint(bytes, bytes.length);
181   }
182 
183   /** Convenience method to compute a fingerprint on a subset of a byte array. */
fingerprint(byte[] bytes, int length)184   private static long fingerprint(byte[] bytes, int length) {
185     return HASH_FN.hashBytes(bytes, 0, length).asLong();
186   }
187 
188   /**
189    * Tests that the Java port of Fingerprint2011 provides the same results on buffers up to 800
190    * bytes long as the original implementation in C++. See http://cl/106539598
191    */
testMultipleLengths()192   public void testMultipleLengths() {
193     int iterations = 800;
194     byte[] buf = new byte[iterations * 4];
195     int bufLen = 0;
196     long h = 0;
197     for (int i = 0; i < iterations; ++i) {
198       h ^= fingerprint(buf, i);
199       h = remix(h);
200       buf[bufLen++] = getChar(h);
201 
202       h ^= fingerprint(buf, i * i % bufLen);
203       h = remix(h);
204       buf[bufLen++] = getChar(h);
205 
206       h ^= fingerprint(buf, i * i * i % bufLen);
207       h = remix(h);
208       buf[bufLen++] = getChar(h);
209 
210       h ^= fingerprint(buf, bufLen);
211       h = remix(h);
212       buf[bufLen++] = getChar(h);
213 
214       int x0 = buf[bufLen - 1] & 0xff;
215       int x1 = buf[bufLen - 2] & 0xff;
216       int x2 = buf[bufLen - 3] & 0xff;
217       int x3 = buf[bufLen / 2] & 0xff;
218       buf[((x0 << 16) + (x1 << 8) + x2) % bufLen] ^= x3;
219       buf[((x1 << 16) + (x2 << 8) + x3) % bufLen] ^= i % 256;
220     }
221     assertEquals(0xeaa3b1c985261632L, h);
222   }
223 
remix(long h)224   private static long remix(long h) {
225     h ^= h >>> 41;
226     h *= 949921979;
227     return h;
228   }
229 
getChar(long h)230   private static byte getChar(long h) {
231     return (byte) ('a' + ((h & 0xfffff) % 26));
232   }
233 }
234