• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.hash;
18 
19 import static com.google.common.base.Charsets.ISO_8859_1;
20 import static com.google.common.base.Charsets.UTF_8;
21 import static com.google.common.truth.Truth.assertThat;
22 
23 import com.google.common.base.Strings;
24 import java.util.Arrays;
25 import junit.framework.TestCase;
26 
27 /**
28  * Unit test for FarmHashFingerprint64.
29  *
30  * @author Kyle Maddison
31  * @author Geoff Pike
32  */
33 public class FarmHashFingerprint64Test extends TestCase {
34 
35   private static final HashFunction HASH_FN = Hashing.farmHashFingerprint64();
36 
37   // If this test fails, all bets are off
testReallySimpleFingerprints()38   public void testReallySimpleFingerprints() {
39     assertEquals(8581389452482819506L, fingerprint("test".getBytes(UTF_8)));
40     // 32 characters long
41     assertEquals(-4196240717365766262L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8)));
42     // 256 characters long
43     assertEquals(3500507768004279527L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8)));
44   }
45 
testStringsConsistency()46   public void testStringsConsistency() {
47     for (String s : Arrays.asList("", "some", "test", "strings", "to", "try")) {
48       assertEquals(HASH_FN.newHasher().putUnencodedChars(s).hash(), HASH_FN.hashUnencodedChars(s));
49     }
50   }
51 
testUtf8()52   public void testUtf8() {
53     char[] charsA = new char[128];
54     char[] charsB = new char[128];
55 
56     for (int i = 0; i < charsA.length; i++) {
57       if (i < 100) {
58         charsA[i] = 'a';
59         charsB[i] = 'a';
60       } else {
61         // Both two-byte characters, but must be different
62         charsA[i] = (char) (0x0180 + i);
63         charsB[i] = (char) (0x0280 + i);
64       }
65     }
66 
67     String stringA = new String(charsA);
68     String stringB = new String(charsB);
69     assertThat(stringA).isNotEqualTo(stringB);
70     assertThat(HASH_FN.hashUnencodedChars(stringA))
71         .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB));
72     assertThat(fingerprint(stringA.getBytes(UTF_8)))
73         .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8)));
74 
75     // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters
76     // greater than 127 (ie with their top bit set).
77     // Don't attempt to do this in real code.
78     assertEquals(
79         fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1)));
80   }
81 
testPutNonChars()82   public void testPutNonChars() {
83     Hasher hasher = HASH_FN.newHasher();
84     // Expected data is 0x0100010100000000
85     hasher
86         .putBoolean(true)
87         .putBoolean(true)
88         .putBoolean(false)
89         .putBoolean(true)
90         .putBoolean(false)
91         .putBoolean(false)
92         .putBoolean(false)
93         .putBoolean(false);
94     final long hashCode = hasher.hash().asLong();
95 
96     hasher = HASH_FN.newHasher();
97     hasher
98         .putByte((byte) 0x01)
99         .putByte((byte) 0x01)
100         .putByte((byte) 0x00)
101         .putByte((byte) 0x01)
102         .putByte((byte) 0x00)
103         .putByte((byte) 0x00)
104         .putByte((byte) 0x00)
105         .putByte((byte) 0x00);
106     assertEquals(hashCode, hasher.hash().asLong());
107 
108     hasher = HASH_FN.newHasher();
109     hasher
110         .putChar((char) 0x0101)
111         .putChar((char) 0x0100)
112         .putChar((char) 0x0000)
113         .putChar((char) 0x0000);
114     assertEquals(hashCode, hasher.hash().asLong());
115 
116     hasher = HASH_FN.newHasher();
117     hasher.putBytes(new byte[] {0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00});
118     assertEquals(hashCode, hasher.hash().asLong());
119 
120     hasher = HASH_FN.newHasher();
121     hasher.putLong(0x0000000001000101L);
122     assertEquals(hashCode, hasher.hash().asLong());
123 
124     hasher = HASH_FN.newHasher();
125     hasher
126         .putShort((short) 0x0101)
127         .putShort((short) 0x0100)
128         .putShort((short) 0x0000)
129         .putShort((short) 0x0000);
130     assertEquals(hashCode, hasher.hash().asLong());
131   }
132 
testHashFloatIsStable()133   public void testHashFloatIsStable() {
134     // Just a spot check.  Better than nothing.
135     Hasher hasher = HASH_FN.newHasher();
136     hasher.putFloat(0x01000101f).putFloat(0f);
137     assertEquals(0x49f9d18ee8ae1b28L, hasher.hash().asLong());
138 
139     hasher = HASH_FN.newHasher();
140     hasher.putDouble(0x0000000001000101d);
141     assertEquals(0x388ee898bad75cbfL, hasher.hash().asLong());
142   }
143 
144   /** Convenience method to compute a fingerprint on a full bytes array. */
fingerprint(byte[] bytes)145   private static long fingerprint(byte[] bytes) {
146     return fingerprint(bytes, bytes.length);
147   }
148 
149   /** Convenience method to compute a fingerprint on a subset of a byte array. */
fingerprint(byte[] bytes, int length)150   private static long fingerprint(byte[] bytes, int length) {
151     return HASH_FN.hashBytes(bytes, 0, length).asLong();
152   }
153 
154   /**
155    * Tests that the Java port of FarmHashFingerprint64 provides the same results on buffers up to
156    * 800 bytes long as the C++ reference implementation.
157    */
testMultipleLengths()158   public void testMultipleLengths() {
159     int iterations = 800;
160     byte[] buf = new byte[iterations * 4];
161     int bufLen = 0;
162     long h = 0;
163     for (int i = 0; i < iterations; ++i) {
164       h ^= fingerprint(buf, i);
165       h = remix(h);
166       buf[bufLen++] = getChar(h);
167 
168       h ^= fingerprint(buf, i * i % bufLen);
169       h = remix(h);
170       buf[bufLen++] = getChar(h);
171 
172       h ^= fingerprint(buf, i * i * i % bufLen);
173       h = remix(h);
174       buf[bufLen++] = getChar(h);
175 
176       h ^= fingerprint(buf, bufLen);
177       h = remix(h);
178       buf[bufLen++] = getChar(h);
179 
180       int x0 = buf[bufLen - 1] & 0xff;
181       int x1 = buf[bufLen - 2] & 0xff;
182       int x2 = buf[bufLen - 3] & 0xff;
183       int x3 = buf[bufLen / 2] & 0xff;
184       buf[((x0 << 16) + (x1 << 8) + x2) % bufLen] ^= x3;
185       buf[((x1 << 16) + (x2 << 8) + x3) % bufLen] ^= i % 256;
186     }
187     assertEquals(0x7a1d67c50ec7e167L, h);
188   }
189 
remix(long h)190   private static long remix(long h) {
191     h ^= h >>> 41;
192     h *= 949921979;
193     return h;
194   }
195 
getChar(long h)196   private static byte getChar(long h) {
197     return (byte) ('a' + ((h & 0xfffff) % 26));
198   }
199 }
200