1 /* 2 * Copyright (C) 2015 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.hash; 18 19 import static com.google.common.base.Charsets.ISO_8859_1; 20 import static com.google.common.base.Charsets.UTF_8; 21 import static com.google.common.truth.Truth.assertThat; 22 23 import com.google.common.base.Strings; 24 import java.util.Arrays; 25 import junit.framework.TestCase; 26 27 /** 28 * Unit test for FarmHashFingerprint64. 29 * 30 * @author Kyle Maddison 31 * @author Geoff Pike 32 */ 33 public class FarmHashFingerprint64Test extends TestCase { 34 35 private static final HashFunction HASH_FN = Hashing.farmHashFingerprint64(); 36 37 // If this test fails, all bets are off testReallySimpleFingerprints()38 public void testReallySimpleFingerprints() { 39 assertEquals(8581389452482819506L, fingerprint("test".getBytes(UTF_8))); 40 // 32 characters long 41 assertEquals(-4196240717365766262L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8))); 42 // 256 characters long 43 assertEquals(3500507768004279527L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8))); 44 } 45 testStringsConsistency()46 public void testStringsConsistency() { 47 for (String s : Arrays.asList("", "some", "test", "strings", "to", "try")) { 48 assertEquals(HASH_FN.newHasher().putUnencodedChars(s).hash(), HASH_FN.hashUnencodedChars(s)); 49 } 50 } 51 testUtf8()52 public void testUtf8() { 53 char[] charsA = new char[128]; 54 char[] charsB = new char[128]; 55 56 for (int i = 0; i < charsA.length; i++) { 57 if (i < 100) { 58 charsA[i] = 'a'; 59 charsB[i] = 'a'; 60 } else { 61 // Both two-byte characters, but must be different 62 charsA[i] = (char) (0x0180 + i); 63 charsB[i] = (char) (0x0280 + i); 64 } 65 } 66 67 String stringA = new String(charsA); 68 String stringB = new String(charsB); 69 assertThat(stringA).isNotEqualTo(stringB); 70 assertThat(HASH_FN.hashUnencodedChars(stringA)) 71 .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB)); 72 assertThat(fingerprint(stringA.getBytes(UTF_8))) 73 .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8))); 74 75 // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters 76 // greater than 127 (ie with their top bit set). 77 // Don't attempt to do this in real code. 78 assertEquals( 79 fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1))); 80 } 81 testPutNonChars()82 public void testPutNonChars() { 83 Hasher hasher = HASH_FN.newHasher(); 84 // Expected data is 0x0100010100000000 85 hasher 86 .putBoolean(true) 87 .putBoolean(true) 88 .putBoolean(false) 89 .putBoolean(true) 90 .putBoolean(false) 91 .putBoolean(false) 92 .putBoolean(false) 93 .putBoolean(false); 94 final long hashCode = hasher.hash().asLong(); 95 96 hasher = HASH_FN.newHasher(); 97 hasher 98 .putByte((byte) 0x01) 99 .putByte((byte) 0x01) 100 .putByte((byte) 0x00) 101 .putByte((byte) 0x01) 102 .putByte((byte) 0x00) 103 .putByte((byte) 0x00) 104 .putByte((byte) 0x00) 105 .putByte((byte) 0x00); 106 assertEquals(hashCode, hasher.hash().asLong()); 107 108 hasher = HASH_FN.newHasher(); 109 hasher 110 .putChar((char) 0x0101) 111 .putChar((char) 0x0100) 112 .putChar((char) 0x0000) 113 .putChar((char) 0x0000); 114 assertEquals(hashCode, hasher.hash().asLong()); 115 116 hasher = HASH_FN.newHasher(); 117 hasher.putBytes(new byte[] {0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00}); 118 assertEquals(hashCode, hasher.hash().asLong()); 119 120 hasher = HASH_FN.newHasher(); 121 hasher.putLong(0x0000000001000101L); 122 assertEquals(hashCode, hasher.hash().asLong()); 123 124 hasher = HASH_FN.newHasher(); 125 hasher 126 .putShort((short) 0x0101) 127 .putShort((short) 0x0100) 128 .putShort((short) 0x0000) 129 .putShort((short) 0x0000); 130 assertEquals(hashCode, hasher.hash().asLong()); 131 } 132 testHashFloatIsStable()133 public void testHashFloatIsStable() { 134 // Just a spot check. Better than nothing. 135 Hasher hasher = HASH_FN.newHasher(); 136 hasher.putFloat(0x01000101f).putFloat(0f); 137 assertEquals(0x49f9d18ee8ae1b28L, hasher.hash().asLong()); 138 139 hasher = HASH_FN.newHasher(); 140 hasher.putDouble(0x0000000001000101d); 141 assertEquals(0x388ee898bad75cbfL, hasher.hash().asLong()); 142 } 143 144 /** Convenience method to compute a fingerprint on a full bytes array. */ fingerprint(byte[] bytes)145 private static long fingerprint(byte[] bytes) { 146 return fingerprint(bytes, bytes.length); 147 } 148 149 /** Convenience method to compute a fingerprint on a subset of a byte array. */ fingerprint(byte[] bytes, int length)150 private static long fingerprint(byte[] bytes, int length) { 151 return HASH_FN.hashBytes(bytes, 0, length).asLong(); 152 } 153 154 /** 155 * Tests that the Java port of FarmHashFingerprint64 provides the same results on buffers up to 156 * 800 bytes long as the C++ reference implementation. 157 */ testMultipleLengths()158 public void testMultipleLengths() { 159 int iterations = 800; 160 byte[] buf = new byte[iterations * 4]; 161 int bufLen = 0; 162 long h = 0; 163 for (int i = 0; i < iterations; ++i) { 164 h ^= fingerprint(buf, i); 165 h = remix(h); 166 buf[bufLen++] = getChar(h); 167 168 h ^= fingerprint(buf, i * i % bufLen); 169 h = remix(h); 170 buf[bufLen++] = getChar(h); 171 172 h ^= fingerprint(buf, i * i * i % bufLen); 173 h = remix(h); 174 buf[bufLen++] = getChar(h); 175 176 h ^= fingerprint(buf, bufLen); 177 h = remix(h); 178 buf[bufLen++] = getChar(h); 179 180 int x0 = buf[bufLen - 1] & 0xff; 181 int x1 = buf[bufLen - 2] & 0xff; 182 int x2 = buf[bufLen - 3] & 0xff; 183 int x3 = buf[bufLen / 2] & 0xff; 184 buf[((x0 << 16) + (x1 << 8) + x2) % bufLen] ^= x3; 185 buf[((x1 << 16) + (x2 << 8) + x3) % bufLen] ^= i % 256; 186 } 187 assertEquals(0x7a1d67c50ec7e167L, h); 188 } 189 remix(long h)190 private static long remix(long h) { 191 h ^= h >>> 41; 192 h *= 949921979; 193 return h; 194 } 195 getChar(long h)196 private static byte getChar(long h) { 197 return (byte) ('a' + ((h & 0xfffff) % 26)); 198 } 199 } 200