1 /* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.hash; 18 19 import static com.google.common.hash.Hashing.murmur3_32; 20 import static com.google.common.hash.Hashing.murmur3_32_fixed; 21 22 import com.google.common.base.Charsets; 23 import com.google.common.hash.HashTestUtils.HashFn; 24 import java.nio.charset.Charset; 25 import java.util.Random; 26 import junit.framework.TestCase; 27 28 /** Tests for {@link Murmur3_32HashFunction}. */ 29 public class Murmur3Hash32Test extends TestCase { testKnownIntegerInputs()30 public void testKnownIntegerInputs() { 31 assertHash(593689054, murmur3_32().hashInt(0)); 32 assertHash(-189366624, murmur3_32().hashInt(-42)); 33 assertHash(-1134849565, murmur3_32().hashInt(42)); 34 assertHash(-1718298732, murmur3_32().hashInt(Integer.MIN_VALUE)); 35 assertHash(-1653689534, murmur3_32().hashInt(Integer.MAX_VALUE)); 36 } 37 testKnownLongInputs()38 public void testKnownLongInputs() { 39 assertHash(1669671676, murmur3_32().hashLong(0L)); 40 assertHash(-846261623, murmur3_32().hashLong(-42L)); 41 assertHash(1871679806, murmur3_32().hashLong(42L)); 42 assertHash(1366273829, murmur3_32().hashLong(Long.MIN_VALUE)); 43 assertHash(-2106506049, murmur3_32().hashLong(Long.MAX_VALUE)); 44 } 45 testKnownStringInputs()46 public void testKnownStringInputs() { 47 assertHash(0, murmur3_32().hashUnencodedChars("")); 48 assertHash(679745764, murmur3_32().hashUnencodedChars("k")); 49 assertHash(1510782915, murmur3_32().hashUnencodedChars("hell")); 50 assertHash(-675079799, murmur3_32().hashUnencodedChars("hello")); 51 assertHash(1935035788, murmur3_32().hashUnencodedChars("http://www.google.com/")); 52 assertHash( 53 -528633700, murmur3_32().hashUnencodedChars("The quick brown fox jumps over the lazy dog")); 54 } 55 56 @SuppressWarnings("deprecation") testKnownEncodedStringInputs()57 public void testKnownEncodedStringInputs() { 58 assertStringHash(0, "", Charsets.UTF_8); 59 assertStringHash(0xcfbda5d1, "k", Charsets.UTF_8); 60 assertStringHash(0xa167dbf3, "hell", Charsets.UTF_8); 61 assertStringHash(0x248bfa47, "hello", Charsets.UTF_8); 62 assertStringHash(0x3d41b97c, "http://www.google.com/", Charsets.UTF_8); 63 assertStringHash(0x2e4ff723, "The quick brown fox jumps over the lazy dog", Charsets.UTF_8); 64 assertStringHash(0xb5a4be05, "ABCDefGHI\u0799", Charsets.UTF_8); 65 assertStringHash(0xfc5ba834, "毎月1日,毎週月曜日", Charsets.UTF_8); 66 assertStringHash(0x8a5c3699, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_8); 67 68 assertStringHash(0, "", Charsets.UTF_16LE); 69 assertStringHash(0x288418e4, "k", Charsets.UTF_16LE); 70 assertStringHash(0x5a0cb7c3, "hell", Charsets.UTF_16LE); 71 assertStringHash(0xd7c31989, "hello", Charsets.UTF_16LE); 72 assertStringHash(0x73564d8c, "http://www.google.com/", Charsets.UTF_16LE); 73 assertStringHash(0xe07db09c, "The quick brown fox jumps over the lazy dog", Charsets.UTF_16LE); 74 assertStringHash(0xfefa3e76, "ABCDefGHI\u0799", Charsets.UTF_16LE); 75 assertStringHash(0x6a7be132, "毎月1日,毎週月曜日", Charsets.UTF_16LE); 76 assertStringHash(0x5a2d41c7, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_16LE); 77 } 78 79 @SuppressWarnings("deprecation") assertStringHash(int expected, String string, Charset charset)80 private void assertStringHash(int expected, String string, Charset charset) { 81 if (allBmp(string)) { 82 assertHash(expected, murmur3_32().hashString(string, charset)); 83 } 84 assertHash(expected, murmur3_32_fixed().hashString(string, charset)); 85 assertHash(expected, murmur3_32().newHasher().putString(string, charset).hash()); 86 assertHash(expected, murmur3_32_fixed().newHasher().putString(string, charset).hash()); 87 assertHash(expected, murmur3_32().hashBytes(string.getBytes(charset))); 88 assertHash(expected, murmur3_32_fixed().hashBytes(string.getBytes(charset))); 89 assertHash(expected, murmur3_32().newHasher().putBytes(string.getBytes(charset)).hash()); 90 assertHash(expected, murmur3_32_fixed().newHasher().putBytes(string.getBytes(charset)).hash()); 91 } 92 allBmp(String string)93 private boolean allBmp(String string) { 94 // Ordinarily we'd use something like i += Character.charCount(string.codePointAt(i)) here. But 95 // we can get away with i++ because the whole point of this method is to return false if we find 96 // a code point that doesn't fit in a char. 97 for (int i = 0; i < string.length(); i++) { 98 if (string.codePointAt(i) > 0xffff) { 99 return false; 100 } 101 } 102 return true; 103 } 104 105 @SuppressWarnings("deprecation") testSimpleStringUtf8()106 public void testSimpleStringUtf8() { 107 assertEquals( 108 murmur3_32().hashBytes("ABCDefGHI\u0799".getBytes(Charsets.UTF_8)), 109 murmur3_32().hashString("ABCDefGHI\u0799", Charsets.UTF_8)); 110 } 111 112 @SuppressWarnings("deprecation") testEncodedStringInputs()113 public void testEncodedStringInputs() { 114 Random rng = new Random(0); 115 for (int z = 0; z < 100; z++) { 116 String str; 117 int[] codePoints = new int[rng.nextInt(8)]; 118 for (int i = 0; i < codePoints.length; i++) { 119 do { 120 codePoints[i] = rng.nextInt(0x800); 121 } while (!Character.isValidCodePoint(codePoints[i]) 122 || (codePoints[i] >= Character.MIN_SURROGATE 123 && codePoints[i] <= Character.MAX_SURROGATE)); 124 } 125 StringBuilder builder = new StringBuilder(); 126 for (int i = 0; i < codePoints.length; i++) { 127 builder.appendCodePoint(codePoints[i]); 128 } 129 str = builder.toString(); 130 HashCode hashUtf8 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)); 131 assertEquals( 132 hashUtf8, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_8)).hash()); 133 assertEquals(hashUtf8, murmur3_32().hashString(str, Charsets.UTF_8)); 134 assertEquals(hashUtf8, murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash()); 135 HashCode hashUtf16 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_16)); 136 assertEquals( 137 hashUtf16, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_16)).hash()); 138 assertEquals(hashUtf16, murmur3_32().hashString(str, Charsets.UTF_16)); 139 assertEquals(hashUtf16, murmur3_32().newHasher().putString(str, Charsets.UTF_16).hash()); 140 } 141 } 142 assertHash(int expected, HashCode actual)143 private static void assertHash(int expected, HashCode actual) { 144 assertEquals(HashCode.fromInt(expected), actual); 145 } 146 testParanoidHashBytes()147 public void testParanoidHashBytes() { 148 HashFn hf = 149 new HashFn() { 150 @Override 151 public byte[] hash(byte[] input, int seed) { 152 return murmur3_32(seed).hashBytes(input).asBytes(); 153 } 154 }; 155 // Murmur3A, MurmurHash3 for x86, 32-bit (MurmurHash3_x86_32) 156 // https://github.com/aappleby/smhasher/blob/master/src/main.cpp 157 HashTestUtils.verifyHashFunction(hf, 32, 0xB0F57EE3); 158 } 159 testParanoid()160 public void testParanoid() { 161 HashFn hf = 162 new HashFn() { 163 @Override 164 public byte[] hash(byte[] input, int seed) { 165 Hasher hasher = murmur3_32(seed).newHasher(); 166 Funnels.byteArrayFunnel().funnel(input, hasher); 167 return hasher.hash().asBytes(); 168 } 169 }; 170 // Murmur3A, MurmurHash3 for x86, 32-bit (MurmurHash3_x86_32) 171 // https://github.com/aappleby/smhasher/blob/master/src/main.cpp 172 HashTestUtils.verifyHashFunction(hf, 32, 0xB0F57EE3); 173 } 174 testInvariants()175 public void testInvariants() { 176 HashTestUtils.assertInvariants(murmur3_32()); 177 } 178 179 @SuppressWarnings("deprecation") testInvalidUnicodeHashString()180 public void testInvalidUnicodeHashString() { 181 String str = 182 new String( 183 new char[] {'a', Character.MIN_HIGH_SURROGATE, Character.MIN_HIGH_SURROGATE, 'z'}); 184 assertEquals( 185 murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), 186 murmur3_32().hashString(str, Charsets.UTF_8)); 187 assertEquals( 188 murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)), 189 murmur3_32().hashString(str, Charsets.UTF_8)); 190 } 191 192 @SuppressWarnings("deprecation") testInvalidUnicodeHasherPutString()193 public void testInvalidUnicodeHasherPutString() { 194 String str = 195 new String( 196 new char[] {'a', Character.MIN_HIGH_SURROGATE, Character.MIN_HIGH_SURROGATE, 'z'}); 197 assertEquals( 198 murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), 199 murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash()); 200 assertEquals( 201 murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)), 202 murmur3_32_fixed().newHasher().putString(str, Charsets.UTF_8).hash()); 203 } 204 } 205