/*
 * Copyright (C) 2011 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.google.common.hash;
18 
19 import static com.google.common.hash.Hashing.murmur3_32;
20 import static com.google.common.hash.Hashing.murmur3_32_fixed;
21 
22 import com.google.common.base.Charsets;
23 import com.google.common.hash.HashTestUtils.HashFn;
24 import java.nio.charset.Charset;
25 import java.util.Random;
26 import junit.framework.TestCase;
27 
28 /** Tests for {@link Murmur3_32HashFunction}. */
29 public class Murmur3Hash32Test extends TestCase {
testKnownIntegerInputs()30   public void testKnownIntegerInputs() {
31     assertHash(593689054, murmur3_32().hashInt(0));
32     assertHash(-189366624, murmur3_32().hashInt(-42));
33     assertHash(-1134849565, murmur3_32().hashInt(42));
34     assertHash(-1718298732, murmur3_32().hashInt(Integer.MIN_VALUE));
35     assertHash(-1653689534, murmur3_32().hashInt(Integer.MAX_VALUE));
36   }
37 
testKnownLongInputs()38   public void testKnownLongInputs() {
39     assertHash(1669671676, murmur3_32().hashLong(0L));
40     assertHash(-846261623, murmur3_32().hashLong(-42L));
41     assertHash(1871679806, murmur3_32().hashLong(42L));
42     assertHash(1366273829, murmur3_32().hashLong(Long.MIN_VALUE));
43     assertHash(-2106506049, murmur3_32().hashLong(Long.MAX_VALUE));
44   }
45 
testKnownStringInputs()46   public void testKnownStringInputs() {
47     assertHash(0, murmur3_32().hashUnencodedChars(""));
48     assertHash(679745764, murmur3_32().hashUnencodedChars("k"));
49     assertHash(1510782915, murmur3_32().hashUnencodedChars("hell"));
50     assertHash(-675079799, murmur3_32().hashUnencodedChars("hello"));
51     assertHash(1935035788, murmur3_32().hashUnencodedChars("http://www.google.com/"));
52     assertHash(
53         -528633700, murmur3_32().hashUnencodedChars("The quick brown fox jumps over the lazy dog"));
54   }
55 
56   @SuppressWarnings("deprecation")
testKnownEncodedStringInputs()57   public void testKnownEncodedStringInputs() {
58     assertStringHash(0, "", Charsets.UTF_8);
59     assertStringHash(0xcfbda5d1, "k", Charsets.UTF_8);
60     assertStringHash(0xa167dbf3, "hell", Charsets.UTF_8);
61     assertStringHash(0x248bfa47, "hello", Charsets.UTF_8);
62     assertStringHash(0x3d41b97c, "http://www.google.com/", Charsets.UTF_8);
63     assertStringHash(0x2e4ff723, "The quick brown fox jumps over the lazy dog", Charsets.UTF_8);
64     assertStringHash(0xb5a4be05, "ABCDefGHI\u0799", Charsets.UTF_8);
65     assertStringHash(0xfc5ba834, "毎月1日,毎週月曜日", Charsets.UTF_8);
66     assertStringHash(0x8a5c3699, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_8);
67 
68     assertStringHash(0, "", Charsets.UTF_16LE);
69     assertStringHash(0x288418e4, "k", Charsets.UTF_16LE);
70     assertStringHash(0x5a0cb7c3, "hell", Charsets.UTF_16LE);
71     assertStringHash(0xd7c31989, "hello", Charsets.UTF_16LE);
72     assertStringHash(0x73564d8c, "http://www.google.com/", Charsets.UTF_16LE);
73     assertStringHash(0xe07db09c, "The quick brown fox jumps over the lazy dog", Charsets.UTF_16LE);
74     assertStringHash(0xfefa3e76, "ABCDefGHI\u0799", Charsets.UTF_16LE);
75     assertStringHash(0x6a7be132, "毎月1日,毎週月曜日", Charsets.UTF_16LE);
76     assertStringHash(0x5a2d41c7, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_16LE);
77   }
78 
79   @SuppressWarnings("deprecation")
assertStringHash(int expected, String string, Charset charset)80   private void assertStringHash(int expected, String string, Charset charset) {
81     if (allBmp(string)) {
82       assertHash(expected, murmur3_32().hashString(string, charset));
83     }
84     assertHash(expected, murmur3_32_fixed().hashString(string, charset));
85     assertHash(expected, murmur3_32().newHasher().putString(string, charset).hash());
86     assertHash(expected, murmur3_32_fixed().newHasher().putString(string, charset).hash());
87     assertHash(expected, murmur3_32().hashBytes(string.getBytes(charset)));
88     assertHash(expected, murmur3_32_fixed().hashBytes(string.getBytes(charset)));
89     assertHash(expected, murmur3_32().newHasher().putBytes(string.getBytes(charset)).hash());
90     assertHash(expected, murmur3_32_fixed().newHasher().putBytes(string.getBytes(charset)).hash());
91   }
92 
allBmp(String string)93   private boolean allBmp(String string) {
94     // Ordinarily we'd use something like i += Character.charCount(string.codePointAt(i)) here. But
95     // we can get away with i++ because the whole point of this method is to return false if we find
96     // a code point that doesn't fit in a char.
97     for (int i = 0; i < string.length(); i++) {
98       if (string.codePointAt(i) > 0xffff) {
99         return false;
100       }
101     }
102     return true;
103   }
104 
105   @SuppressWarnings("deprecation")
testSimpleStringUtf8()106   public void testSimpleStringUtf8() {
107     assertEquals(
108         murmur3_32().hashBytes("ABCDefGHI\u0799".getBytes(Charsets.UTF_8)),
109         murmur3_32().hashString("ABCDefGHI\u0799", Charsets.UTF_8));
110   }
111 
112   @SuppressWarnings("deprecation")
testEncodedStringInputs()113   public void testEncodedStringInputs() {
114     Random rng = new Random(0);
115     for (int z = 0; z < 100; z++) {
116       String str;
117       int[] codePoints = new int[rng.nextInt(8)];
118       for (int i = 0; i < codePoints.length; i++) {
119         do {
120           codePoints[i] = rng.nextInt(0x800);
121         } while (!Character.isValidCodePoint(codePoints[i])
122             || (codePoints[i] >= Character.MIN_SURROGATE
123                 && codePoints[i] <= Character.MAX_SURROGATE));
124       }
125       StringBuilder builder = new StringBuilder();
126       for (int i = 0; i < codePoints.length; i++) {
127         builder.appendCodePoint(codePoints[i]);
128       }
129       str = builder.toString();
130       HashCode hashUtf8 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8));
131       assertEquals(
132           hashUtf8, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_8)).hash());
133       assertEquals(hashUtf8, murmur3_32().hashString(str, Charsets.UTF_8));
134       assertEquals(hashUtf8, murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash());
135       HashCode hashUtf16 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_16));
136       assertEquals(
137           hashUtf16, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_16)).hash());
138       assertEquals(hashUtf16, murmur3_32().hashString(str, Charsets.UTF_16));
139       assertEquals(hashUtf16, murmur3_32().newHasher().putString(str, Charsets.UTF_16).hash());
140     }
141   }
142 
assertHash(int expected, HashCode actual)143   private static void assertHash(int expected, HashCode actual) {
144     assertEquals(HashCode.fromInt(expected), actual);
145   }
146 
testParanoidHashBytes()147   public void testParanoidHashBytes() {
148     HashFn hf =
149         new HashFn() {
150           @Override
151           public byte[] hash(byte[] input, int seed) {
152             return murmur3_32(seed).hashBytes(input).asBytes();
153           }
154         };
155     // Murmur3A, MurmurHash3 for x86, 32-bit (MurmurHash3_x86_32)
156     // https://github.com/aappleby/smhasher/blob/master/src/main.cpp
157     HashTestUtils.verifyHashFunction(hf, 32, 0xB0F57EE3);
158   }
159 
testParanoid()160   public void testParanoid() {
161     HashFn hf =
162         new HashFn() {
163           @Override
164           public byte[] hash(byte[] input, int seed) {
165             Hasher hasher = murmur3_32(seed).newHasher();
166             Funnels.byteArrayFunnel().funnel(input, hasher);
167             return hasher.hash().asBytes();
168           }
169         };
170     // Murmur3A, MurmurHash3 for x86, 32-bit (MurmurHash3_x86_32)
171     // https://github.com/aappleby/smhasher/blob/master/src/main.cpp
172     HashTestUtils.verifyHashFunction(hf, 32, 0xB0F57EE3);
173   }
174 
testInvariants()175   public void testInvariants() {
176     HashTestUtils.assertInvariants(murmur3_32());
177   }
178 
179   @SuppressWarnings("deprecation")
testInvalidUnicodeHashString()180   public void testInvalidUnicodeHashString() {
181     String str =
182         new String(
183             new char[] {'a', Character.MIN_HIGH_SURROGATE, Character.MIN_HIGH_SURROGATE, 'z'});
184     assertEquals(
185         murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)),
186         murmur3_32().hashString(str, Charsets.UTF_8));
187     assertEquals(
188         murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)),
189         murmur3_32().hashString(str, Charsets.UTF_8));
190   }
191 
192   @SuppressWarnings("deprecation")
testInvalidUnicodeHasherPutString()193   public void testInvalidUnicodeHasherPutString() {
194     String str =
195         new String(
196             new char[] {'a', Character.MIN_HIGH_SURROGATE, Character.MIN_HIGH_SURROGATE, 'z'});
197     assertEquals(
198         murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)),
199         murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash());
200     assertEquals(
201         murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)),
202         murmur3_32_fixed().newHasher().putString(str, Charsets.UTF_8).hash());
203   }
204 }
205