1 /* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.hash; 18 19 import static org.junit.Assert.assertEquals; 20 21 import com.google.common.base.Charsets; 22 import com.google.common.collect.ImmutableSet; 23 import com.google.common.collect.Sets; 24 import com.google.common.primitives.Ints; 25 import com.google.common.testing.EqualsTester; 26 27 import org.junit.Assert; 28 29 import java.nio.charset.Charset; 30 import java.util.Arrays; 31 import java.util.Random; 32 import java.util.Set; 33 34 /** 35 * Various utilities for testing {@link HashFunction}s. 36 * 37 * @author Dimitris Andreou 38 * @author Kurt Alfred Kluever 39 */ 40 final class HashTestUtils { HashTestUtils()41 private HashTestUtils() {} 42 43 /** 44 * Converts a string, which should contain only ascii-representable characters, to a byte[]. 45 */ ascii(String string)46 static byte[] ascii(String string) { 47 byte[] bytes = new byte[string.length()]; 48 for (int i = 0; i < string.length(); i++) { 49 bytes[i] = (byte) string.charAt(i); 50 } 51 return bytes; 52 } 53 54 interface HashFn { hash(byte[] input, int seed)55 byte[] hash(byte[] input, int seed); 56 } 57 verifyHashFunction(HashFn hashFunction, int hashbits, int expected)58 static void verifyHashFunction(HashFn hashFunction, int hashbits, int expected) { 59 int hashBytes = hashbits / 8; 60 61 byte[] key = new byte[256]; 62 byte[] hashes = new byte[hashBytes * 256]; 63 64 // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as the seed 65 for (int i = 0; i < 256; i++) { 66 key[i] = (byte) i; 67 int seed = 256 - i; 68 byte[] hash = hashFunction.hash(Arrays.copyOf(key, i), seed); 69 System.arraycopy(hash, 0, hashes, i * hashBytes, hash.length); 70 } 71 72 // Then hash the result array 73 byte[] result = hashFunction.hash(hashes, 0); 74 75 // interpreted in little-endian order. 76 int verification = Integer.reverseBytes(Ints.fromByteArray(result)); 77 78 if (expected != verification) { 79 throw new AssertionError("Expected: " + Integer.toHexString(expected) 80 + " got: " + Integer.toHexString(verification)); 81 } 82 } 83 84 static final Funnel<Object> BAD_FUNNEL = new Funnel<Object>() { 85 @Override public void funnel(Object object, PrimitiveSink bytePrimitiveSink) { 86 bytePrimitiveSink.putInt(object.hashCode()); 87 } 88 }; 89 90 static enum RandomHasherAction { PUT_BOOLEAN()91 PUT_BOOLEAN() { 92 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 93 boolean value = random.nextBoolean(); 94 for (PrimitiveSink sink : sinks) { 95 sink.putBoolean(value); 96 } 97 } 98 }, PUT_BYTE()99 PUT_BYTE() { 100 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 101 int value = random.nextInt(); 102 for (PrimitiveSink sink : sinks) { 103 sink.putByte((byte) value); 104 } 105 } 106 }, PUT_SHORT()107 PUT_SHORT() { 108 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 109 short value = (short) random.nextInt(); 110 for (PrimitiveSink sink : sinks) { 111 sink.putShort(value); 112 } 113 } 114 }, PUT_CHAR()115 PUT_CHAR() { 116 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 117 char value = (char) random.nextInt(); 118 for (PrimitiveSink sink : sinks) { 119 sink.putChar(value); 120 } 121 } 122 }, PUT_INT()123 PUT_INT() { 124 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 125 int value = random.nextInt(); 126 for (PrimitiveSink sink : sinks) { 127 sink.putInt(value); 128 } 129 } 130 }, PUT_LONG()131 PUT_LONG() { 132 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 133 long value = random.nextLong(); 134 for (PrimitiveSink sink : sinks) { 135 sink.putLong(value); 136 } 137 } 138 }, PUT_FLOAT()139 PUT_FLOAT() { 140 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 141 float value = random.nextFloat(); 142 for (PrimitiveSink sink : sinks) { 143 sink.putFloat(value); 144 } 145 } 146 }, PUT_DOUBLE()147 PUT_DOUBLE() { 148 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 149 double value = random.nextDouble(); 150 for (PrimitiveSink sink : sinks) { 151 sink.putDouble(value); 152 } 153 } 154 }, PUT_BYTES()155 PUT_BYTES() { 156 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 157 byte[] value = new byte[random.nextInt(128)]; 158 random.nextBytes(value); 159 for (PrimitiveSink sink : sinks) { 160 sink.putBytes(value); 161 } 162 } 163 }, PUT_BYTES_INT_INT()164 PUT_BYTES_INT_INT() { 165 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 166 byte[] value = new byte[random.nextInt(128)]; 167 random.nextBytes(value); 168 int off = random.nextInt(value.length + 1); 169 int len = random.nextInt(value.length - off + 1); 170 for (PrimitiveSink sink : sinks) { 171 sink.putBytes(value); 172 } 173 } 174 }, PUT_STRING()175 PUT_STRING() { 176 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 177 char[] value = new char[random.nextInt(128)]; 178 for (int i = 0; i < value.length; i++) { 179 value[i] = (char) random.nextInt(); 180 } 181 String s = new String(value); 182 for (PrimitiveSink sink : sinks) { 183 sink.putUnencodedChars(s); 184 } 185 } 186 }, PUT_STRING_LOW_SURROGATE()187 PUT_STRING_LOW_SURROGATE() { 188 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 189 String s = new String(new char[] { randomLowSurrogate(random) }); 190 for (PrimitiveSink sink : sinks) { 191 sink.putUnencodedChars(s); 192 } 193 } 194 }, PUT_STRING_HIGH_SURROGATE()195 PUT_STRING_HIGH_SURROGATE() { 196 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 197 String s = new String(new char[] { randomHighSurrogate(random) }); 198 for (PrimitiveSink sink : sinks) { 199 sink.putUnencodedChars(s); 200 } 201 } 202 }, PUT_STRING_LOW_HIGH_SURROGATE()203 PUT_STRING_LOW_HIGH_SURROGATE() { 204 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 205 String s = new String(new char[] { 206 randomLowSurrogate(random), randomHighSurrogate(random)}); 207 for (PrimitiveSink sink : sinks) { 208 sink.putUnencodedChars(s); 209 } 210 } 211 }, PUT_STRING_HIGH_LOW_SURROGATE()212 PUT_STRING_HIGH_LOW_SURROGATE() { 213 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) { 214 String s = new String(new char[] { 215 randomHighSurrogate(random), randomLowSurrogate(random)}); 216 for (PrimitiveSink sink : sinks) { 217 sink.putUnencodedChars(s); 218 } 219 } 220 }; 221 performAction(Random random, Iterable<? extends PrimitiveSink> sinks)222 abstract void performAction(Random random, Iterable<? extends PrimitiveSink> sinks); 223 224 private static final RandomHasherAction[] actions = values(); 225 pickAtRandom(Random random)226 static RandomHasherAction pickAtRandom(Random random) { 227 return actions[random.nextInt(actions.length)]; 228 } 229 } 230 231 /** 232 * Test that the hash function contains no funnels. A funnel is a situation where a set of input 233 * (key) bits 'affects' a strictly smaller set of output bits. Funneling is bad because it can 234 * result in more-than-ideal collisions for a non-uniformly distributed key space. In practice, 235 * most key spaces are ANYTHING BUT uniformly distributed. A bit(i) in the input is said to 236 * 'affect' a bit(j) in the output if two inputs, identical but for bit(i), will differ at output 237 * bit(j) about half the time 238 * 239 * <p>Funneling is pretty simple to detect. The key idea is to find example keys which 240 * unequivocably demonstrate that funneling cannot be occuring. This is done bit-by-bit. For 241 * each input bit(i) and output bit(j), two pairs of keys must be found with all bits identical 242 * except bit(i). One pair must differ in output bit(j), and one pair must not. This proves that 243 * input bit(i) can alter output bit(j). 244 */ checkNoFunnels(HashFunction function)245 static void checkNoFunnels(HashFunction function) { 246 Random rand = new Random(0); 247 int keyBits = 32; 248 int hashBits = function.bits(); 249 250 // output loop tests input bit 251 for (int i = 0; i < keyBits; i++) { 252 int same = 0x0; // bitset for output bits with same values 253 int diff = 0x0; // bitset for output bits with different values 254 int count = 0; 255 // originally was 2 * Math.log(...), making it try more times to avoid flakiness issues 256 int maxCount = (int) (4 * Math.log(2 * keyBits * hashBits) + 1); 257 while (same != 0xffffffff || diff != 0xffffffff) { 258 int key1 = rand.nextInt(); 259 // flip input bit for key2 260 int key2 = key1 ^ (1 << i); 261 // get hashes 262 int hash1 = function.hashInt(key1).asInt(); 263 int hash2 = function.hashInt(key2).asInt(); 264 // test whether the hash values have same output bits 265 same |= ~(hash1 ^ hash2); 266 // test whether the hash values have different output bits 267 diff |= (hash1 ^ hash2); 268 269 count++; 270 // check whether we've exceeded the probabilistically 271 // likely number of trials to have proven no funneling 272 if (count > maxCount) { 273 Assert.fail("input bit(" + i + ") was found not to affect all " + 274 hashBits + " output bits; The unaffected bits are " + 275 "as follows: " + ~(same & diff) + ". This was " + 276 "determined after " + count + " trials."); 277 } 278 } 279 } 280 } 281 282 /** 283 * Test for avalanche. Avalanche means that output bits differ with roughly 1/2 probability on 284 * different input keys. This test verifies that each possible 1-bit key delta achieves avalanche. 285 * 286 * <p>For more information: http://burtleburtle.net/bob/hash/avalanche.html 287 */ checkAvalanche(HashFunction function, int trials, double epsilon)288 static void checkAvalanche(HashFunction function, int trials, double epsilon) { 289 Random rand = new Random(0); 290 int keyBits = 32; 291 int hashBits = function.bits(); 292 for (int i = 0; i < keyBits; i++) { 293 int[] same = new int[hashBits]; 294 int[] diff = new int[hashBits]; 295 // go through trials to compute probability 296 for (int j = 0; j < trials; j++) { 297 int key1 = rand.nextInt(); 298 // flip input bit for key2 299 int key2 = key1 ^ (1 << i); 300 // compute hash values 301 int hash1 = function.hashInt(key1).asInt(); 302 int hash2 = function.hashInt(key2).asInt(); 303 for (int k = 0; k < hashBits; k++) { 304 if ((hash1 & (1 << k)) == (hash2 & (1 << k))) { 305 same[k] += 1; 306 } else { 307 diff[k] += 1; 308 } 309 } 310 } 311 // measure probability and assert it's within margin of error 312 for (int j = 0; j < hashBits; j++) { 313 double prob = (double) diff[j] / (double) (diff[j] + same[j]); 314 Assert.assertEquals(0.50d, prob, epsilon); 315 } 316 } 317 } 318 319 /** 320 * Test for 2-bit characteristics. A characteristic is a delta in the input which is repeated in 321 * the output. For example, if f() is a block cipher and c is a characteristic, then 322 * f(x^c) = f(x)^c with greater than expected probability. The test for funneling is merely a test 323 * for 1-bit characteristics. 324 * 325 * <p>There is more general code provided by Bob Jenkins to test arbitrarily sized characteristics 326 * using the magic of gaussian elimination: http://burtleburtle.net/bob/crypto/findingc.html. 327 */ checkNo2BitCharacteristics(HashFunction function)328 static void checkNo2BitCharacteristics(HashFunction function) { 329 Random rand = new Random(0); 330 int keyBits = 32; 331 332 // get every one of (keyBits choose 2) deltas: 333 for (int i = 0; i < keyBits; i++) { 334 for (int j = 0; j < keyBits; j++) { 335 if (j <= i) continue; 336 int count = 0; 337 int maxCount = 20; // the probability of error here is miniscule 338 boolean diff = false; 339 340 while (!diff) { 341 int delta = (1 << i) | (1 << j); 342 int key1 = rand.nextInt(); 343 // apply delta 344 int key2 = key1 ^ delta; 345 346 // get hashes 347 int hash1 = function.hashInt(key1).asInt(); 348 int hash2 = function.hashInt(key2).asInt(); 349 350 // this 2-bit candidate delta is not a characteristic 351 // if deltas are different 352 if ((hash1 ^ hash2) != delta) { 353 diff = true; 354 continue; 355 } 356 357 // check if we've exceeded the probabilistically 358 // likely number of trials to have proven 2-bit candidate 359 // is not a characteristic 360 count++; 361 if (count > maxCount) { 362 Assert.fail("2-bit delta (" + i + ", " + j + ") is likely a " + 363 "characteristic for this hash. This was " + 364 "determined after " + count + " trials"); 365 } 366 } 367 } 368 } 369 } 370 371 /** 372 * Test for avalanche with 2-bit deltas. Most probabilities of output bit(j) differing are well 373 * within 50%. 374 */ check2BitAvalanche(HashFunction function, int trials, double epsilon)375 static void check2BitAvalanche(HashFunction function, int trials, double epsilon) { 376 Random rand = new Random(0); 377 int keyBits = 32; 378 int hashBits = function.bits(); 379 for (int bit1 = 0; bit1 < keyBits; bit1++) { 380 for (int bit2 = 0; bit2 < keyBits; bit2++) { 381 if (bit2 <= bit1) continue; 382 int delta = (1 << bit1) | (1 << bit2); 383 int[] same = new int[hashBits]; 384 int[] diff = new int[hashBits]; 385 // go through trials to compute probability 386 for (int j = 0; j < trials; j++) { 387 int key1 = rand.nextInt(); 388 // flip input bit for key2 389 int key2 = key1 ^ delta; 390 // compute hash values 391 int hash1 = function.hashInt(key1).asInt(); 392 int hash2 = function.hashInt(key2).asInt(); 393 for (int k = 0; k < hashBits; k++) { 394 if ((hash1 & (1 << k)) == (hash2 & (1 << k))) { 395 same[k] += 1; 396 } else { 397 diff[k] += 1; 398 } 399 } 400 } 401 // measure probability and assert it's within margin of error 402 for (int j = 0; j < hashBits; j++) { 403 double prob = (double) diff[j] / (double) (diff[j] + same[j]); 404 Assert.assertEquals(0.50d, prob, epsilon); 405 } 406 } 407 } 408 } 409 410 /** 411 * Checks that a Hasher returns the same HashCode when given the same input, and also 412 * that the collision rate looks sane. 413 */ assertInvariants(HashFunction hashFunction)414 static void assertInvariants(HashFunction hashFunction) { 415 int objects = 100; 416 Set<HashCode> hashcodes = Sets.newHashSetWithExpectedSize(objects); 417 for (int i = 0; i < objects; i++) { 418 Object o = new Object(); 419 HashCode hashcode1 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL); 420 HashCode hashcode2 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL); 421 Assert.assertEquals(hashcode1, hashcode2); // idempotent 422 Assert.assertEquals(hashFunction.bits(), hashcode1.bits()); 423 Assert.assertEquals(hashFunction.bits(), hashcode1.asBytes().length * 8); 424 hashcodes.add(hashcode1); 425 } 426 Assert.assertTrue(hashcodes.size() > objects * 0.95); // quite relaxed test 427 428 assertHashBytesThrowsCorrectExceptions(hashFunction); 429 assertIndependentHashers(hashFunction); 430 assertShortcutsAreEquivalent(hashFunction, 512); 431 } 432 assertHashBytesThrowsCorrectExceptions(HashFunction hashFunction)433 static void assertHashBytesThrowsCorrectExceptions(HashFunction hashFunction) { 434 hashFunction.hashBytes(new byte[64], 0, 0); 435 436 try { 437 hashFunction.hashBytes(new byte[128], -1, 128); 438 Assert.fail(); 439 } catch (IndexOutOfBoundsException expected) {} 440 try { 441 hashFunction.hashBytes(new byte[128], 64, 256 /* too long len */); 442 Assert.fail(); 443 } catch (IndexOutOfBoundsException expected) {} 444 try { 445 hashFunction.hashBytes(new byte[64], 0, -1); 446 Assert.fail(); 447 } catch (IndexOutOfBoundsException expected) {} 448 } 449 assertIndependentHashers(HashFunction hashFunction)450 static void assertIndependentHashers(HashFunction hashFunction) { 451 int numActions = 100; 452 // hashcodes from non-overlapping hash computations 453 HashCode expected1 = randomHash(hashFunction, new Random(1L), numActions); 454 HashCode expected2 = randomHash(hashFunction, new Random(2L), numActions); 455 456 // equivalent, but overlapping, computations (should produce the same results as above) 457 Random random1 = new Random(1L); 458 Random random2 = new Random(2L); 459 Hasher hasher1 = hashFunction.newHasher(); 460 Hasher hasher2 = hashFunction.newHasher(); 461 for (int i = 0; i < numActions; i++) { 462 RandomHasherAction.pickAtRandom(random1).performAction(random1, ImmutableSet.of(hasher1)); 463 RandomHasherAction.pickAtRandom(random2).performAction(random2, ImmutableSet.of(hasher2)); 464 } 465 466 Assert.assertEquals(expected1, hasher1.hash()); 467 Assert.assertEquals(expected2, hasher2.hash()); 468 } 469 randomHash(HashFunction hashFunction, Random random, int numActions)470 static HashCode randomHash(HashFunction hashFunction, Random random, int numActions) { 471 Hasher hasher = hashFunction.newHasher(); 472 for (int i = 0; i < numActions; i++) { 473 RandomHasherAction.pickAtRandom(random).performAction(random, ImmutableSet.of(hasher)); 474 } 475 return hasher.hash(); 476 } 477 assertShortcutsAreEquivalent(HashFunction hashFunction, int trials)478 private static void assertShortcutsAreEquivalent(HashFunction hashFunction, int trials) { 479 Random random = new Random(42085L); 480 for (int i = 0; i < trials; i++) { 481 assertHashBytesEquivalence(hashFunction, random); 482 assertHashIntEquivalence(hashFunction, random); 483 assertHashLongEquivalence(hashFunction, random); 484 assertHashStringEquivalence(hashFunction, random); 485 assertHashStringWithSurrogatesEquivalence(hashFunction, random); 486 } 487 } 488 assertHashBytesEquivalence(HashFunction hashFunction, Random random)489 private static void assertHashBytesEquivalence(HashFunction hashFunction, Random random) { 490 int size = random.nextInt(2048); 491 byte[] bytes = new byte[size]; 492 random.nextBytes(bytes); 493 assertEquals(hashFunction.hashBytes(bytes), 494 hashFunction.newHasher(size).putBytes(bytes).hash()); 495 int off = random.nextInt(size); 496 int len = random.nextInt(size - off); 497 assertEquals(hashFunction.hashBytes(bytes, off, len), 498 hashFunction.newHasher(size).putBytes(bytes, off, len).hash()); 499 } 500 assertHashIntEquivalence(HashFunction hashFunction, Random random)501 private static void assertHashIntEquivalence(HashFunction hashFunction, Random random) { 502 int i = random.nextInt(); 503 assertEquals(hashFunction.hashInt(i), 504 hashFunction.newHasher().putInt(i).hash()); 505 } 506 assertHashLongEquivalence(HashFunction hashFunction, Random random)507 private static void assertHashLongEquivalence(HashFunction hashFunction, Random random) { 508 long l = random.nextLong(); 509 assertEquals(hashFunction.hashLong(l), 510 hashFunction.newHasher().putLong(l).hash()); 511 } 512 513 private static final ImmutableSet<Charset> CHARSETS = ImmutableSet.of( 514 Charsets.ISO_8859_1, 515 Charsets.US_ASCII, 516 Charsets.UTF_16, 517 Charsets.UTF_16BE, 518 Charsets.UTF_16LE, 519 Charsets.UTF_8); 520 assertHashStringEquivalence(HashFunction hashFunction, Random random)521 private static void assertHashStringEquivalence(HashFunction hashFunction, Random random) { 522 // Test that only data and data-order is important, not the individual operations. 523 new EqualsTester() 524 .addEqualityGroup( 525 hashFunction.hashUnencodedChars("abc"), 526 hashFunction.newHasher().putUnencodedChars("abc").hash(), 527 hashFunction.newHasher().putUnencodedChars("ab").putUnencodedChars("c").hash(), 528 hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("bc").hash(), 529 hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("b") 530 .putUnencodedChars("c").hash(), 531 hashFunction.newHasher().putChar('a').putUnencodedChars("bc").hash(), 532 hashFunction.newHasher().putUnencodedChars("ab").putChar('c').hash(), 533 hashFunction.newHasher().putChar('a').putChar('b').putChar('c').hash()) 534 .testEquals(); 535 536 int size = random.nextInt(2048); 537 byte[] bytes = new byte[size]; 538 random.nextBytes(bytes); 539 String string = new String(bytes); 540 assertEquals(hashFunction.hashUnencodedChars(string), 541 hashFunction.newHasher().putUnencodedChars(string).hash()); 542 for (Charset charset : CHARSETS) { 543 assertEquals(hashFunction.hashString(string, charset), 544 hashFunction.newHasher().putString(string, charset).hash()); 545 } 546 } 547 548 /** 549 * This verifies that putUnencodedChars(String) and hashUnencodedChars(String) are equivalent, 550 * even for funny strings composed by (possibly unmatched, and mostly illegal) surrogate 551 * characters. (But doesn't test that they do the right thing - just their consistency). 552 */ assertHashStringWithSurrogatesEquivalence( HashFunction hashFunction, Random random)553 private static void assertHashStringWithSurrogatesEquivalence( 554 HashFunction hashFunction, Random random) { 555 int size = random.nextInt(8) + 1; 556 char[] chars = new char[size]; 557 for (int i = 0; i < chars.length; i++) { 558 chars[i] = random.nextBoolean() ? randomLowSurrogate(random) : randomHighSurrogate(random); 559 } 560 String string = new String(chars); 561 assertEquals(hashFunction.hashUnencodedChars(string), 562 hashFunction.newHasher().putUnencodedChars(string).hash()); 563 } 564 randomLowSurrogate(Random random)565 static char randomLowSurrogate(Random random) { 566 return (char) (Character.MIN_LOW_SURROGATE 567 + random.nextInt(Character.MAX_LOW_SURROGATE - Character.MIN_LOW_SURROGATE + 1)); 568 } 569 randomHighSurrogate(Random random)570 static char randomHighSurrogate(Random random) { 571 return (char) (Character.MIN_HIGH_SURROGATE 572 + random.nextInt(Character.MAX_HIGH_SURROGATE - Character.MIN_HIGH_SURROGATE + 1)); 573 } 574 } 575