1# test the invariant that 2# iff a==b then hash(a)==hash(b) 3# 4# Also test that hash implementations are inherited as expected 5 6import datetime 7import os 8import sys 9import unittest 10from test.support.script_helper import assert_python_ok 11from collections.abc import Hashable 12 13IS_64BIT = sys.maxsize > 2**32 14 15def lcg(x, length=16): 16 """Linear congruential generator""" 17 if x == 0: 18 return bytes(length) 19 out = bytearray(length) 20 for i in range(length): 21 x = (214013 * x + 2531011) & 0x7fffffff 22 out[i] = (x >> 16) & 0xff 23 return bytes(out) 24 25def pysiphash(uint64): 26 """Convert SipHash24 output to Py_hash_t 27 """ 28 assert 0 <= uint64 < (1 << 64) 29 # simple unsigned to signed int64 30 if uint64 > (1 << 63) - 1: 31 int64 = uint64 - (1 << 64) 32 else: 33 int64 = uint64 34 # mangle uint64 to uint32 35 uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff 36 # simple unsigned to signed int32 37 if uint32 > (1 << 31) - 1: 38 int32 = uint32 - (1 << 32) 39 else: 40 int32 = uint32 41 return int32, int64 42 43def skip_unless_internalhash(test): 44 """Skip decorator for tests that depend on SipHash24 or FNV""" 45 ok = sys.hash_info.algorithm in {"fnv", "siphash24"} 46 msg = "Requires SipHash24 or FNV" 47 return test if ok else unittest.skip(msg)(test) 48 49 50class HashEqualityTestCase(unittest.TestCase): 51 52 def same_hash(self, *objlist): 53 # Hash each object given and fail if 54 # the hash values are not all the same. 55 hashed = list(map(hash, objlist)) 56 for h in hashed[1:]: 57 if h != hashed[0]: 58 self.fail("hashed values differ: %r" % (objlist,)) 59 60 def test_numeric_literals(self): 61 self.same_hash(1, 1, 1.0, 1.0+0.0j) 62 self.same_hash(0, 0.0, 0.0+0.0j) 63 self.same_hash(-1, -1.0, -1.0+0.0j) 64 self.same_hash(-2, -2.0, -2.0+0.0j) 65 66 def test_coerced_integers(self): 67 self.same_hash(int(1), int(1), float(1), complex(1), 68 int('1'), float('1.0')) 69 self.same_hash(int(-2**31), float(-2**31)) 70 self.same_hash(int(1-2**31), float(1-2**31)) 71 self.same_hash(int(2**31-1), float(2**31-1)) 72 # for 64-bit platforms 73 self.same_hash(int(2**31), float(2**31)) 74 self.same_hash(int(-2**63), float(-2**63)) 75 self.same_hash(int(2**63), float(2**63)) 76 77 def test_coerced_floats(self): 78 self.same_hash(int(1.23e300), float(1.23e300)) 79 self.same_hash(float(0.5), complex(0.5, 0.0)) 80 81 def test_unaligned_buffers(self): 82 # The hash function for bytes-like objects shouldn't have 83 # alignment-dependent results (example in issue #16427). 84 b = b"123456789abcdefghijklmnopqrstuvwxyz" * 128 85 for i in range(16): 86 for j in range(16): 87 aligned = b[i:128+j] 88 unaligned = memoryview(b)[i:128+j] 89 self.assertEqual(hash(aligned), hash(unaligned)) 90 91 92_default_hash = object.__hash__ 93class DefaultHash(object): pass 94 95_FIXED_HASH_VALUE = 42 96class FixedHash(object): 97 def __hash__(self): 98 return _FIXED_HASH_VALUE 99 100class OnlyEquality(object): 101 def __eq__(self, other): 102 return self is other 103 104class OnlyInequality(object): 105 def __ne__(self, other): 106 return self is not other 107 108class InheritedHashWithEquality(FixedHash, OnlyEquality): pass 109class InheritedHashWithInequality(FixedHash, OnlyInequality): pass 110 111class NoHash(object): 112 __hash__ = None 113 114class HashInheritanceTestCase(unittest.TestCase): 115 default_expected = [object(), 116 DefaultHash(), 117 OnlyInequality(), 118 ] 119 fixed_expected = [FixedHash(), 120 InheritedHashWithEquality(), 121 InheritedHashWithInequality(), 122 ] 123 error_expected = [NoHash(), 124 OnlyEquality(), 125 ] 126 127 def test_default_hash(self): 128 for obj in self.default_expected: 129 self.assertEqual(hash(obj), _default_hash(obj)) 130 131 def test_fixed_hash(self): 132 for obj in self.fixed_expected: 133 self.assertEqual(hash(obj), _FIXED_HASH_VALUE) 134 135 def test_error_hash(self): 136 for obj in self.error_expected: 137 self.assertRaises(TypeError, hash, obj) 138 139 def test_hashable(self): 140 objects = (self.default_expected + 141 self.fixed_expected) 142 for obj in objects: 143 self.assertIsInstance(obj, Hashable) 144 145 def test_not_hashable(self): 146 for obj in self.error_expected: 147 self.assertNotIsInstance(obj, Hashable) 148 149 150# Issue #4701: Check that some builtin types are correctly hashable 151class DefaultIterSeq(object): 152 seq = range(10) 153 def __len__(self): 154 return len(self.seq) 155 def __getitem__(self, index): 156 return self.seq[index] 157 158class HashBuiltinsTestCase(unittest.TestCase): 159 hashes_to_check = [enumerate(range(10)), 160 iter(DefaultIterSeq()), 161 iter(lambda: 0, 0), 162 ] 163 164 def test_hashes(self): 165 _default_hash = object.__hash__ 166 for obj in self.hashes_to_check: 167 self.assertEqual(hash(obj), _default_hash(obj)) 168 169class HashRandomizationTests: 170 171 # Each subclass should define a field "repr_", containing the repr() of 172 # an object to be tested 173 174 def get_hash_command(self, repr_): 175 return 'print(hash(eval(%a)))' % repr_ 176 177 def get_hash(self, repr_, seed=None): 178 env = os.environ.copy() 179 env['__cleanenv'] = True # signal to assert_python not to do a copy 180 # of os.environ on its own 181 if seed is not None: 182 env['PYTHONHASHSEED'] = str(seed) 183 else: 184 env.pop('PYTHONHASHSEED', None) 185 out = assert_python_ok( 186 '-c', self.get_hash_command(repr_), 187 **env) 188 stdout = out[1].strip() 189 return int(stdout) 190 191 def test_randomized_hash(self): 192 # two runs should return different hashes 193 run1 = self.get_hash(self.repr_, seed='random') 194 run2 = self.get_hash(self.repr_, seed='random') 195 self.assertNotEqual(run1, run2) 196 197class StringlikeHashRandomizationTests(HashRandomizationTests): 198 repr_ = None 199 repr_long = None 200 201 # 32bit little, 64bit little, 32bit big, 64bit big 202 known_hashes = { 203 'djba33x': [ # only used for small strings 204 # seed 0, 'abc' 205 [193485960, 193485960, 193485960, 193485960], 206 # seed 42, 'abc' 207 [-678966196, 573763426263223372, -820489388, -4282905804826039665], 208 ], 209 'siphash24': [ 210 # NOTE: PyUCS2 layout depends on endianness 211 # seed 0, 'abc' 212 [1198583518, 4596069200710135518, 1198583518, 4596069200710135518], 213 # seed 42, 'abc' 214 [273876886, -4501618152524544106, 273876886, -4501618152524544106], 215 # seed 42, 'abcdefghijk' 216 [-1745215313, 4436719588892876975, -1745215313, 4436719588892876975], 217 # seed 0, 'äú∑ℇ' 218 [493570806, 5749986484189612790, -1006381564, -5915111450199468540], 219 # seed 42, 'äú∑ℇ' 220 [-1677110816, -2947981342227738144, -1860207793, -4296699217652516017], 221 ], 222 'fnv': [ 223 # seed 0, 'abc' 224 [-1600925533, 1453079729188098211, -1600925533, 225 1453079729188098211], 226 # seed 42, 'abc' 227 [-206076799, -4410911502303878509, -1024014457, 228 -3570150969479994130], 229 # seed 42, 'abcdefghijk' 230 [811136751, -5046230049376118746, -77208053 , 231 -4779029615281019666], 232 # seed 0, 'äú∑ℇ' 233 [44402817, 8998297579845987431, -1956240331, 234 -782697888614047887], 235 # seed 42, 'äú∑ℇ' 236 [-283066365, -4576729883824601543, -271871407, 237 -3927695501187247084], 238 ] 239 } 240 241 def get_expected_hash(self, position, length): 242 if length < sys.hash_info.cutoff: 243 algorithm = "djba33x" 244 else: 245 algorithm = sys.hash_info.algorithm 246 if sys.byteorder == 'little': 247 platform = 1 if IS_64BIT else 0 248 else: 249 assert(sys.byteorder == 'big') 250 platform = 3 if IS_64BIT else 2 251 return self.known_hashes[algorithm][position][platform] 252 253 def test_null_hash(self): 254 # PYTHONHASHSEED=0 disables the randomized hash 255 known_hash_of_obj = self.get_expected_hash(0, 3) 256 257 # Randomization is enabled by default: 258 self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj) 259 260 # It can also be disabled by setting the seed to 0: 261 self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) 262 263 @skip_unless_internalhash 264 def test_fixed_hash(self): 265 # test a fixed seed for the randomized hash 266 # Note that all types share the same values: 267 h = self.get_expected_hash(1, 3) 268 self.assertEqual(self.get_hash(self.repr_, seed=42), h) 269 270 @skip_unless_internalhash 271 def test_long_fixed_hash(self): 272 if self.repr_long is None: 273 return 274 h = self.get_expected_hash(2, 11) 275 self.assertEqual(self.get_hash(self.repr_long, seed=42), h) 276 277 278class StrHashRandomizationTests(StringlikeHashRandomizationTests, 279 unittest.TestCase): 280 repr_ = repr('abc') 281 repr_long = repr('abcdefghijk') 282 repr_ucs2 = repr('äú∑ℇ') 283 284 @skip_unless_internalhash 285 def test_empty_string(self): 286 self.assertEqual(hash(""), 0) 287 288 @skip_unless_internalhash 289 def test_ucs2_string(self): 290 h = self.get_expected_hash(3, 6) 291 self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h) 292 h = self.get_expected_hash(4, 6) 293 self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h) 294 295class BytesHashRandomizationTests(StringlikeHashRandomizationTests, 296 unittest.TestCase): 297 repr_ = repr(b'abc') 298 repr_long = repr(b'abcdefghijk') 299 300 @skip_unless_internalhash 301 def test_empty_string(self): 302 self.assertEqual(hash(b""), 0) 303 304class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests, 305 unittest.TestCase): 306 repr_ = "memoryview(b'abc')" 307 repr_long = "memoryview(b'abcdefghijk')" 308 309 @skip_unless_internalhash 310 def test_empty_string(self): 311 self.assertEqual(hash(memoryview(b"")), 0) 312 313class DatetimeTests(HashRandomizationTests): 314 def get_hash_command(self, repr_): 315 return 'import datetime; print(hash(%s))' % repr_ 316 317class DatetimeDateTests(DatetimeTests, unittest.TestCase): 318 repr_ = repr(datetime.date(1066, 10, 14)) 319 320class DatetimeDatetimeTests(DatetimeTests, unittest.TestCase): 321 repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7)) 322 323class DatetimeTimeTests(DatetimeTests, unittest.TestCase): 324 repr_ = repr(datetime.time(0)) 325 326 327class HashDistributionTestCase(unittest.TestCase): 328 329 def test_hash_distribution(self): 330 # check for hash collision 331 base = "abcdefghabcdefg" 332 for i in range(1, len(base)): 333 prefix = base[:i] 334 with self.subTest(prefix=prefix): 335 s15 = set() 336 s255 = set() 337 for c in range(256): 338 h = hash(prefix + chr(c)) 339 s15.add(h & 0xf) 340 s255.add(h & 0xff) 341 # SipHash24 distribution depends on key, usually > 60% 342 self.assertGreater(len(s15), 8, prefix) 343 self.assertGreater(len(s255), 128, prefix) 344 345if __name__ == "__main__": 346 unittest.main() 347