1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 // This is the murmur3 32 bit hash implementation 16 // The main structure and constants were taken from Austin Appleby. 17 // From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp): 18 // - MurmurHash3 was written by Austin Appleby, and is placed in the public 19 // - domain. The author hereby disclaims copyright to this source code. 20 21 #ifndef LIBPANDABASE_UTILS_MURMUR3_HASH_H 22 #define LIBPANDABASE_UTILS_MURMUR3_HASH_H 23 24 #include "hash_base.h" 25 #include "logger.h" 26 27 namespace panda { 28 29 // In general murmur hash looks like that: 30 // key = |....|....|....|.|.|.| 31 // 32 32 32 8 8 8 32 // Firstly, we proceed each 32 bits block from key; 33 // Secondly, we proceed last 8 bits block which were not covered in previous step. 34 template <uint32_t seed_value> 35 class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>> { 36 public: GetHash32WithSeedImpl(const uint8_t * key,size_t len,uint32_t seed)37 static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed) 38 { 39 return MurmurHash3(key, len, seed); 40 } GetHash32Impl(const uint8_t * key,size_t len)41 static uint32_t GetHash32Impl(const uint8_t *key, size_t len) 42 { 43 return GetHash32WithSeedImpl(key, len, seed_value); 44 } GetHash32StringImpl(const uint8_t * mutf8_string)45 static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string) 46 { 47 return MurmurHash3String(mutf8_string, seed_value); 48 } GetHash32StringWithSeedImpl(const uint8_t * mutf8_string,uint32_t seed)49 static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed) 50 { 51 return MurmurHash3String(mutf8_string, seed); 52 } 53 54 private: 55 // Here are the main constants for hash counting: 56 // Many of them looks like some kind of magic 57 static constexpr uint32_t C1 = 0xCC9E2D51U; 58 static constexpr uint32_t C2 = 0x1B873593U; 59 static constexpr uint32_t MAX_BITS = 32; 60 static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16; 61 static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13; 62 static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16; 63 static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU; 64 static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U; 65 static constexpr uint32_t MAIN_FIRST_SHIFT = 15; 66 static constexpr uint32_t MAIN_SECOND_SHIFT = 13; 67 static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U; 68 static constexpr uint32_t MAIN_MULTIPLICATOR = 5; 69 static constexpr uint32_t TAIL_SHIFT = 8; 70 static constexpr uint32_t TAIL_LAST_SHIFT = 15; 71 static constexpr uint32_t BLOCK_SIZE = 4; 72 Rotl(uint32_t word,uint8_t shift)73 static uint32_t Rotl(uint32_t word, uint8_t shift) 74 { 75 return (word << shift) | (word >> (MAX_BITS - shift)); 76 } 77 78 // Finilize the result of the hash function Finalize(uint32_t h)79 static uint32_t Finalize(uint32_t h) 80 { 81 h ^= h >> FINALIZE_FIRST_SHIFT; 82 h *= FINALIZE_FIRST_MULTIPLICATOR; 83 h ^= h >> FINALIZE_SECOND_SHIFT; 84 h *= FINALIZE_SECOND_MULTIPLICATOR; 85 h ^= h >> FINALIZE_THIRD_SHIFT; 86 return h; 87 } 88 MurmurHash3(const uint8_t * key,size_t len,uint32_t seed)89 static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed) 90 { 91 // We start hashing from the seed 92 uint32_t hash = seed; 93 94 // Do the main part: 95 // Iterate for each 32bits 96 auto blocks = reinterpret_cast<uintptr_t>(key); 97 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 98 for (size_t i = len / BLOCK_SIZE; i != 0; i--) { 99 for (auto &j : memblock) { 100 j = *reinterpret_cast<uint8_t *>(blocks); 101 blocks += sizeof(uint8_t); 102 } 103 // Do this because we don't want to dispatch Big/Little endianness. 104 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 105 106 k1 *= C1; 107 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 108 k1 *= C2; 109 110 hash ^= k1; 111 hash = Rotl(hash, MAIN_SECOND_SHIFT); 112 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 113 } 114 115 // Proceed the tail: 116 // blocks is a pointer to the end of 32bits section 117 auto tail = blocks; 118 uint32_t k1 = 0; 119 for (size_t i = len & 3U; i > 0; i--) { 120 // Get ((uint8_t*)tail)[i - 1]: 121 uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1); 122 uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer); 123 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 124 k1 ^= temp; 125 if (i == 1) { 126 k1 = Rotl(k1, TAIL_LAST_SHIFT); 127 k1 *= C2; 128 hash ^= k1; 129 } 130 } 131 132 // Finalize the result 133 hash ^= len; 134 hash = Finalize(hash); 135 136 return hash; 137 } 138 MurmurHash3String(const uint8_t * mutf8_string,uint32_t seed)139 static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed) 140 { 141 // We start hashing from the seed 142 uint32_t hash = seed; 143 // We should still compute length of the string, we will need it later 144 size_t mutf8_length = 0; 145 // Do the main part: 146 // Iterate for each 32bits 147 auto blocks = reinterpret_cast<uintptr_t>(mutf8_string); 148 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 149 size_t tail_len = 0; 150 while (true) { 151 tail_len = 0; 152 for (unsigned char &i : memblock) { 153 i = *reinterpret_cast<uint8_t *>(blocks); 154 blocks += sizeof(uint8_t); 155 if (i == '\0') { 156 break; 157 } 158 tail_len++; 159 } 160 if (tail_len != BLOCK_SIZE) { 161 // We couldn't read four 8bytes value 162 break; 163 } 164 // Do this because we don't want to dispatch Big/Little endianness. 165 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 166 167 k1 *= C1; 168 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 169 k1 *= C2; 170 171 hash ^= k1; 172 hash = Rotl(hash, MAIN_SECOND_SHIFT); 173 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 174 mutf8_length += BLOCK_SIZE; 175 } 176 177 // Proceed the tail: 178 179 mutf8_length += tail_len; 180 uint32_t k1 = 0; 181 for (size_t i = tail_len; i > 0; i--) { 182 uint8_t block = memblock[i - 1U]; 183 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 184 k1 ^= temp; 185 if (i == 1) { 186 k1 = Rotl(k1, TAIL_LAST_SHIFT); 187 k1 *= C2; 188 hash ^= k1; 189 } 190 } 191 192 // Finalize the result 193 hash ^= mutf8_length; 194 hash = Finalize(hash); 195 196 return hash; 197 } 198 }; 199 200 } // namespace panda 201 202 #endif // LIBPANDABASE_UTILS_MURMUR3_HASH_H 203