1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 // This is the murmur3 32 bit hash implementation 16 // The main structure and constants were taken from Austin Appleby. 17 // From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp): 18 // - MurmurHash3 was written by Austin Appleby, and is placed in the public 19 // - domain. The author hereby disclaims copyright to this source code. 20 #ifndef LIBPANDABASE_UTILS_MURMUR3_HASH_H_ 21 #define LIBPANDABASE_UTILS_MURMUR3_HASH_H_ 22 23 #include "hash_base.h" 24 #include "logger.h" 25 26 namespace panda { 27 28 // In general murmur hash looks like that: 29 // key = |....|....|....|.|.|.| 30 // 32 32 32 8 8 8 31 // Firstly, we proceed each 32 bits block from key; 32 // Secondly, we proceed last 8 bits block which were not covered in previous step. 33 template <uint32_t seed_value> 34 class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>> { 35 public: GetHash32WithSeedImpl(const uint8_t * key,size_t len,uint32_t seed)36 static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed) 37 { 38 return MurmurHash3(key, len, seed); 39 } GetHash32Impl(const uint8_t * key,size_t len)40 static uint32_t GetHash32Impl(const uint8_t *key, size_t len) 41 { 42 return GetHash32WithSeedImpl(key, len, seed_value); 43 } GetHash32StringImpl(const uint8_t * mutf8_string)44 static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string) 45 { 46 return MurmurHash3String(mutf8_string, seed_value); 47 } GetHash32StringWithSeedImpl(const uint8_t * mutf8_string,uint32_t seed)48 static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed) 49 { 50 return MurmurHash3String(mutf8_string, seed); 51 } 52 53 private: 54 // Here are the main constants for hash counting: 55 // Many of them looks like some kind of magic 56 static constexpr uint32_t C1 = 0xCC9E2D51U; 57 static constexpr uint32_t C2 = 0x1B873593U; 58 static constexpr uint32_t MAX_BITS = 32; 59 static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16; 60 static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13; 61 static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16; 62 static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU; 63 static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U; 64 static constexpr uint32_t MAIN_FIRST_SHIFT = 15; 65 static constexpr uint32_t MAIN_SECOND_SHIFT = 13; 66 static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U; 67 static constexpr uint32_t MAIN_MULTIPLICATOR = 5; 68 static constexpr uint32_t TAIL_SHIFT = 8; 69 static constexpr uint32_t TAIL_LAST_SHIFT = 15; 70 static constexpr uint32_t BLOCK_SIZE = 4; 71 Rotl(uint32_t word,uint8_t shift)72 static uint32_t Rotl(uint32_t word, uint8_t shift) 73 { 74 return (word << shift) | (word >> (MAX_BITS - shift)); 75 } 76 77 // Finilize the result of the hash function Finalize(uint32_t h)78 static uint32_t Finalize(uint32_t h) 79 { 80 h ^= h >> FINALIZE_FIRST_SHIFT; 81 h *= FINALIZE_FIRST_MULTIPLICATOR; 82 h ^= h >> FINALIZE_SECOND_SHIFT; 83 h *= FINALIZE_SECOND_MULTIPLICATOR; 84 h ^= h >> FINALIZE_THIRD_SHIFT; 85 return h; 86 } 87 MurmurHash3(const uint8_t * key,size_t len,uint32_t seed)88 static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed) 89 { 90 // We start hashing from the seed 91 uint32_t hash = seed; 92 93 // Do the main part: 94 // Iterate for each 32bits 95 auto blocks = reinterpret_cast<uintptr_t>(key); 96 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 97 for (size_t i = len / BLOCK_SIZE; i != 0; i--) { 98 for (auto &j : memblock) { 99 j = *reinterpret_cast<uint8_t *>(blocks); 100 blocks += sizeof(uint8_t); 101 } 102 // Do this because we don't want to dispatch Big/Little endianness. 103 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 104 105 k1 *= C1; 106 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 107 k1 *= C2; 108 109 hash ^= k1; 110 hash = Rotl(hash, MAIN_SECOND_SHIFT); 111 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 112 } 113 114 // Proceed the tail: 115 // blocks is a pointer to the end of 32bits section 116 auto tail = blocks; 117 uint32_t k1 = 0; 118 for (size_t i = len & 3U; i > 0; i--) { 119 // Get ((uint8_t*)tail)[i - 1]: 120 uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1); 121 uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer); 122 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 123 k1 ^= temp; 124 if (i == 1) { 125 k1 = Rotl(k1, TAIL_LAST_SHIFT); 126 k1 *= C2; 127 hash ^= k1; 128 } 129 } 130 131 // Finalize the result 132 hash ^= len; 133 hash = Finalize(hash); 134 135 return hash; 136 } 137 MurmurHash3String(const uint8_t * mutf8_string,uint32_t seed)138 static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed) 139 { 140 // We start hashing from the seed 141 uint32_t hash = seed; 142 // We should still compute length of the string, we will need it later 143 size_t mutf8_length = 0; 144 // Do the main part: 145 // Iterate for each 32bits 146 auto blocks = reinterpret_cast<uintptr_t>(mutf8_string); 147 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 148 size_t tail_len = 0; 149 while (true) { 150 tail_len = 0; 151 for (unsigned char &i : memblock) { 152 i = *reinterpret_cast<uint8_t *>(blocks); 153 blocks += sizeof(uint8_t); 154 if (i == '\0') { 155 break; 156 } 157 tail_len++; 158 } 159 if (tail_len != BLOCK_SIZE) { 160 // We couldn't read four 8bytes value 161 break; 162 } 163 // Do this because we don't want to dispatch Big/Little endianness. 164 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 165 166 k1 *= C1; 167 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 168 k1 *= C2; 169 170 hash ^= k1; 171 hash = Rotl(hash, MAIN_SECOND_SHIFT); 172 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 173 mutf8_length += BLOCK_SIZE; 174 } 175 176 // Proceed the tail: 177 178 mutf8_length += tail_len; 179 uint32_t k1 = 0; 180 for (size_t i = tail_len; i > 0; i--) { 181 uint8_t block = memblock[i - 1U]; 182 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 183 k1 ^= temp; 184 if (i == 1) { 185 k1 = Rotl(k1, TAIL_LAST_SHIFT); 186 k1 *= C2; 187 hash ^= k1; 188 } 189 } 190 191 // Finalize the result 192 hash ^= mutf8_length; 193 hash = Finalize(hash); 194 195 return hash; 196 } 197 }; 198 199 } // namespace panda 200 201 #endif // LIBPANDABASE_UTILS_MURMUR3_HASH_H_ 202