1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 // This is the murmur3 32 bit hash implementation 17 // The main structure and constants were taken from Austin Appleby. 18 // From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp): 19 // - MurmurHash3 was written by Austin Appleby, and is placed in the public 20 // - domain. The author hereby disclaims copyright to this source code. 21 22 #ifndef PANDA_LIBPANDABASE_UTILS_MURMUR3_HASH_H_ 23 #define PANDA_LIBPANDABASE_UTILS_MURMUR3_HASH_H_ 24 25 #include "hash_base.h" 26 #include "logger.h" 27 28 namespace panda { 29 30 // In general murmur hash looks like that: 31 // key = |....|....|....|.|.|.| 32 // 32 32 32 8 8 8 33 // Firstly, we proceed each 32 bits block from key; 34 // Secondly, we proceed last 8 bits block which were not covered in previous step. 35 template <uint32_t seed_value> 36 class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>> { 37 public: GetHash32WithSeedImpl(const uint8_t * key,size_t len,uint32_t seed)38 static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed) 39 { 40 return MurmurHash3(key, len, seed); 41 } GetHash32Impl(const uint8_t * key,size_t len)42 static uint32_t GetHash32Impl(const uint8_t *key, size_t len) 43 { 44 return GetHash32WithSeedImpl(key, len, seed_value); 45 } GetHash32StringImpl(const uint8_t * mutf8_string)46 static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string) 47 { 48 return MurmurHash3String(mutf8_string, seed_value); 49 } GetHash32StringWithSeedImpl(const uint8_t * mutf8_string,uint32_t seed)50 static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed) 51 { 52 return MurmurHash3String(mutf8_string, seed); 53 } 54 55 private: 56 // Here are the main constants for hash counting: 57 // Many of them look like some kinds of magic 58 static constexpr uint32_t C1 = 0xCC9E2D51U; 59 static constexpr uint32_t C2 = 0x1B873593U; 60 static constexpr uint32_t MAX_BITS = 32; 61 static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16; 62 static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13; 63 static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16; 64 static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU; 65 static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U; 66 static constexpr uint32_t MAIN_FIRST_SHIFT = 15; 67 static constexpr uint32_t MAIN_SECOND_SHIFT = 13; 68 static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U; 69 static constexpr uint32_t MAIN_MULTIPLICATOR = 5; 70 static constexpr uint32_t TAIL_SHIFT = 8; 71 static constexpr uint32_t TAIL_LAST_SHIFT = 15; 72 static constexpr uint32_t BLOCK_SIZE = 4; 73 Rotl(uint32_t word,uint8_t shift)74 static uint32_t Rotl(uint32_t word, uint8_t shift) 75 { 76 return (word << shift) | (word >> (MAX_BITS - shift)); 77 } 78 79 // Finalize the result of the hash function Finalize(uint32_t h)80 static uint32_t Finalize(uint32_t h) 81 { 82 h ^= h >> FINALIZE_FIRST_SHIFT; 83 h *= FINALIZE_FIRST_MULTIPLICATOR; 84 h ^= h >> FINALIZE_SECOND_SHIFT; 85 h *= FINALIZE_SECOND_MULTIPLICATOR; 86 h ^= h >> FINALIZE_THIRD_SHIFT; 87 return h; 88 } 89 MurmurHash3(const uint8_t * key,size_t len,uint32_t seed)90 static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed) 91 { 92 // We start hashing from the seed 93 uint32_t hash = seed; 94 95 // Do the main part: 96 // Iterate for each 32bits 97 auto blocks = reinterpret_cast<uintptr_t>(key); 98 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 99 for (size_t i = len / BLOCK_SIZE; i != 0; i--) { 100 for (auto &j : memblock) { 101 j = *reinterpret_cast<uint8_t *>(blocks); 102 blocks += sizeof(uint8_t); 103 } 104 // Do this because we don't want to dispatch Big/Little endianness. 105 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 106 107 k1 *= C1; 108 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 109 k1 *= C2; 110 111 hash ^= k1; 112 hash = Rotl(hash, MAIN_SECOND_SHIFT); 113 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 114 } 115 116 // Proceed the tail: 117 // blocks is a pointer to the end of 32bits section 118 auto tail = blocks; 119 uint32_t k1 = 0; 120 for (size_t i = len & 3U; i > 0; i--) { 121 // Get ((uint8_t*)tail)[i - 1]: 122 uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1); 123 uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer); 124 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 125 k1 ^= temp; 126 if (i == 1) { 127 k1 = Rotl(k1, TAIL_LAST_SHIFT); 128 k1 *= C2; 129 hash ^= k1; 130 } 131 } 132 133 // Finalize the result 134 hash ^= len; 135 hash = Finalize(hash); 136 137 return hash; 138 } 139 MurmurHash3String(const uint8_t * mutf8_string,uint32_t seed)140 static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed) 141 { 142 // We start hashing from the seed 143 uint32_t hash = seed; 144 // We should still compute length of the string, because we will need it later 145 size_t mutf8_length = 0; 146 // Do the main part: 147 // Iterate for each 32bits 148 auto blocks = reinterpret_cast<uintptr_t>(mutf8_string); 149 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 150 size_t tail_len = 0; 151 while (true) { 152 tail_len = 0; 153 for (unsigned char &i : memblock) { 154 i = *reinterpret_cast<uint8_t *>(blocks); 155 blocks += sizeof(uint8_t); 156 if (i == '\0') { 157 break; 158 } 159 tail_len++; 160 } 161 if (tail_len != BLOCK_SIZE) { 162 // We couldn't read four bytes value 163 break; 164 } 165 // Do this because we don't want to dispatch Big/Little endianness. 166 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 167 168 k1 *= C1; 169 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 170 k1 *= C2; 171 172 hash ^= k1; 173 hash = Rotl(hash, MAIN_SECOND_SHIFT); 174 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 175 mutf8_length += BLOCK_SIZE; 176 } 177 178 // Proceed the tail 179 mutf8_length += tail_len; 180 uint32_t k1 = 0; 181 for (size_t i = tail_len; i > 0; i--) { 182 uint8_t block = memblock[i - 1U]; 183 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 184 k1 ^= temp; 185 if (i == 1) { 186 k1 = Rotl(k1, TAIL_LAST_SHIFT); 187 k1 *= C2; 188 hash ^= k1; 189 } 190 } 191 192 // Finalize the result 193 hash ^= mutf8_length; 194 hash = Finalize(hash); 195 196 return hash; 197 } 198 }; 199 200 } // namespace panda 201 202 #endif // PANDA_LIBPANDABASE_UTILS_MURMUR3_HASH_H_ 203