• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 // This is the murmur3 32 bit hash implementation
16 // The main structure and constants were taken from Austin Appleby.
17 // From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp):
18 // - MurmurHash3 was written by Austin Appleby, and is placed in the public
19 // - domain. The author hereby disclaims copyright to this source code.
20 #ifndef LIBPANDABASE_UTILS_MURMUR3_HASH_H_
21 #define LIBPANDABASE_UTILS_MURMUR3_HASH_H_
22 
23 #include "hash_base.h"
24 #include "logger.h"
25 
26 namespace panda {
27 
28 // In general murmur hash looks like that:
29 // key = |....|....|....|.|.|.|
30 //         32   32   32  8 8 8
31 // Firstly, we proceed each 32 bits block from key;
32 // Secondly, we proceed last 8 bits block which were not covered in previous step.
33 template <uint32_t seed_value>
34 class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>> {
35 public:
GetHash32WithSeedImpl(const uint8_t * key,size_t len,uint32_t seed)36     static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed)
37     {
38         return MurmurHash3(key, len, seed);
39     }
GetHash32Impl(const uint8_t * key,size_t len)40     static uint32_t GetHash32Impl(const uint8_t *key, size_t len)
41     {
42         return GetHash32WithSeedImpl(key, len, seed_value);
43     }
GetHash32StringImpl(const uint8_t * mutf8_string)44     static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string)
45     {
46         return MurmurHash3String(mutf8_string, seed_value);
47     }
GetHash32StringWithSeedImpl(const uint8_t * mutf8_string,uint32_t seed)48     static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed)
49     {
50         return MurmurHash3String(mutf8_string, seed);
51     }
52 
53 private:
54     // Here are the main constants for hash counting:
55     // Many of them looks like some kind of magic
56     static constexpr uint32_t C1 = 0xCC9E2D51U;
57     static constexpr uint32_t C2 = 0x1B873593U;
58     static constexpr uint32_t MAX_BITS = 32;
59     static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16;
60     static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13;
61     static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16;
62     static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU;
63     static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U;
64     static constexpr uint32_t MAIN_FIRST_SHIFT = 15;
65     static constexpr uint32_t MAIN_SECOND_SHIFT = 13;
66     static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U;
67     static constexpr uint32_t MAIN_MULTIPLICATOR = 5;
68     static constexpr uint32_t TAIL_SHIFT = 8;
69     static constexpr uint32_t TAIL_LAST_SHIFT = 15;
70     static constexpr uint32_t BLOCK_SIZE = 4;
71 
Rotl(uint32_t word,uint8_t shift)72     static uint32_t Rotl(uint32_t word, uint8_t shift)
73     {
74         return (word << shift) | (word >> (MAX_BITS - shift));
75     }
76 
77     // Finilize the result of the hash function
Finalize(uint32_t h)78     static uint32_t Finalize(uint32_t h)
79     {
80         h ^= h >> FINALIZE_FIRST_SHIFT;
81         h *= FINALIZE_FIRST_MULTIPLICATOR;
82         h ^= h >> FINALIZE_SECOND_SHIFT;
83         h *= FINALIZE_SECOND_MULTIPLICATOR;
84         h ^= h >> FINALIZE_THIRD_SHIFT;
85         return h;
86     }
87 
MurmurHash3(const uint8_t * key,size_t len,uint32_t seed)88     static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed)
89     {
90         // We start hashing from the seed
91         uint32_t hash = seed;
92 
93         // Do the main part:
94         // Iterate for each 32bits
95         auto blocks = reinterpret_cast<uintptr_t>(key);
96         std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
97         for (size_t i = len / BLOCK_SIZE; i != 0; i--) {
98             for (auto &j : memblock) {
99                 j = *reinterpret_cast<uint8_t *>(blocks);
100                 blocks += sizeof(uint8_t);
101             }
102             // Do this because we don't want to dispatch Big/Little endianness.
103             uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
104 
105             k1 *= C1;
106             k1 = Rotl(k1, MAIN_FIRST_SHIFT);
107             k1 *= C2;
108 
109             hash ^= k1;
110             hash = Rotl(hash, MAIN_SECOND_SHIFT);
111             hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
112         }
113 
114         // Proceed the tail:
115         // blocks is a pointer to the end of 32bits section
116         auto tail = blocks;
117         uint32_t k1 = 0;
118         for (size_t i = len & 3U; i > 0; i--) {
119             // Get ((uint8_t*)tail)[i - 1]:
120             uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1);
121             uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer);
122             uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
123             k1 ^= temp;
124             if (i == 1) {
125                 k1 = Rotl(k1, TAIL_LAST_SHIFT);
126                 k1 *= C2;
127                 hash ^= k1;
128             }
129         }
130 
131         // Finalize the result
132         hash ^= len;
133         hash = Finalize(hash);
134 
135         return hash;
136     }
137 
MurmurHash3String(const uint8_t * mutf8_string,uint32_t seed)138     static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed)
139     {
140         // We start hashing from the seed
141         uint32_t hash = seed;
142         // We should still compute length of the string, we will need it later
143         size_t mutf8_length = 0;
144         // Do the main part:
145         // Iterate for each 32bits
146         auto blocks = reinterpret_cast<uintptr_t>(mutf8_string);
147         std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
148         size_t tail_len = 0;
149         while (true) {
150             tail_len = 0;
151             for (unsigned char &i : memblock) {
152                 i = *reinterpret_cast<uint8_t *>(blocks);
153                 blocks += sizeof(uint8_t);
154                 if (i == '\0') {
155                     break;
156                 }
157                 tail_len++;
158             }
159             if (tail_len != BLOCK_SIZE) {
160                 // We couldn't read four 8bytes value
161                 break;
162             }
163             // Do this because we don't want to dispatch Big/Little endianness.
164             uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
165 
166             k1 *= C1;
167             k1 = Rotl(k1, MAIN_FIRST_SHIFT);
168             k1 *= C2;
169 
170             hash ^= k1;
171             hash = Rotl(hash, MAIN_SECOND_SHIFT);
172             hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
173             mutf8_length += BLOCK_SIZE;
174         }
175 
176         // Proceed the tail:
177 
178         mutf8_length += tail_len;
179         uint32_t k1 = 0;
180         for (size_t i = tail_len; i > 0; i--) {
181             uint8_t block = memblock[i - 1U];
182             uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
183             k1 ^= temp;
184             if (i == 1) {
185                 k1 = Rotl(k1, TAIL_LAST_SHIFT);
186                 k1 *= C2;
187                 hash ^= k1;
188             }
189         }
190 
191         // Finalize the result
192         hash ^= mutf8_length;
193         hash = Finalize(hash);
194 
195         return hash;
196     }
197 };
198 
199 }  // namespace panda
200 
201 #endif  // LIBPANDABASE_UTILS_MURMUR3_HASH_H_
202