• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 // This is the murmur3 32 bit hash implementation
16 // The main structure and constants were taken from Austin Appleby.
17 // From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp):
18 // - MurmurHash3 was written by Austin Appleby, and is placed in the public
19 // - domain. The author hereby disclaims copyright to this source code.
20 
21 #ifndef LIBPANDABASE_UTILS_MURMUR3_HASH_H
22 #define LIBPANDABASE_UTILS_MURMUR3_HASH_H
23 
24 #include "hash_base.h"
25 #include "logger.h"
26 
27 namespace panda {
28 
29 // In general murmur hash looks like that:
30 // key = |....|....|....|.|.|.|
31 //         32   32   32  8 8 8
32 // Firstly, we proceed each 32 bits block from key;
33 // Secondly, we proceed last 8 bits block which were not covered in previous step.
34 template <uint32_t seed_value>
35 class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>> {
36 public:
GetHash32WithSeedImpl(const uint8_t * key,size_t len,uint32_t seed)37     static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed)
38     {
39         return MurmurHash3(key, len, seed);
40     }
GetHash32Impl(const uint8_t * key,size_t len)41     static uint32_t GetHash32Impl(const uint8_t *key, size_t len)
42     {
43         return GetHash32WithSeedImpl(key, len, seed_value);
44     }
GetHash32StringImpl(const uint8_t * mutf8_string)45     static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string)
46     {
47         return MurmurHash3String(mutf8_string, seed_value);
48     }
GetHash32StringWithSeedImpl(const uint8_t * mutf8_string,uint32_t seed)49     static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed)
50     {
51         return MurmurHash3String(mutf8_string, seed);
52     }
53 
54 private:
55     // Here are the main constants for hash counting:
56     // Many of them looks like some kind of magic
57     static constexpr uint32_t C1 = 0xCC9E2D51U;
58     static constexpr uint32_t C2 = 0x1B873593U;
59     static constexpr uint32_t MAX_BITS = 32;
60     static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16;
61     static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13;
62     static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16;
63     static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU;
64     static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U;
65     static constexpr uint32_t MAIN_FIRST_SHIFT = 15;
66     static constexpr uint32_t MAIN_SECOND_SHIFT = 13;
67     static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U;
68     static constexpr uint32_t MAIN_MULTIPLICATOR = 5;
69     static constexpr uint32_t TAIL_SHIFT = 8;
70     static constexpr uint32_t TAIL_LAST_SHIFT = 15;
71     static constexpr uint32_t BLOCK_SIZE = 4;
72 
Rotl(uint32_t word,uint8_t shift)73     static uint32_t Rotl(uint32_t word, uint8_t shift)
74     {
75         return (word << shift) | (word >> (MAX_BITS - shift));
76     }
77 
78     // Finilize the result of the hash function
Finalize(uint32_t h)79     static uint32_t Finalize(uint32_t h)
80     {
81         h ^= h >> FINALIZE_FIRST_SHIFT;
82         h *= FINALIZE_FIRST_MULTIPLICATOR;
83         h ^= h >> FINALIZE_SECOND_SHIFT;
84         h *= FINALIZE_SECOND_MULTIPLICATOR;
85         h ^= h >> FINALIZE_THIRD_SHIFT;
86         return h;
87     }
88 
MurmurHash3(const uint8_t * key,size_t len,uint32_t seed)89     static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed)
90     {
91         // We start hashing from the seed
92         uint32_t hash = seed;
93 
94         // Do the main part:
95         // Iterate for each 32bits
96         auto blocks = reinterpret_cast<uintptr_t>(key);
97         std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
98         for (size_t i = len / BLOCK_SIZE; i != 0; i--) {
99             for (auto &j : memblock) {
100                 j = *reinterpret_cast<uint8_t *>(blocks);
101                 blocks += sizeof(uint8_t);
102             }
103             // Do this because we don't want to dispatch Big/Little endianness.
104             uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
105 
106             k1 *= C1;
107             k1 = Rotl(k1, MAIN_FIRST_SHIFT);
108             k1 *= C2;
109 
110             hash ^= k1;
111             hash = Rotl(hash, MAIN_SECOND_SHIFT);
112             hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
113         }
114 
115         // Proceed the tail:
116         // blocks is a pointer to the end of 32bits section
117         auto tail = blocks;
118         uint32_t k1 = 0;
119         for (size_t i = len & 3U; i > 0; i--) {
120             // Get ((uint8_t*)tail)[i - 1]:
121             uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1);
122             uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer);
123             uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
124             k1 ^= temp;
125             if (i == 1) {
126                 k1 = Rotl(k1, TAIL_LAST_SHIFT);
127                 k1 *= C2;
128                 hash ^= k1;
129             }
130         }
131 
132         // Finalize the result
133         hash ^= len;
134         hash = Finalize(hash);
135 
136         return hash;
137     }
138 
MurmurHash3String(const uint8_t * mutf8_string,uint32_t seed)139     static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed)
140     {
141         // We start hashing from the seed
142         uint32_t hash = seed;
143         // We should still compute length of the string, we will need it later
144         size_t mutf8_length = 0;
145         // Do the main part:
146         // Iterate for each 32bits
147         auto blocks = reinterpret_cast<uintptr_t>(mutf8_string);
148         std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
149         size_t tail_len = 0;
150         while (true) {
151             tail_len = 0;
152             for (unsigned char &i : memblock) {
153                 i = *reinterpret_cast<uint8_t *>(blocks);
154                 blocks += sizeof(uint8_t);
155                 if (i == '\0') {
156                     break;
157                 }
158                 tail_len++;
159             }
160             if (tail_len != BLOCK_SIZE) {
161                 // We couldn't read four 8bytes value
162                 break;
163             }
164             // Do this because we don't want to dispatch Big/Little endianness.
165             uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
166 
167             k1 *= C1;
168             k1 = Rotl(k1, MAIN_FIRST_SHIFT);
169             k1 *= C2;
170 
171             hash ^= k1;
172             hash = Rotl(hash, MAIN_SECOND_SHIFT);
173             hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
174             mutf8_length += BLOCK_SIZE;
175         }
176 
177         // Proceed the tail:
178 
179         mutf8_length += tail_len;
180         uint32_t k1 = 0;
181         for (size_t i = tail_len; i > 0; i--) {
182             uint8_t block = memblock[i - 1U];
183             uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
184             k1 ^= temp;
185             if (i == 1) {
186                 k1 = Rotl(k1, TAIL_LAST_SHIFT);
187                 k1 *= C2;
188                 hash ^= k1;
189             }
190         }
191 
192         // Finalize the result
193         hash ^= mutf8_length;
194         hash = Finalize(hash);
195 
196         return hash;
197     }
198 };
199 
200 }  // namespace panda
201 
202 #endif  // LIBPANDABASE_UTILS_MURMUR3_HASH_H
203