• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 // This is the murmur3 32 bit hash implementation
17 // The main structure and constants were taken from Austin Appleby.
18 // From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp):
19 // - MurmurHash3 was written by Austin Appleby, and is placed in the public
20 // - domain. The author hereby disclaims copyright to this source code.
21 
22 #ifndef PANDA_LIBPANDABASE_UTILS_MURMUR3_HASH_H_
23 #define PANDA_LIBPANDABASE_UTILS_MURMUR3_HASH_H_
24 
25 #include "hash_base.h"
26 #include "logger.h"
27 
28 namespace panda {
29 
30 // In general murmur hash looks like that:
31 // key = |....|....|....|.|.|.|
32 //         32   32   32  8 8 8
33 // Firstly, we proceed each 32 bits block from key;
34 // Secondly, we proceed last 8 bits block which were not covered in previous step.
35 template <uint32_t seed_value>
36 class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>> {
37 public:
GetHash32WithSeedImpl(const uint8_t * key,size_t len,uint32_t seed)38     static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed)
39     {
40         return MurmurHash3(key, len, seed);
41     }
GetHash32Impl(const uint8_t * key,size_t len)42     static uint32_t GetHash32Impl(const uint8_t *key, size_t len)
43     {
44         return GetHash32WithSeedImpl(key, len, seed_value);
45     }
GetHash32StringImpl(const uint8_t * mutf8_string)46     static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string)
47     {
48         return MurmurHash3String(mutf8_string, seed_value);
49     }
GetHash32StringWithSeedImpl(const uint8_t * mutf8_string,uint32_t seed)50     static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed)
51     {
52         return MurmurHash3String(mutf8_string, seed);
53     }
54 
55 private:
56     // Here are the main constants for hash counting:
57     // Many of them look like some kinds of magic
58     static constexpr uint32_t C1 = 0xCC9E2D51U;
59     static constexpr uint32_t C2 = 0x1B873593U;
60     static constexpr uint32_t MAX_BITS = 32;
61     static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16;
62     static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13;
63     static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16;
64     static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU;
65     static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U;
66     static constexpr uint32_t MAIN_FIRST_SHIFT = 15;
67     static constexpr uint32_t MAIN_SECOND_SHIFT = 13;
68     static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U;
69     static constexpr uint32_t MAIN_MULTIPLICATOR = 5;
70     static constexpr uint32_t TAIL_SHIFT = 8;
71     static constexpr uint32_t TAIL_LAST_SHIFT = 15;
72     static constexpr uint32_t BLOCK_SIZE = 4;
73 
Rotl(uint32_t word,uint8_t shift)74     static uint32_t Rotl(uint32_t word, uint8_t shift)
75     {
76         return (word << shift) | (word >> (MAX_BITS - shift));
77     }
78 
79     // Finalize the result of the hash function
Finalize(uint32_t h)80     static uint32_t Finalize(uint32_t h)
81     {
82         h ^= h >> FINALIZE_FIRST_SHIFT;
83         h *= FINALIZE_FIRST_MULTIPLICATOR;
84         h ^= h >> FINALIZE_SECOND_SHIFT;
85         h *= FINALIZE_SECOND_MULTIPLICATOR;
86         h ^= h >> FINALIZE_THIRD_SHIFT;
87         return h;
88     }
89 
MurmurHash3(const uint8_t * key,size_t len,uint32_t seed)90     static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed)
91     {
92         // We start hashing from the seed
93         uint32_t hash = seed;
94 
95         // Do the main part:
96         // Iterate for each 32bits
97         auto blocks = reinterpret_cast<uintptr_t>(key);
98         std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
99         for (size_t i = len / BLOCK_SIZE; i != 0; i--) {
100             for (auto &j : memblock) {
101                 j = *reinterpret_cast<uint8_t *>(blocks);
102                 blocks += sizeof(uint8_t);
103             }
104             // Do this because we don't want to dispatch Big/Little endianness.
105             uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
106 
107             k1 *= C1;
108             k1 = Rotl(k1, MAIN_FIRST_SHIFT);
109             k1 *= C2;
110 
111             hash ^= k1;
112             hash = Rotl(hash, MAIN_SECOND_SHIFT);
113             hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
114         }
115 
116         // Proceed the tail:
117         // blocks is a pointer to the end of 32bits section
118         auto tail = blocks;
119         uint32_t k1 = 0;
120         for (size_t i = len & 3U; i > 0; i--) {
121             // Get ((uint8_t*)tail)[i - 1]:
122             uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1);
123             uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer);
124             uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
125             k1 ^= temp;
126             if (i == 1) {
127                 k1 = Rotl(k1, TAIL_LAST_SHIFT);
128                 k1 *= C2;
129                 hash ^= k1;
130             }
131         }
132 
133         // Finalize the result
134         hash ^= len;
135         hash = Finalize(hash);
136 
137         return hash;
138     }
139 
MurmurHash3String(const uint8_t * mutf8_string,uint32_t seed)140     static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed)
141     {
142         // We start hashing from the seed
143         uint32_t hash = seed;
144         // We should still compute length of the string, because we will need it later
145         size_t mutf8_length = 0;
146         // Do the main part:
147         // Iterate for each 32bits
148         auto blocks = reinterpret_cast<uintptr_t>(mutf8_string);
149         std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
150         size_t tail_len = 0;
151         while (true) {
152             tail_len = 0;
153             for (unsigned char &i : memblock) {
154                 i = *reinterpret_cast<uint8_t *>(blocks);
155                 blocks += sizeof(uint8_t);
156                 if (i == '\0') {
157                     break;
158                 }
159                 tail_len++;
160             }
161             if (tail_len != BLOCK_SIZE) {
162                 // We couldn't read four bytes value
163                 break;
164             }
165             // Do this because we don't want to dispatch Big/Little endianness.
166             uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
167 
168             k1 *= C1;
169             k1 = Rotl(k1, MAIN_FIRST_SHIFT);
170             k1 *= C2;
171 
172             hash ^= k1;
173             hash = Rotl(hash, MAIN_SECOND_SHIFT);
174             hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
175             mutf8_length += BLOCK_SIZE;
176         }
177 
178         // Proceed the tail
179         mutf8_length += tail_len;
180         uint32_t k1 = 0;
181         for (size_t i = tail_len; i > 0; i--) {
182             uint8_t block = memblock[i - 1U];
183             uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
184             k1 ^= temp;
185             if (i == 1) {
186                 k1 = Rotl(k1, TAIL_LAST_SHIFT);
187                 k1 *= C2;
188                 hash ^= k1;
189             }
190         }
191 
192         // Finalize the result
193         hash ^= mutf8_length;
194         hash = Finalize(hash);
195 
196         return hash;
197     }
198 };
199 
200 }  // namespace panda
201 
202 #endif  // PANDA_LIBPANDABASE_UTILS_MURMUR3_HASH_H_
203