1 /* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMMON_COMPONENTS_PLATFORM_STRING_HASH_ARM64_H 17 #define COMMON_COMPONENTS_PLATFORM_STRING_HASH_ARM64_H 18 19 #include <cstdint> 20 21 #include <arm_neon.h> 22 23 #include "common_components/base/config.h" 24 #include "common_components/platform/string_hash.h" 25 26 namespace common { 27 class StringHashInternal { 28 friend class StringHashHelper; 29 private: 30 template <typename T> ComputeHashForDataOfLongString(const T * data,size_t size,uint32_t hashSeed)31 static uint32_t ComputeHashForDataOfLongString(const T *data, size_t size, 32 uint32_t hashSeed) 33 { 34 constexpr uint32_t blockSize = StringHash::BLOCK_SIZE; 35 constexpr uint32_t scale = StringHash::BLOCK_MULTIPLY; 36 uint32_t hash[blockSize] = {}; 37 uint32_t index = 0; 38 uint32_t remainder = size & (blockSize - 1); 39 switch (remainder) { 40 #define CASE(N) case (N): \ 41 hash[blockSize - (N)] = data[index++] * StringHash::MULTIPLIER[blockSize - (N)]; [[fallthrough]] 42 CASE(StringHash::SIZE_3); 43 CASE(StringHash::SIZE_2); 44 CASE(StringHash::SIZE_1); 45 #undef CASE 46 default: 47 break; 48 } 49 hash[0] += hashSeed * StringHash::MULTIPLIER[blockSize - 1 - remainder]; 50 51 uint32_t dataMul[blockSize] = {}; 52 for (; index < size; index += blockSize) { 53 for (size_t i = 0; i < blockSize; i++) { 54 dataMul[i] = data[index + i] * StringHash::MULTIPLIER[i]; 55 hash[i] = hash[i] * scale + dataMul[i]; 56 } 57 } 58 uint32_t hashTotal = 0; 59 for (size_t i = 0; i < blockSize; i++) { 60 hashTotal += hash[i]; 61 } 62 return hashTotal; 63 } 64 65 template <> 66 uint32_t ComputeHashForDataOfLongString<uint8_t>(const uint8_t *data, 67 size_t size, uint32_t hashSeed) 68 { 69 const uint32x4_t multiplierVec = vld1q_u32(StringHash::MULTIPLIER); 70 constexpr uint32_t multiplierHash = StringHash::MULTIPLIER[0] * StringHash::MULTIPLIER[2]; 71 72 uint32_t hash = hashSeed; 73 const uint8_t *dataEnd = data + size; 74 const uint8_t *vecEnd = data + (size & (~15)); 75 const uint8_t *p = data; 76 constexpr size_t UINT8_LOOP_SIZE = 16; // neon 128bit / uint8_t 8bit = 16 77 for (; p < vecEnd; p += UINT8_LOOP_SIZE) { 78 uint8x16_t dataVec8 = vld1q_u8(p); 79 uint16x8_t dataVec16_1 = vmovl_u8(vget_low_u16(dataVec8)); 80 uint16x8_t dataVec16_2 = vmovl_u8(vget_high_u16(dataVec8)); 81 uint32x4_t dataVec32_1 = vmovl_u16(vget_low_u16(dataVec16_1)); 82 uint32x4_t dataVec32_3 = vmovl_u16(vget_low_u16(dataVec16_2)); 83 uint32x4_t dataVec32_2 = vmovl_u16(vget_high_u16(dataVec16_1)); 84 uint32x4_t dataVec32_4 = vmovl_u16(vget_high_u16(dataVec16_2)); 85 86 dataVec32_1 = vmulq_u32(dataVec32_1, multiplierVec); 87 hash = hash * multiplierHash + vaddvq_u32(dataVec32_1); 88 89 dataVec32_2 = vmulq_u32(dataVec32_2, multiplierVec); 90 hash = hash * multiplierHash + vaddvq_u32(dataVec32_2); 91 92 dataVec32_3 = vmulq_u32(dataVec32_3, multiplierVec); 93 hash = hash * multiplierHash + vaddvq_u32(dataVec32_3); 94 95 dataVec32_4 = vmulq_u32(dataVec32_4, multiplierVec); 96 hash = hash * multiplierHash + vaddvq_u32(dataVec32_4); 97 } 98 99 for (; p < dataEnd; p++) { 100 hash = (hash << static_cast<uint32_t>(StringHash::HASH_SHIFT)) - hash + *p; 101 } 102 return hash; 103 } 104 105 template <> 106 uint32_t ComputeHashForDataOfLongString<uint16_t>(const uint16_t *data, 107 size_t size, uint32_t hashSeed) 108 { 109 const uint32x4_t multiplierVec = vld1q_u32(StringHash::MULTIPLIER); 110 constexpr uint32_t multiplierHash = StringHash::MULTIPLIER[0] * StringHash::MULTIPLIER[2]; 111 112 uint32_t hash = hashSeed; 113 const uint16_t *dataEnd = data + size; 114 const uint16_t *vecEnd = data + (size & (~7)); 115 const uint16_t *p = data; 116 constexpr size_t UINT16_LOOP_SIZE = 8; // neon 128bit / uint16_t 16bit = 8 117 for (; p < vecEnd; p += UINT16_LOOP_SIZE) { 118 uint16x8_t dataVec16 = vld1q_u16(p); 119 uint32x4_t dataVec32_1 = vmovl_u16(vget_low_u16(dataVec16)); 120 dataVec32_1 = vmulq_u32(dataVec32_1, multiplierVec); 121 hash = hash * multiplierHash + vaddvq_u32(dataVec32_1); 122 123 uint32x4_t dataVec32_2 = vmovl_u16(vget_high_u16(dataVec16)); 124 dataVec32_2 = vmulq_u32(dataVec32_2, multiplierVec); 125 hash = hash * multiplierHash + vaddvq_u32(dataVec32_2); 126 } 127 128 for (; p < dataEnd; p++) { 129 hash = (hash << static_cast<uint32_t>(StringHash::HASH_SHIFT)) - hash + *p; 130 } 131 return hash; 132 } 133 }; 134 } // namespace common 135 #endif // COMMON_COMPONENTS_PLATFORM_STRING_HASH_ARM64_H