1 /* 2 * Copyright (c) 2024-2025 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H 17 #define COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H 18 19 #include <cstddef> 20 #include <cstdint> 21 22 #include "compiler/optimizer/code_generator/type_info.h" 23 #include "libpandabase/macros.h" 24 #include "utils/bit_utils.h" 25 #include "utils/math_helpers.h" 26 27 namespace ark::compiler { 28 /* 29 * These helper classes calculate special values (i.e. magic, shift) for fast division/modulo by (u)int32/(u)int64 30 * constant. Then user replaces heavy div/mod operations with much lighter mul/shift/add/sub. The algorithms are 31 * described in "Hacker's Delight" book by Henry S. Warren (Chapter 10: Integer Division By Constants). 32 */ 33 34 class FastConstSignedDivisor { 35 public: FastConstSignedDivisor(int64_t divisor,size_t bitWidth)36 FastConstSignedDivisor(int64_t divisor, size_t bitWidth) 37 { 38 static_assert(sizeof(uint64_t) == DOUBLE_WORD_SIZE_BYTES); 39 ASSERT(divisor <= -2L || divisor >= 2L); 40 ASSERT(bitWidth == WORD_SIZE || bitWidth == DOUBLE_WORD_SIZE); 41 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) 42 uint64_t highOne = static_cast<uint64_t>(1U) << (bitWidth - 1U); 43 auto ad = bit_cast<uint64_t>(helpers::math::AbsOrMin(divisor)); 44 uint64_t t = highOne + (bit_cast<uint64_t>(divisor) >> (DOUBLE_WORD_SIZE - 1U)); 45 uint64_t anc = t - 1U - t % ad; 46 auto p = static_cast<int64_t>(bitWidth - 1U); 47 uint64_t q1 = highOne / anc; 48 uint64_t r1 = highOne - q1 * anc; 49 uint64_t q2 = highOne / ad; 50 uint64_t r2 = highOne - q2 * ad; 51 uint64_t delta = 0U; 52 53 do { 54 ++p; 55 q1 *= 2U; 56 r1 *= 2U; 57 if (r1 >= anc) { 58 ++q1; 59 r1 -= anc; 60 } 61 q2 *= 2U; 62 r2 *= 2U; 63 if (r2 >= ad) { 64 ++q2; 65 r2 -= ad; 66 } 67 delta = ad - r2; 68 } while (q1 < delta || (q1 == delta && r1 == 0)); 69 70 magic_ = static_cast<int64_t>(q2) + 1U; 71 if (divisor < 0) { 72 magic_ = -magic_; 73 } 74 if (bitWidth == WORD_SIZE) { 75 magic_ = static_cast<int64_t>(down_cast<int32_t>(magic_)); 76 } 77 shift_ = p - static_cast<int64_t>(bitWidth); 78 } 79 GetMagic()80 int64_t GetMagic() const 81 { 82 return magic_; 83 } 84 GetShift()85 int64_t GetShift() const 86 { 87 return shift_; 88 } 89 90 private: 91 int64_t magic_ {0L}; 92 int64_t shift_ {0L}; 93 }; 94 95 class FastConstUnsignedDivisor { 96 public: FastConstUnsignedDivisor(uint64_t divisor,size_t bitWidth)97 FastConstUnsignedDivisor(uint64_t divisor, size_t bitWidth) 98 { 99 static_assert(sizeof(uint64_t) == DOUBLE_WORD_SIZE_BYTES); 100 ASSERT(divisor != 0U); 101 ASSERT(bitWidth >= 1U); 102 ASSERT(bitWidth <= DOUBLE_WORD_SIZE); 103 uint64_t sizeMinusOne = static_cast<uint64_t>(bitWidth) - 1U; 104 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) 105 uint64_t highOne = static_cast<uint64_t>(1U) << sizeMinusOne; 106 uint64_t nc = ((highOne - 1U) | highOne) - (-divisor % divisor); 107 auto p = static_cast<int64_t>(sizeMinusOne); 108 uint64_t q1 = highOne / nc; 109 uint64_t r1 = highOne - q1 * nc; 110 uint64_t q2 = (highOne - 1U) / divisor; 111 uint64_t r2 = (highOne - 1U) - q2 * divisor; 112 113 uint64_t delta = 0U; 114 do { 115 ++p; 116 if (r1 >= nc - r1) { 117 q1 = 2U * q1 + 1U; 118 r1 = 2U * r1 - nc; 119 } else { 120 q1 *= 2U; 121 r1 *= 2U; 122 } 123 124 if (r2 + 1U >= divisor - r2) { 125 if (q2 >= (highOne - 1U)) { 126 add_ = true; 127 } 128 q2 = 2U * q2 + 1U; 129 r2 = 2U * r2 + 1U - divisor; 130 } else { 131 if (q2 >= highOne) { 132 add_ = true; 133 } 134 q2 *= 2U; 135 r2 = 2U * r2 + 1; 136 } 137 138 delta = divisor - 1U - r2; 139 } while ((p < 2L * static_cast<int64_t>(bitWidth)) && (q1 < delta || (q1 == delta && r1 == 0U))); 140 141 magic_ = q2 + 1U; 142 if (bitWidth == WORD_SIZE) { 143 magic_ &= std::numeric_limits<uint32_t>::max(); 144 } 145 shift_ = static_cast<uint64_t>(p - static_cast<int64_t>(bitWidth)); 146 } 147 GetMagic()148 uint64_t GetMagic() const 149 { 150 return magic_; 151 } 152 GetShift()153 uint64_t GetShift() const 154 { 155 return shift_; 156 } 157 GetAdd()158 bool GetAdd() const 159 { 160 return add_; 161 } 162 163 private: 164 uint64_t magic_ {0U}; 165 uint64_t shift_ {0U}; 166 bool add_ {false}; 167 }; 168 } // namespace ark::compiler 169 170 #endif // COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H 171