1 /* 2 * Copyright (c) 2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H 17 #define COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H 18 19 #include <cstddef> 20 #include <cstdint> 21 22 #include "compiler/optimizer/code_generator/type_info.h" 23 #include "libpandabase/macros.h" 24 #include "utils/bit_utils.h" 25 #include "utils/math_helpers.h" 26 27 namespace ark::compiler { 28 /* 29 * These helper classes calculate special values (i.e. magic, shift) for fast division/modulo by (u)int32/(u)int64 30 * constant. Then user replaces heavy div/mod operations with much lighter mul/shift/add/sub. The algorithms are 31 * described in "Hacker's Delight" book by Henry S. Warren (Chapter 10: Integer Division By Constants). 32 */ 33 34 class FastConstSignedDivisor { 35 public: FastConstSignedDivisor(int64_t divisor,size_t bitWidth)36 FastConstSignedDivisor(int64_t divisor, size_t bitWidth) 37 { 38 static_assert(sizeof(uint64_t) == DOUBLE_WORD_SIZE_BYTES); 39 ASSERT(divisor <= -2L || divisor >= 2L); 40 ASSERT(bitWidth == WORD_SIZE || bitWidth == DOUBLE_WORD_SIZE); 41 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) 42 uint64_t highOne = static_cast<uint64_t>(1U) << (bitWidth - 1U); 43 auto ad = bit_cast<uint64_t>(helpers::math::AbsOrMin(divisor)); 44 uint64_t t = highOne + (bit_cast<uint64_t>(divisor) >> (DOUBLE_WORD_SIZE - 1U)); 45 uint64_t anc = t - 1U - t % ad; 46 auto p = static_cast<int64_t>(bitWidth - 1U); 47 uint64_t q1 = highOne / anc; 48 uint64_t r1 = highOne - q1 * anc; 49 uint64_t q2 = highOne / ad; 50 uint64_t r2 = highOne - q2 * ad; 51 uint64_t delta = 0U; 52 53 do { 54 ++p; 55 q1 *= 2U; 56 r1 *= 2U; 57 if (r1 >= anc) { 58 ++q1; 59 r1 -= anc; 60 } 61 q2 *= 2U; 62 r2 *= 2U; 63 if (r2 >= ad) { 64 ++q2; 65 r2 -= ad; 66 } 67 delta = ad - r2; 68 } while (q1 < delta || (q1 == delta && r1 == 0)); 69 70 magic_ = q2 + 1U; 71 if (divisor < 0) { 72 magic_ = -magic_; 73 } 74 if (bitWidth == WORD_SIZE) { 75 magic_ = static_cast<int64_t>(down_cast<int32_t>(magic_)); 76 } 77 shift_ = p - static_cast<int64_t>(bitWidth); 78 } 79 GetMagic()80 int64_t GetMagic() const 81 { 82 return magic_; 83 } 84 GetShift()85 int64_t GetShift() const 86 { 87 return shift_; 88 } 89 90 private: 91 int64_t magic_ {0L}; 92 int64_t shift_ {0L}; 93 }; 94 95 class FastConstUnsignedDivisor { 96 public: FastConstUnsignedDivisor(uint64_t divisor,size_t bitWidth)97 FastConstUnsignedDivisor(uint64_t divisor, size_t bitWidth) 98 { 99 static_assert(sizeof(uint64_t) == DOUBLE_WORD_SIZE_BYTES); 100 ASSERT(divisor != 0U); 101 uint64_t sizeMinusOne = static_cast<uint64_t>(bitWidth) - 1U; 102 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) 103 uint64_t highOne = static_cast<uint64_t>(1U) << sizeMinusOne; 104 uint64_t nc = ((highOne - 1U) | highOne) - (-divisor % divisor); 105 auto p = static_cast<int64_t>(sizeMinusOne); 106 uint64_t q1 = highOne / nc; 107 uint64_t r1 = highOne - q1 * nc; 108 uint64_t q2 = (highOne - 1U) / divisor; 109 uint64_t r2 = (highOne - 1U) - q2 * divisor; 110 111 uint64_t delta = 0U; 112 do { 113 ++p; 114 if (r1 >= nc - r1) { 115 q1 = 2U * q1 + 1U; 116 r1 = 2U * r1 - nc; 117 } else { 118 q1 *= 2U; 119 r1 *= 2U; 120 } 121 122 if (r2 + 1U >= divisor - r2) { 123 if (q2 >= (highOne - 1U)) { 124 add_ = true; 125 } 126 q2 = 2U * q2 + 1U; 127 r2 = 2U * r2 + 1U - divisor; 128 } else { 129 if (q2 >= highOne) { 130 add_ = true; 131 } 132 q2 *= 2U; 133 r2 = 2U * r2 + 1; 134 } 135 136 delta = divisor - 1U - r2; 137 } while ((p < 2L * static_cast<int64_t>(bitWidth)) && (q1 < delta || (q1 == delta && r1 == 0U))); 138 139 magic_ = q2 + 1U; 140 if (bitWidth == WORD_SIZE) { 141 magic_ &= std::numeric_limits<uint32_t>::max(); 142 } 143 shift_ = static_cast<uint64_t>(p - static_cast<int64_t>(bitWidth)); 144 } 145 GetMagic()146 uint64_t GetMagic() const 147 { 148 return magic_; 149 } 150 GetShift()151 uint64_t GetShift() const 152 { 153 return shift_; 154 } 155 GetAdd()156 bool GetAdd() const 157 { 158 return add_; 159 } 160 161 private: 162 uint64_t magic_ {0U}; 163 uint64_t shift_ {0U}; 164 bool add_ {false}; 165 }; 166 } // namespace ark::compiler 167 168 #endif // COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H 169