• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H
17 #define COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H
18 
19 #include <cstddef>
20 #include <cstdint>
21 
22 #include "compiler/optimizer/code_generator/type_info.h"
23 #include "libpandabase/macros.h"
24 #include "utils/bit_utils.h"
25 #include "utils/math_helpers.h"
26 
27 namespace ark::compiler {
28 /*
29  * These helper classes calculate special values (i.e. magic, shift) for fast division/modulo by (u)int32/(u)int64
30  * constant. Then user replaces heavy div/mod operations with much lighter mul/shift/add/sub. The algorithms are
31  * described in "Hacker's Delight" book by Henry S. Warren (Chapter 10: Integer Division By Constants).
32  */
33 
34 class FastConstSignedDivisor {
35 public:
FastConstSignedDivisor(int64_t divisor,size_t bitWidth)36     FastConstSignedDivisor(int64_t divisor, size_t bitWidth)
37     {
38         static_assert(sizeof(uint64_t) == DOUBLE_WORD_SIZE_BYTES);
39         ASSERT(divisor <= -2L || divisor >= 2L);
40         ASSERT(bitWidth == WORD_SIZE || bitWidth == DOUBLE_WORD_SIZE);
41         // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
42         uint64_t highOne = static_cast<uint64_t>(1U) << (bitWidth - 1U);
43         auto ad = bit_cast<uint64_t>(helpers::math::AbsOrMin(divisor));
44         uint64_t t = highOne + (bit_cast<uint64_t>(divisor) >> (DOUBLE_WORD_SIZE - 1U));
45         uint64_t anc = t - 1U - t % ad;
46         auto p = static_cast<int64_t>(bitWidth - 1U);
47         uint64_t q1 = highOne / anc;
48         uint64_t r1 = highOne - q1 * anc;
49         uint64_t q2 = highOne / ad;
50         uint64_t r2 = highOne - q2 * ad;
51         uint64_t delta = 0U;
52 
53         do {
54             ++p;
55             q1 *= 2U;
56             r1 *= 2U;
57             if (r1 >= anc) {
58                 ++q1;
59                 r1 -= anc;
60             }
61             q2 *= 2U;
62             r2 *= 2U;
63             if (r2 >= ad) {
64                 ++q2;
65                 r2 -= ad;
66             }
67             delta = ad - r2;
68         } while (q1 < delta || (q1 == delta && r1 == 0));
69 
70         magic_ = static_cast<int64_t>(q2) + 1U;
71         if (divisor < 0) {
72             magic_ = -magic_;
73         }
74         if (bitWidth == WORD_SIZE) {
75             magic_ = static_cast<int64_t>(down_cast<int32_t>(magic_));
76         }
77         shift_ = p - static_cast<int64_t>(bitWidth);
78     }
79 
GetMagic()80     int64_t GetMagic() const
81     {
82         return magic_;
83     }
84 
GetShift()85     int64_t GetShift() const
86     {
87         return shift_;
88     }
89 
90 private:
91     int64_t magic_ {0L};
92     int64_t shift_ {0L};
93 };
94 
95 class FastConstUnsignedDivisor {
96 public:
FastConstUnsignedDivisor(uint64_t divisor,size_t bitWidth)97     FastConstUnsignedDivisor(uint64_t divisor, size_t bitWidth)
98     {
99         static_assert(sizeof(uint64_t) == DOUBLE_WORD_SIZE_BYTES);
100         ASSERT(divisor != 0U);
101         ASSERT(bitWidth >= 1U);
102         ASSERT(bitWidth <= DOUBLE_WORD_SIZE);
103         uint64_t sizeMinusOne = static_cast<uint64_t>(bitWidth) - 1U;
104         // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
105         uint64_t highOne = static_cast<uint64_t>(1U) << sizeMinusOne;
106         uint64_t nc = ((highOne - 1U) | highOne) - (-divisor % divisor);
107         auto p = static_cast<int64_t>(sizeMinusOne);
108         uint64_t q1 = highOne / nc;
109         uint64_t r1 = highOne - q1 * nc;
110         uint64_t q2 = (highOne - 1U) / divisor;
111         uint64_t r2 = (highOne - 1U) - q2 * divisor;
112 
113         uint64_t delta = 0U;
114         do {
115             ++p;
116             if (r1 >= nc - r1) {
117                 q1 = 2U * q1 + 1U;
118                 r1 = 2U * r1 - nc;
119             } else {
120                 q1 *= 2U;
121                 r1 *= 2U;
122             }
123 
124             if (r2 + 1U >= divisor - r2) {
125                 if (q2 >= (highOne - 1U)) {
126                     add_ = true;
127                 }
128                 q2 = 2U * q2 + 1U;
129                 r2 = 2U * r2 + 1U - divisor;
130             } else {
131                 if (q2 >= highOne) {
132                     add_ = true;
133                 }
134                 q2 *= 2U;
135                 r2 = 2U * r2 + 1;
136             }
137 
138             delta = divisor - 1U - r2;
139         } while ((p < 2L * static_cast<int64_t>(bitWidth)) && (q1 < delta || (q1 == delta && r1 == 0U)));
140 
141         magic_ = q2 + 1U;
142         if (bitWidth == WORD_SIZE) {
143             magic_ &= std::numeric_limits<uint32_t>::max();
144         }
145         shift_ = static_cast<uint64_t>(p - static_cast<int64_t>(bitWidth));
146     }
147 
GetMagic()148     uint64_t GetMagic() const
149     {
150         return magic_;
151     }
152 
GetShift()153     uint64_t GetShift() const
154     {
155         return shift_;
156     }
157 
GetAdd()158     bool GetAdd() const
159     {
160         return add_;
161     }
162 
163 private:
164     uint64_t magic_ {0U};
165     uint64_t shift_ {0U};
166     bool add_ {false};
167 };
168 }  // namespace ark::compiler
169 
170 #endif  // COMPILER_OPTIMIZER_CODEGEN_FAST_DIVISOR_H
171