1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_HEAVY_OPTIMIZER_RISCV64_CALL_INTRINSIC_H_
18 #define BERBERIS_HEAVY_OPTIMIZER_RISCV64_CALL_INTRINSIC_H_
19
20 #include <type_traits>
21
22 #include "berberis/backend/code_emitter.h"
23 #include "berberis/backend/common/machine_ir.h"
24 #include "berberis/backend/x86_64/machine_ir.h"
25 #include "berberis/backend/x86_64/machine_ir_builder.h"
26 #include "berberis/base/bit_util.h"
27 #include "berberis/base/dependent_false.h"
28 #include "berberis/intrinsics/simd_register.h"
29
30 #include "simd_register.h"
31
32 namespace berberis {
33
34 namespace call_intrinsic_internal {
35
36 // TODO(b/308951522) Create Expand node in backend and use that instead so it
37 // can be optimized when possible.
38 template <typename IntrinsicType>
SignExtend64(x86_64::MachineIRBuilder * builder,MachineReg dst,MachineReg src)39 void SignExtend64(x86_64::MachineIRBuilder* builder, MachineReg dst, MachineReg src) {
40 // Note, RISCV64 ABI mandates type-extension up to 32-bit and then sign
41 // extension to 64-bit. This meants U8 and U16 are effectively zero-extended
42 // to 64-bit.
43 if constexpr (sizeof(IntrinsicType) == 1) {
44 if constexpr (std::is_signed_v<IntrinsicType>) {
45 builder->Gen<x86_64::MovsxbqRegReg>(dst, src);
46 } else {
47 builder->Gen<x86_64::MovzxbqRegReg>(dst, src);
48 }
49 } else if constexpr (sizeof(IntrinsicType) == 2) {
50 if constexpr (std::is_signed_v<IntrinsicType>) {
51 builder->Gen<x86_64::MovsxwqRegReg>(dst, src);
52 } else {
53 builder->Gen<x86_64::MovzxwqRegReg>(dst, src);
54 }
55 } else if constexpr (sizeof(IntrinsicType) == 4) {
56 builder->Gen<x86_64::MovsxlqRegReg>(dst, src);
57 } else {
58 static_assert(kDependentTypeFalse<IntrinsicType>,
59 "Unsupported type, only integrals with size 4, 2 and 1 are supported.");
60 }
61 }
62
63 template <typename IntrinsicType>
SignExtend64Result(x86_64::MachineIRBuilder * builder,MachineReg dst,MachineReg src)64 void SignExtend64Result(x86_64::MachineIRBuilder* builder, MachineReg dst, MachineReg src) {
65 if constexpr (sizeof(IntrinsicType) == 8) {
66 builder->Gen<PseudoCopy>(dst, src, 8);
67 } else {
68 static_assert(sizeof(IntrinsicType) == 4, "8- and 16-bit return values are not yet supported");
69 call_intrinsic_internal::SignExtend64<IntrinsicType>(builder, dst, src);
70 }
71 }
72
73 template <
74 typename IntrinsicType,
75 typename AssemblerType,
76 typename std::enable_if_t<!std::is_same_v<AssemblerType, MachineReg> &&
77 std::is_integral_v<IntrinsicType> && (sizeof(IntrinsicType) <= 4),
78 bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder * builder,AssemblerType val)79 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* builder, AssemblerType val) {
80 static_assert(std::is_same_v<AssemblerType, IntrinsicType>);
81 MachineReg reg = builder->ir()->AllocVReg();
82 MachineReg temp_reg = builder->ir()->AllocVReg();
83 // SignExtend64 works with registers, we need to copy integral value to a register before calling
84 // it.
85 builder->Gen<x86_64::MovlRegImm>(temp_reg, static_cast<uint32_t>(val));
86 SignExtend64<IntrinsicType>(builder, reg, temp_reg);
87 return {reg, x86_64::CallImm::kIntRegType};
88 }
89
90 template <
91 typename IntrinsicType,
92 typename AssemblerType,
93 typename std::enable_if_t<std::is_same_v<AssemblerType, MachineReg> &&
94 (std::is_integral_v<IntrinsicType> ||
95 std::is_pointer_v<IntrinsicType>)&&(sizeof(IntrinsicType) == 8),
96 bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder *,AssemblerType reg)97 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* /*builder*/,
98 AssemblerType reg) {
99 return {reg, x86_64::CallImm::kIntRegType};
100 }
101
102 template <
103 typename IntrinsicType,
104 typename AssemblerType,
105 typename std::enable_if_t<std::is_same_v<AssemblerType, MachineReg> &&
106 std::is_integral_v<IntrinsicType> && (sizeof(IntrinsicType) <= 4),
107 bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder * builder,AssemblerType reg)108 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* builder, AssemblerType reg) {
109 MachineReg new_reg = builder->ir()->AllocVReg();
110 SignExtend64<IntrinsicType>(builder, new_reg, reg);
111 return {new_reg, x86_64::CallImm::kIntRegType};
112 }
113
114 template <typename IntrinsicType,
115 typename AssemblerType,
116 typename std::enable_if_t<std::is_same_v<AssemblerType, SimdReg>, bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder *,AssemblerType reg)117 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* /*builder*/,
118 AssemblerType reg) {
119 return {reg.machine_reg(), x86_64::CallImm::kXmmRegType};
120 }
121
122 template <typename IntrinsicResType, typename AssemblerResType>
LoadCallIntrinsicResult(x86_64::MachineIRBuilder * builder,MachineReg result_ptr,AssemblerResType result)123 void LoadCallIntrinsicResult(x86_64::MachineIRBuilder* builder,
124 MachineReg result_ptr,
125 AssemblerResType result) {
126 static_assert(std::tuple_size_v<IntrinsicResType> == std::tuple_size_v<AssemblerResType>);
127 constexpr const uint32_t kResultTupleSize = std::tuple_size_v<IntrinsicResType>;
128 static_assert(kResultTupleSize > 1, "Result tuple size is expected to be at least 2");
129
130 if constexpr (kResultTupleSize == 2) {
131 using FirstElementType = std::tuple_element_t<0, IntrinsicResType>;
132 using SecondElementType = std::tuple_element_t<1, IntrinsicResType>;
133
134 auto first_reg = std::get<0>(result);
135 auto second_reg = std::get<1>(result);
136
137 if constexpr (std::is_same_v<FirstElementType, SIMD128Register>) {
138 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(first_reg.machine_reg(), result_ptr, 0);
139 if constexpr (std::is_same_v<SecondElementType, SIMD128Register>) {
140 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(second_reg.machine_reg(), result_ptr, 16);
141 } else if constexpr (std::is_integral_v<SecondElementType>) {
142 builder->Gen<x86_64::MovqRegMemBaseDisp>(second_reg, result_ptr, 16);
143 } else {
144 static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
145 }
146 } else {
147 static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
148 }
149 } else if constexpr (kResultTupleSize == 3) {
150 using FirstElementType = std::tuple_element_t<0, IntrinsicResType>;
151 using SecondElementType = std::tuple_element_t<1, IntrinsicResType>;
152 using ThirdElementType = std::tuple_element_t<2, IntrinsicResType>;
153
154 if constexpr (std::is_same_v<FirstElementType, SIMD128Register> &&
155 std::is_same_v<SecondElementType, SIMD128Register> &&
156 std::is_same_v<ThirdElementType, SIMD128Register>) {
157 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
158 std::get<0>(result).machine_reg(), result_ptr, 0 * 16);
159 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
160 std::get<1>(result).machine_reg(), result_ptr, 1 * 16);
161 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
162 std::get<2>(result).machine_reg(), result_ptr, 2 * 16);
163 } else {
164 static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
165 }
166 } else if constexpr (kResultTupleSize == 4) {
167 using FirstElementType = std::tuple_element_t<0, IntrinsicResType>;
168 using SecondElementType = std::tuple_element_t<1, IntrinsicResType>;
169 using ThirdElementType = std::tuple_element_t<2, IntrinsicResType>;
170 using FourthElementType = std::tuple_element_t<3, IntrinsicResType>;
171
172 if constexpr (std::is_same_v<FirstElementType, SIMD128Register> &&
173 std::is_same_v<SecondElementType, SIMD128Register> &&
174 std::is_same_v<ThirdElementType, SIMD128Register> &&
175 std::is_same_v<FourthElementType, SIMD128Register>) {
176 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
177 std::get<0>(result).machine_reg(), result_ptr, 0 * 16);
178 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
179 std::get<1>(result).machine_reg(), result_ptr, 1 * 16);
180 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
181 std::get<2>(result).machine_reg(), result_ptr, 2 * 16);
182 builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
183 std::get<3>(result).machine_reg(), result_ptr, 3 * 16);
184 } else {
185 static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
186 }
187 } else {
188 static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
189 }
190 }
191
192 } // namespace call_intrinsic_internal
193
194 // Specialization for IntrinsicResType=void
195 template <typename IntrinsicResType,
196 typename... IntrinsicArgType,
197 typename... AssemblerArgType,
198 std::enable_if_t<std::is_same_v<IntrinsicResType, void>, bool> = true>
CallIntrinsicImpl(x86_64::MachineIRBuilder * builder,IntrinsicResType (* function)(IntrinsicArgType...),MachineReg flag_register,AssemblerArgType...args)199 void CallIntrinsicImpl(x86_64::MachineIRBuilder* builder,
200 IntrinsicResType (*function)(IntrinsicArgType...),
201 MachineReg flag_register,
202 AssemblerArgType... args) {
203 // Store fixed parameters into registers and prepare list of input parameters for
204 // GenPrepareCallImmArg.
205 constexpr const size_t kArgumentArraySize = sizeof...(IntrinsicArgType);
206 std::array<x86_64::CallImm::Arg, kArgumentArraySize> args_for_call_imm;
207 size_t index = 0;
208
209 ((args_for_call_imm[index++] =
210 call_intrinsic_internal::GenPrepareCallImmArg<IntrinsicArgType, AssemblerArgType>(builder,
211 args)),
212 ...);
213
214 builder->GenCallImm(bit_cast<uintptr_t>(function), flag_register, args_for_call_imm);
215 }
216
217 template <typename AssemblerResType,
218 typename IntrinsicResType,
219 typename... IntrinsicArgType,
220 typename... AssemblerArgType,
221 std::enable_if_t<!std::is_same_v<IntrinsicResType, void>, bool> = true>
CallIntrinsicImpl(x86_64::MachineIRBuilder * builder,IntrinsicResType (* function)(IntrinsicArgType...),AssemblerResType result,MachineReg flag_register,AssemblerArgType...args)222 void CallIntrinsicImpl(x86_64::MachineIRBuilder* builder,
223 IntrinsicResType (*function)(IntrinsicArgType...),
224 AssemblerResType result,
225 MachineReg flag_register,
226 AssemblerArgType... args) {
227 constexpr const bool kIsResultOnStack = sizeof(IntrinsicResType) > 16;
228
229 // Store fixed parameters into registers and prepare list of input parameters for
230 // GenPrepareCallImmArg.
231 constexpr const size_t kArgumentArraySize =
232 kIsResultOnStack ? sizeof...(IntrinsicArgType) + 1 : sizeof...(IntrinsicArgType);
233
234 std::array<x86_64::CallImm::Arg, kArgumentArraySize> args_for_call_imm;
235
236 size_t index = 0;
237 if constexpr (kIsResultOnStack) {
238 builder->ir()->ReserveArgs(sizeof(IntrinsicResType));
239 args_for_call_imm[index++] = {x86_64::kMachineRegRSP, x86_64::CallImm::kIntRegType};
240 }
241
242 ((args_for_call_imm[index++] =
243 call_intrinsic_internal::GenPrepareCallImmArg<IntrinsicArgType, AssemblerArgType>(builder,
244 args)),
245 ...);
246
247 auto* call = builder->GenCallImm(bit_cast<uintptr_t>(function), flag_register, args_for_call_imm);
248
249 if constexpr (kIsResultOnStack) {
250 call_intrinsic_internal::LoadCallIntrinsicResult<IntrinsicResType>(
251 builder, call->IntResultAt(0), result);
252 } else if constexpr (std::tuple_size_v<IntrinsicResType> == 1) {
253 using ResultType = std::tuple_element_t<0, IntrinsicResType>;
254 if constexpr (std::is_integral_v<ResultType>) {
255 call_intrinsic_internal::SignExtend64Result<ResultType>(
256 builder, result, call->IntResultAt(0));
257 } else {
258 builder->Gen<PseudoCopy>(result.machine_reg(), call->XmmResultAt(0), 16);
259 }
260 } else if constexpr (std::tuple_size_v<IntrinsicResType> == 2) {
261 using ResultType1 = std::tuple_element_t<0, IntrinsicResType>;
262 using ResultType2 = std::tuple_element_t<1, IntrinsicResType>;
263 // The only case where it is not on stack is two integral types
264 static_assert(std::is_integral_v<ResultType1> && std::is_integral_v<ResultType2>);
265
266 call_intrinsic_internal::SignExtend64Result<ResultType1>(
267 builder, std::get<0>(result), call->IntResultAt(0));
268 call_intrinsic_internal::SignExtend64Result<ResultType2>(
269 builder, std::get<1>(result), call->IntResultAt(1));
270 } else {
271 static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported result type");
272 }
273 }
274
275 } // namespace berberis
276 #endif // BERBERIS_HEAVY_OPTIMIZER_RISCV64_CALL_INTRINSIC_H_
277