• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_HEAVY_OPTIMIZER_RISCV64_CALL_INTRINSIC_H_
18 #define BERBERIS_HEAVY_OPTIMIZER_RISCV64_CALL_INTRINSIC_H_
19 
#include <array>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>

#include "berberis/backend/code_emitter.h"
#include "berberis/backend/common/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
#include "berberis/base/bit_util.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/simd_register.h"

#include "simd_register.h"
31 
32 namespace berberis {
33 
34 namespace call_intrinsic_internal {
35 
36 // TODO(b/308951522) Create Expand node in backend and use that instead so it
37 // can be optimized when possible.
38 template <typename IntrinsicType>
SignExtend64(x86_64::MachineIRBuilder * builder,MachineReg dst,MachineReg src)39 void SignExtend64(x86_64::MachineIRBuilder* builder, MachineReg dst, MachineReg src) {
40   // Note, RISCV64 ABI mandates type-extension up to 32-bit and then sign
41   // extension to 64-bit. This meants U8 and U16 are effectively zero-extended
42   // to 64-bit.
43   if constexpr (sizeof(IntrinsicType) == 1) {
44     if constexpr (std::is_signed_v<IntrinsicType>) {
45       builder->Gen<x86_64::MovsxbqRegReg>(dst, src);
46     } else {
47       builder->Gen<x86_64::MovzxbqRegReg>(dst, src);
48     }
49   } else if constexpr (sizeof(IntrinsicType) == 2) {
50     if constexpr (std::is_signed_v<IntrinsicType>) {
51       builder->Gen<x86_64::MovsxwqRegReg>(dst, src);
52     } else {
53       builder->Gen<x86_64::MovzxwqRegReg>(dst, src);
54     }
55   } else if constexpr (sizeof(IntrinsicType) == 4) {
56     builder->Gen<x86_64::MovsxlqRegReg>(dst, src);
57   } else {
58     static_assert(kDependentTypeFalse<IntrinsicType>,
59                   "Unsupported type, only integrals with size 4, 2 and 1 are supported.");
60   }
61 }
62 
63 template <typename IntrinsicType>
SignExtend64Result(x86_64::MachineIRBuilder * builder,MachineReg dst,MachineReg src)64 void SignExtend64Result(x86_64::MachineIRBuilder* builder, MachineReg dst, MachineReg src) {
65   if constexpr (sizeof(IntrinsicType) == 8) {
66     builder->Gen<PseudoCopy>(dst, src, 8);
67   } else {
68     static_assert(sizeof(IntrinsicType) == 4, "8- and 16-bit return values are not yet supported");
69     call_intrinsic_internal::SignExtend64<IntrinsicType>(builder, dst, src);
70   }
71 }
72 
73 template <
74     typename IntrinsicType,
75     typename AssemblerType,
76     typename std::enable_if_t<!std::is_same_v<AssemblerType, MachineReg> &&
77                                   std::is_integral_v<IntrinsicType> && (sizeof(IntrinsicType) <= 4),
78                               bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder * builder,AssemblerType val)79 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* builder, AssemblerType val) {
80   static_assert(std::is_same_v<AssemblerType, IntrinsicType>);
81   MachineReg reg = builder->ir()->AllocVReg();
82   MachineReg temp_reg = builder->ir()->AllocVReg();
83   // SignExtend64 works with registers, we need to copy integral value to a register before calling
84   // it.
85   builder->Gen<x86_64::MovlRegImm>(temp_reg, static_cast<uint32_t>(val));
86   SignExtend64<IntrinsicType>(builder, reg, temp_reg);
87   return {reg, x86_64::CallImm::kIntRegType};
88 }
89 
90 template <
91     typename IntrinsicType,
92     typename AssemblerType,
93     typename std::enable_if_t<std::is_same_v<AssemblerType, MachineReg> &&
94                                   (std::is_integral_v<IntrinsicType> ||
95                                    std::is_pointer_v<IntrinsicType>)&&(sizeof(IntrinsicType) == 8),
96                               bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder *,AssemblerType reg)97 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* /*builder*/,
98                                           AssemblerType reg) {
99   return {reg, x86_64::CallImm::kIntRegType};
100 }
101 
102 template <
103     typename IntrinsicType,
104     typename AssemblerType,
105     typename std::enable_if_t<std::is_same_v<AssemblerType, MachineReg> &&
106                                   std::is_integral_v<IntrinsicType> && (sizeof(IntrinsicType) <= 4),
107                               bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder * builder,AssemblerType reg)108 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* builder, AssemblerType reg) {
109   MachineReg new_reg = builder->ir()->AllocVReg();
110   SignExtend64<IntrinsicType>(builder, new_reg, reg);
111   return {new_reg, x86_64::CallImm::kIntRegType};
112 }
113 
114 template <typename IntrinsicType,
115           typename AssemblerType,
116           typename std::enable_if_t<std::is_same_v<AssemblerType, SimdReg>, bool> = true>
GenPrepareCallImmArg(x86_64::MachineIRBuilder *,AssemblerType reg)117 x86_64::CallImm::Arg GenPrepareCallImmArg(x86_64::MachineIRBuilder* /*builder*/,
118                                           AssemblerType reg) {
119   return {reg.machine_reg(), x86_64::CallImm::kXmmRegType};
120 }
121 
122 template <typename IntrinsicResType, typename AssemblerResType>
LoadCallIntrinsicResult(x86_64::MachineIRBuilder * builder,MachineReg result_ptr,AssemblerResType result)123 void LoadCallIntrinsicResult(x86_64::MachineIRBuilder* builder,
124                              MachineReg result_ptr,
125                              AssemblerResType result) {
126   static_assert(std::tuple_size_v<IntrinsicResType> == std::tuple_size_v<AssemblerResType>);
127   constexpr const uint32_t kResultTupleSize = std::tuple_size_v<IntrinsicResType>;
128   static_assert(kResultTupleSize > 1, "Result tuple size is expected to be at least 2");
129 
130   if constexpr (kResultTupleSize == 2) {
131     using FirstElementType = std::tuple_element_t<0, IntrinsicResType>;
132     using SecondElementType = std::tuple_element_t<1, IntrinsicResType>;
133 
134     auto first_reg = std::get<0>(result);
135     auto second_reg = std::get<1>(result);
136 
137     if constexpr (std::is_same_v<FirstElementType, SIMD128Register>) {
138       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(first_reg.machine_reg(), result_ptr, 0);
139       if constexpr (std::is_same_v<SecondElementType, SIMD128Register>) {
140         builder->Gen<x86_64::MovdquXRegMemBaseDisp>(second_reg.machine_reg(), result_ptr, 16);
141       } else if constexpr (std::is_integral_v<SecondElementType>) {
142         builder->Gen<x86_64::MovqRegMemBaseDisp>(second_reg, result_ptr, 16);
143       } else {
144         static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
145       }
146     } else {
147       static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
148     }
149   } else if constexpr (kResultTupleSize == 3) {
150     using FirstElementType = std::tuple_element_t<0, IntrinsicResType>;
151     using SecondElementType = std::tuple_element_t<1, IntrinsicResType>;
152     using ThirdElementType = std::tuple_element_t<2, IntrinsicResType>;
153 
154     if constexpr (std::is_same_v<FirstElementType, SIMD128Register> &&
155                   std::is_same_v<SecondElementType, SIMD128Register> &&
156                   std::is_same_v<ThirdElementType, SIMD128Register>) {
157       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
158           std::get<0>(result).machine_reg(), result_ptr, 0 * 16);
159       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
160           std::get<1>(result).machine_reg(), result_ptr, 1 * 16);
161       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
162           std::get<2>(result).machine_reg(), result_ptr, 2 * 16);
163     } else {
164       static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
165     }
166   } else if constexpr (kResultTupleSize == 4) {
167     using FirstElementType = std::tuple_element_t<0, IntrinsicResType>;
168     using SecondElementType = std::tuple_element_t<1, IntrinsicResType>;
169     using ThirdElementType = std::tuple_element_t<2, IntrinsicResType>;
170     using FourthElementType = std::tuple_element_t<3, IntrinsicResType>;
171 
172     if constexpr (std::is_same_v<FirstElementType, SIMD128Register> &&
173                   std::is_same_v<SecondElementType, SIMD128Register> &&
174                   std::is_same_v<ThirdElementType, SIMD128Register> &&
175                   std::is_same_v<FourthElementType, SIMD128Register>) {
176       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
177           std::get<0>(result).machine_reg(), result_ptr, 0 * 16);
178       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
179           std::get<1>(result).machine_reg(), result_ptr, 1 * 16);
180       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
181           std::get<2>(result).machine_reg(), result_ptr, 2 * 16);
182       builder->Gen<x86_64::MovdquXRegMemBaseDisp>(
183           std::get<3>(result).machine_reg(), result_ptr, 3 * 16);
184     } else {
185       static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
186     }
187   } else {
188     static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported intrinsic return type.");
189   }
190 }
191 
192 }  // namespace call_intrinsic_internal
193 
194 // Specialization for IntrinsicResType=void
195 template <typename IntrinsicResType,
196           typename... IntrinsicArgType,
197           typename... AssemblerArgType,
198           std::enable_if_t<std::is_same_v<IntrinsicResType, void>, bool> = true>
CallIntrinsicImpl(x86_64::MachineIRBuilder * builder,IntrinsicResType (* function)(IntrinsicArgType...),MachineReg flag_register,AssemblerArgType...args)199 void CallIntrinsicImpl(x86_64::MachineIRBuilder* builder,
200                        IntrinsicResType (*function)(IntrinsicArgType...),
201                        MachineReg flag_register,
202                        AssemblerArgType... args) {
203   // Store fixed parameters into registers and prepare list of input parameters for
204   // GenPrepareCallImmArg.
205   constexpr const size_t kArgumentArraySize = sizeof...(IntrinsicArgType);
206   std::array<x86_64::CallImm::Arg, kArgumentArraySize> args_for_call_imm;
207   size_t index = 0;
208 
209   ((args_for_call_imm[index++] =
210         call_intrinsic_internal::GenPrepareCallImmArg<IntrinsicArgType, AssemblerArgType>(builder,
211                                                                                           args)),
212    ...);
213 
214   builder->GenCallImm(bit_cast<uintptr_t>(function), flag_register, args_for_call_imm);
215 }
216 
217 template <typename AssemblerResType,
218           typename IntrinsicResType,
219           typename... IntrinsicArgType,
220           typename... AssemblerArgType,
221           std::enable_if_t<!std::is_same_v<IntrinsicResType, void>, bool> = true>
CallIntrinsicImpl(x86_64::MachineIRBuilder * builder,IntrinsicResType (* function)(IntrinsicArgType...),AssemblerResType result,MachineReg flag_register,AssemblerArgType...args)222 void CallIntrinsicImpl(x86_64::MachineIRBuilder* builder,
223                        IntrinsicResType (*function)(IntrinsicArgType...),
224                        AssemblerResType result,
225                        MachineReg flag_register,
226                        AssemblerArgType... args) {
227   constexpr const bool kIsResultOnStack = sizeof(IntrinsicResType) > 16;
228 
229   // Store fixed parameters into registers and prepare list of input parameters for
230   // GenPrepareCallImmArg.
231   constexpr const size_t kArgumentArraySize =
232       kIsResultOnStack ? sizeof...(IntrinsicArgType) + 1 : sizeof...(IntrinsicArgType);
233 
234   std::array<x86_64::CallImm::Arg, kArgumentArraySize> args_for_call_imm;
235 
236   size_t index = 0;
237   if constexpr (kIsResultOnStack) {
238     builder->ir()->ReserveArgs(sizeof(IntrinsicResType));
239     args_for_call_imm[index++] = {x86_64::kMachineRegRSP, x86_64::CallImm::kIntRegType};
240   }
241 
242   ((args_for_call_imm[index++] =
243         call_intrinsic_internal::GenPrepareCallImmArg<IntrinsicArgType, AssemblerArgType>(builder,
244                                                                                           args)),
245    ...);
246 
247   auto* call = builder->GenCallImm(bit_cast<uintptr_t>(function), flag_register, args_for_call_imm);
248 
249   if constexpr (kIsResultOnStack) {
250     call_intrinsic_internal::LoadCallIntrinsicResult<IntrinsicResType>(
251         builder, call->IntResultAt(0), result);
252   } else if constexpr (std::tuple_size_v<IntrinsicResType> == 1) {
253     using ResultType = std::tuple_element_t<0, IntrinsicResType>;
254     if constexpr (std::is_integral_v<ResultType>) {
255       call_intrinsic_internal::SignExtend64Result<ResultType>(
256           builder, result, call->IntResultAt(0));
257     } else {
258       builder->Gen<PseudoCopy>(result.machine_reg(), call->XmmResultAt(0), 16);
259     }
260   } else if constexpr (std::tuple_size_v<IntrinsicResType> == 2) {
261     using ResultType1 = std::tuple_element_t<0, IntrinsicResType>;
262     using ResultType2 = std::tuple_element_t<1, IntrinsicResType>;
263     // The only case where it is not on stack is two integral types
264     static_assert(std::is_integral_v<ResultType1> && std::is_integral_v<ResultType2>);
265 
266     call_intrinsic_internal::SignExtend64Result<ResultType1>(
267         builder, std::get<0>(result), call->IntResultAt(0));
268     call_intrinsic_internal::SignExtend64Result<ResultType2>(
269         builder, std::get<1>(result), call->IntResultAt(1));
270   } else {
271     static_assert(kDependentTypeFalse<IntrinsicResType>, "Unsupported result type");
272   }
273 }
274 
275 }  // namespace berberis
276 #endif  // BERBERIS_HEAVY_OPTIMIZER_RISCV64_CALL_INTRINSIC_H_
277