/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_
#define BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_

#include <cfenv>
#include <cstdint>
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>

#include "berberis/assembler/x86_64.h"
#include "berberis/backend/common/machine_ir.h"
#include "berberis/backend/x86_64/machine_insn_intrinsics.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/common_to_x86/intrinsics_bindings.h"
#include "berberis/intrinsics/intrinsics.h"
#include "berberis/intrinsics/intrinsics_args.h"
#include "berberis/intrinsics/intrinsics_process_bindings.h"
#include "berberis/intrinsics/macro_assembler.h"
#include "berberis/runtime_primitives/platform.h"

#include "simd_register.h"

namespace berberis {

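// Forward declaration. Attempts to emit inline machine IR for the intrinsic
// kFunction; returns true on success and false if the intrinsic cannot be
// inlined with the available bindings.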
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                         ResType result,
                                         FlagRegister flag_register,
                                         ArgType... args);

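// Rewrites selected floating-point intrinsics that take an explicit RISC-V
// rounding mode into their *HostRounding counterparts. This only applies when
// the guest requests the dynamic rounding mode (FPFlags::DYN), in which case
// the rounding mode is presumed to already be configured in the host FPU
// state, so the rounding-mode arguments can simply be dropped.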
template <auto kFunc>
class InlineIntrinsic {
 public:
  template <typename ResType, typename FlagRegister, typename... ArgType>
  static bool TryInlineWithHostRounding(x86_64::MachineIRBuilder* builder,
                                        ResType result,
                                        FlagRegister flag_register,
                                        ArgType... args) {
    std::tuple args_tuple = std::make_tuple(args...);
    if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FMulHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FMulHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FAddHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FAddHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FSubHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FSubHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FDivHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FDivHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float64>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float64>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float32>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float32>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float64>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float64>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float32>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float32>>(
          builder, result, flag_register, src);
    }
    return false;
  }

 private:
  // Comparing pointers to two different functions is generally not a constant
  // expression, since identical functions may be merged in the object code
  // (only comparing pointers to the same function is constexpr). This helper
  // compares them via template tags, explicitly stating that we are not
  // worried about such subtleties here.
  template <auto kFunction>
  class FunctionCompareTag;

  // Note: if we define this as a variable, clang doesn't consider it a
  // constexpr in the TryInline* functions.
  template <auto kOtherFunction>
  static constexpr bool IsTagEq() {
    return std::is_same_v<FunctionCompareTag<kFunc>, FunctionCompareTag<kOtherFunction>>;
  }
};

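// Emits a move between two registers described by register classes: GP<->GP
// copies become PseudoCopy, and GP<->XMM transfers use Movd/Movq or their
// AVX forms (Vmovd/Vmovq) depending on host CPU support and operand size.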
template <typename DestRegClass, typename SrcRegClass>
void Mov(x86_64::MachineIRBuilder* builder, MachineReg dest, MachineReg src) {
  using DestType = typename DestRegClass::Type;
  using SrcType = typename SrcRegClass::Type;
  constexpr const auto src_reg_class = SrcRegClass::template kRegClass<x86_64::MachineInsnX86_64>;
  if constexpr (std::is_integral_v<DestType>) {
    if constexpr (std::is_integral_v<SrcType>) {
      builder->Gen<PseudoCopy>(dest, src, src_reg_class.RegSize());
    } else if constexpr (SrcRegClass::kAsRegister == 'x') {
      if constexpr (src_reg_class.RegSize() == 4) {
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovdRegXReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovdRegXReg>(dest, src);
        }
      } else {
        static_assert(src_reg_class.RegSize() >= 8);
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovqRegXReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovqRegXReg>(dest, src);
        }
      }
    } else {
      static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
    }
  } else if constexpr (DestRegClass::kAsRegister == 'x') {
    if constexpr (src_reg_class.RegSize() == 4) {
      if constexpr (std::is_integral_v<SrcType>) {
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovdXRegReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovdXRegReg>(dest, src);
        }
      } else if constexpr (SrcRegClass::kAsRegister == 'x') {
        builder->Gen<PseudoCopy>(dest, src, 16);
      } else {
        static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
      }
    } else {
      static_assert(src_reg_class.RegSize() >= 8);
      if constexpr (std::is_integral_v<SrcType>) {
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovqXRegReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovqXRegReg>(dest, src);
        }
      } else if constexpr (SrcRegClass::kAsRegister == 'x') {
        builder->Gen<PseudoCopy>(dest, src, 16);
      } else {
        static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
      }
    }
  }
}

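// Helpers that dispatch to Mov with the right register class depending on
// whether the runtime operand is a SimdReg (XMM) or a plain 64-bit
// general-purpose MachineReg.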
template <typename DestRegClass, typename SrcReg>
void MovFromInput(x86_64::MachineIRBuilder* builder, MachineReg dest, SrcReg src) {
  if constexpr (std::is_same_v<SrcReg, SimdReg>) {
    Mov<DestRegClass, intrinsics::bindings::XmmReg>(builder, dest, src.machine_reg());
  } else {
    Mov<DestRegClass, intrinsics::bindings::GeneralReg64>(builder, dest, src);
  }
}

template <typename SrcRegClass, typename DestReg>
void MovToResult(x86_64::MachineIRBuilder* builder, DestReg dest, MachineReg src) {
  if constexpr (std::is_same_v<DestReg, SimdReg>) {
    Mov<intrinsics::bindings::XmmReg, SrcRegClass>(builder, dest.machine_reg(), src);
  } else {
    Mov<intrinsics::bindings::GeneralReg64, SrcRegClass>(builder, dest, src);
  }
}

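// Inlines an intrinsic by processing its assembler bindings: for each binding
// it prepares input/output/temporary registers, then emits the corresponding
// MachineInsn and copies the outputs into the requested result registers.
// Instances convert to bool to report whether inlining succeeded.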
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
class TryBindingBasedInlineIntrinsicForHeavyOptimizer {
  template <auto kFunctionForFriend,
            typename ResTypeForFriend,
            typename FlagRegisterForFriend,
            typename... ArgTypeForFriend>
  friend bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                                  ResTypeForFriend result,
                                                  FlagRegisterForFriend flag_register,
                                                  ArgTypeForFriend... args);
  template <auto kFunctionForFriend, typename FlagRegisterForFriend, typename... ArgTypeForFriend>
  friend bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                                      FlagRegisterForFriend flag_register,
                                                      ArgTypeForFriend... args);

  template <auto kFunc,
            typename Assembler_common_x86,
            typename Assembler_x86_64,
            typename MacroAssembler,
            typename Result,
            typename Callback,
            typename... Args>
  friend Result intrinsics::bindings::ProcessBindings(Callback callback,
                                                      Result def_result,
                                                      Args&&... args);

  template <
      auto kIntrinsicTemplateName,
      auto kMacroInstructionTemplateName,
      auto kMnemo,
      typename GetOpcode,
      intrinsics::bindings::CPUIDRestriction kCPUIDRestrictionTemplateValue,
      intrinsics::bindings::PreciseNanOperationsHandling kPreciseNanOperationsHandlingTemplateValue,
      bool kSideEffectsTemplateValue,
      typename... Types>
  friend class intrinsics::bindings::AsmCallInfo;

  TryBindingBasedInlineIntrinsicForHeavyOptimizer() = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer(
      const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer(
      TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
      const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
      TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;

  TryBindingBasedInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                                  ResType result,
                                                  FlagRegister flag_register,
                                                  ArgType... args)
      : builder_(builder),
        result_{result},
        xmm_result_reg_{},
        flag_register_{flag_register},
        input_args_(std::tuple{args...}),
        success_(
            intrinsics::bindings::ProcessBindings<kFunction,
                                                  AssemblerX86<x86_64::Assembler>,
                                                  x86_64::Assembler,
                                                  std::tuple<MacroAssembler<x86_64::Assembler>>,
                                                  bool,
                                                  TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(
                *this,
                false)) {}

  operator bool() { return success_; }

  // TODO(b/232598137): The MachineIR bindings for some macros can't be instantiated yet. This
  // should be removed once they're supported.
  template <typename AsmCallInfo,
            std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> ==
                                 MachineOpcode::kMachineOpUndefined,
                             bool> = true>
  std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo /* asm_call_info */) {
    return false;
  }

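  // ProcessBindings client: called with the AsmCallInfo of a candidate
  // binding. Bails out if the binding needs precise-NaN handling or a CPU
  // feature the host lacks; otherwise emits the MachineInsn with arguments
  // gathered from the bindings and processes the binding results.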
  template <typename AsmCallInfo,
            std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> !=
                                 MachineOpcode::kMachineOpUndefined,
                             bool> = true>
  std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo asm_call_info) {
    static_assert(std::is_same_v<decltype(kFunction), typename AsmCallInfo::IntrinsicType>);
    if constexpr (AsmCallInfo::kPreciseNanOperationsHandling !=
                  intrinsics::bindings::kNoNansOperation) {
      return false;
    }

    if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasAVX) {
      if (!host_platform::kHasAVX) {
        return false;
      }
    } else if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasBMI) {
      if (!host_platform::kHasBMI) {
        return false;
      }
    } else if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasLZCNT) {
      if (!host_platform::kHasLZCNT) {
        return false;
      }
    } else if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasPOPCNT) {
      if (!host_platform::kHasPOPCNT) {
        return false;
      }
    } else if constexpr (AsmCallInfo::kCPUIDRestriction ==
                         intrinsics::bindings::kNoCPUIDRestriction) {
      // No restrictions. Do nothing.
    } else {
      static_assert(berberis::kDependentValueFalse<AsmCallInfo::kCPUIDRestriction>);
    }

    // constructor_args_t here is used to generate a tuple of constructor args from the AsmCallInfo
    // bindings. The tuple parameter pack will be expanded by the tuple specialization on the
    // MachineInsn in machine_insn_intrinsics.h.
    using MachineInsn = typename AsmCallInfo::template MachineInsn<berberis::x86_64::MachineInsn,
                                                                   x86_64::constructor_args_t,
                                                                   MachineOpcode>;
    std::apply(MachineInsn::kGenFunc,
               std::tuple_cat(std::tuple<x86_64::MachineIRBuilder&>{*builder_},
                              UnwrapSimdReg(AsmCallInfo::template MakeTuplefromBindings<
                                            TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(
                                  *this, asm_call_info))));
    ProcessBindingsResults<AsmCallInfo>(type_wrapper<typename AsmCallInfo::Bindings>());
    return true;
  }

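  // MakeTuplefromBindings client: turns one argument binding into a tuple of
  // MachineInsn constructor arguments. Immediates are forwarded directly;
  // register operands go through ProcessArgInput.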
  template <typename ArgBinding, typename AsmCallInfo>
  auto /*MakeTuplefromBindingsClient*/ operator()(ArgTraits<ArgBinding>, AsmCallInfo) {
    static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
    if constexpr (arg_info.arg_type == ArgInfo::IMM_ARG) {
      auto imm = std::get<arg_info.from>(input_args_);
      return std::tuple{imm};
    } else {
      return ProcessArgInput<ArgBinding, AsmCallInfo>();
    }
  }

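  // Prepares the machine register(s) for a single register operand based on
  // its role (IN, IN_OUT, IN_OUT_TMP, IN_TMP, OUT, OUT_TMP, TMP). XMM and
  // implicit-register outputs are staged in xmm_result_reg_ /
  // implicit_result_reg_ and copied into the real result afterwards by
  // ProcessBindingResult.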
  template <typename ArgBinding, typename AsmCallInfo>
  auto ProcessArgInput() {
    static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
    using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
    using Usage = typename ArgTraits<ArgBinding>::Usage;
    static constexpr const auto kNumOut = std::tuple_size_v<typename AsmCallInfo::OutputArguments>;

    if constexpr (arg_info.arg_type == ArgInfo::IN_ARG) {
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Use>);
      static_assert(!RegisterClass::kIsImplicitReg);
      if constexpr (RegisterClass::kAsRegister == 'x' &&
                    std::is_same_v<std::tuple_element_t<arg_info.from, std::tuple<ArgType...>>,
                                   MachineReg>) {
        auto xmm_reg = AllocVReg();
        MovFromInput<RegisterClass>(builder_, xmm_reg, std::get<arg_info.from>(input_args_));
        return std::tuple{xmm_reg};
      } else {
        return std::tuple{std::get<arg_info.from>(input_args_)};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
      static_assert(!RegisterClass::kIsImplicitReg);
      if constexpr (RegisterClass::kAsRegister == 'x') {
        if constexpr (kNumOut > 1) {
          static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
        } else {
          CHECK(xmm_result_reg_.IsInvalidReg());
          xmm_result_reg_ = AllocVReg();
          MovFromInput<RegisterClass>(
              builder_, xmm_result_reg_, std::get<arg_info.from>(input_args_));
          return std::tuple{xmm_result_reg_};
        }
      } else if constexpr (kNumOut > 1) {
        auto res = std::get<arg_info.to>(result_);
        MovFromInput<RegisterClass>(builder_, res, std::get<arg_info.from>(input_args_));
        return std::tuple{res};
      } else {
        MovFromInput<RegisterClass>(builder_, result_, std::get<arg_info.from>(input_args_));
        return std::tuple{result_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
      static_assert(RegisterClass::kIsImplicitReg);
      if constexpr (kNumOut > 1) {
        static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
      } else {
        CHECK(implicit_result_reg_.IsInvalidReg());
        implicit_result_reg_ = AllocVReg();
        MovFromInput<RegisterClass>(
            builder_, implicit_result_reg_, std::get<arg_info.from>(input_args_));
        return std::tuple{implicit_result_reg_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_TMP_ARG) {
      if constexpr (RegisterClass::kIsImplicitReg) {
        auto implicit_reg = AllocVReg();
        MovFromInput<RegisterClass>(builder_, implicit_reg, std::get<arg_info.from>(input_args_));
        return std::tuple{implicit_reg};
      } else {
        static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
        return std::tuple{std::get<arg_info.from>(input_args_)};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::OUT_TMP_ARG) {
      if constexpr (kNumOut > 1) {
        static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
      } else {
        CHECK(implicit_result_reg_.IsInvalidReg());
        implicit_result_reg_ = AllocVReg();
        return std::tuple{implicit_result_reg_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::OUT_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
                    std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
      if constexpr (RegisterClass::kAsRegister == 'x') {
        CHECK(xmm_result_reg_.IsInvalidReg());
        xmm_result_reg_ = AllocVReg();
        return std::tuple{xmm_result_reg_};
      } else if constexpr (kNumOut > 1) {
        return std::tuple{std::get<arg_info.to>(result_)};
      } else if constexpr (RegisterClass::kIsImplicitReg) {
        if constexpr (RegisterClass::kAsRegister == 0) {
          return std::tuple{flag_register_};
        } else {
          CHECK(implicit_result_reg_.IsInvalidReg());
          implicit_result_reg_ = AllocVReg();
          return std::tuple{implicit_result_reg_};
        }
      } else {
        return std::tuple{result_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::TMP_ARG) {
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
                    std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
      if constexpr (RegisterClass::kAsRegister == 'm') {
        static_assert(std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
        if (scratch_arg_ >= 2) {
          FATAL("Only two scratch registers are supported for now");
        }
        return std::tuple{x86_64::kMachineRegRBP,
                          static_cast<int32_t>(offsetof(ThreadState, intrinsics_scratch_area) +
                                               config::kScratchAreaSlotSize * scratch_arg_++)};
      } else if constexpr (RegisterClass::kIsImplicitReg) {
        if constexpr (RegisterClass::kAsRegister == 0) {
          return std::tuple{flag_register_};
        } else {
          auto implicit_reg = AllocVReg();
          return std::tuple{implicit_reg};
        }
      } else {
        auto reg = AllocVReg();
        return std::tuple{reg};
      }
    } else {
      static_assert(berberis::kDependentValueFalse<arg_info.arg_type>);
    }
  }

  template <typename T>
  struct type_wrapper {
    using type = T;
  };

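  // Runs ProcessBindingResult for every binding and then normalizes the
  // return value: 32-bit integer results are sign-extended to 64 bit, while
  // 64-bit integers and floats are already in their final form.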
  template <typename AsmCallInfo, typename... ArgBinding>
  void ProcessBindingsResults(type_wrapper<std::tuple<ArgBinding...>>) {
    (ProcessBindingResult<ArgBinding, AsmCallInfo>(), ...);
    if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 0) {
      // No return value. Do nothing.
    } else if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 1) {
      using ReturnType = std::tuple_element_t<0, typename AsmCallInfo::OutputArguments>;
      if constexpr (std::is_integral_v<ReturnType> && sizeof(ReturnType) < sizeof(int32_t)) {
        // Don't handle these types just yet. We are not sure how to expand them and there
        // are no examples.
        static_assert(kDependentTypeFalse<ReturnType>);
      }
      if constexpr (std::is_same_v<ReturnType, int32_t> || std::is_same_v<ReturnType, uint32_t>) {
        // 32-bit values are expanded as signed, even when the actual result is
        // treated as unsigned!
        // TODO(b/308951522): replace with an Expand node when it's created.
        builder_->Gen<x86_64::MovsxlqRegReg>(result_, result_);
      } else if constexpr (std::is_integral_v<ReturnType> &&
                           sizeof(ReturnType) == sizeof(int64_t)) {
        // Do nothing, we have already produced an expanded value.
      } else if constexpr (std::is_same_v<ReturnType, intrinsics::Float32> ||
                           std::is_same_v<ReturnType, intrinsics::Float64>) {
        // Do nothing, NaN boxing is handled by the semantics player.
      } else {
        static_assert(kDependentTypeFalse<ReturnType>);
      }
    } else {
      static_assert(kDependentTypeFalse<typename AsmCallInfo::OutputArguments>);
    }
  }

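  // Copies a staged output (XMM or implicit register) into the final result
  // register. Immediates, memory scratch operands, and the flags register
  // need no copy.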
  template <typename ArgBinding, typename AsmCallInfo>
  void ProcessBindingResult() {
    if constexpr (ArgTraits<ArgBinding>::Class::kIsImmediate) {
      return;
    } else {
      using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
      static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
      if constexpr (RegisterClass::kAsRegister == 'm' || RegisterClass::kAsRegister == 0) {
        return;
      } else if constexpr ((arg_info.arg_type == ArgInfo::IN_OUT_ARG ||
                            arg_info.arg_type == ArgInfo::OUT_ARG) &&
                           RegisterClass::kAsRegister == 'x') {
        CHECK(!xmm_result_reg_.IsInvalidReg());
        MovToResult<RegisterClass>(builder_, result_, xmm_result_reg_);
      } else if constexpr ((arg_info.arg_type == ArgInfo::OUT_ARG ||
                            arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG ||
                            arg_info.arg_type == ArgInfo::OUT_TMP_ARG) &&
                           RegisterClass::kIsImplicitReg) {
        CHECK(!implicit_result_reg_.IsInvalidReg());
        MovToResult<RegisterClass>(builder_, result_, implicit_result_reg_);
      }
    }
  }

  MachineReg AllocVReg() { return builder_->ir()->AllocVReg(); }

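  // MachineInsn constructors expect raw MachineRegs, so unwrap any SimdReg
  // arguments (element-wise for tuples) into their underlying MachineReg.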
  template <typename T>
  static constexpr auto UnwrapSimdReg(T r) {
    if constexpr (std::is_same_v<T, SimdReg>) {
      return r.machine_reg();
    } else {
      return r;
    }
  }

  template <typename... T>
  static constexpr auto UnwrapSimdReg(std::tuple<T...> regs) {
    constexpr const auto num_args = std::tuple_size<std::tuple<T...>>::value;
    return UnwrapSimdReg(std::make_index_sequence<num_args>(), regs);
  }

  template <typename... T, auto... I>
  static constexpr auto UnwrapSimdReg(std::index_sequence<I...>, std::tuple<T...> regs) {
    return std::make_tuple(UnwrapSimdReg(std::get<I>(regs))...);
  }

 private:
  x86_64::MachineIRBuilder* builder_;
  ResType result_;
  MachineReg xmm_result_reg_;
  MachineReg implicit_result_reg_;
  FlagRegister flag_register_;
  std::tuple<ArgType...> input_args_;
  uint32_t scratch_arg_ = 0;
  bool success_;
};

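// Tries the host-rounding rewrite first and falls back to the generic
// binding-based inlining. Returns true if the intrinsic was inlined.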
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                         ResType result,
                                         FlagRegister flag_register,
                                         ArgType... args) {
  if (InlineIntrinsic<kFunction>::TryInlineWithHostRounding(
          builder, result, flag_register, args...)) {
    return true;
  }

  return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
                                                         ResType,
                                                         FlagRegister,
                                                         ArgType...>(
      builder, result, flag_register, args...);
}

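// Same as above, but inlining is mandatory: CHECK-fails if the intrinsic
// could not be inlined.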
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
void InlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                      ResType result,
                                      FlagRegister flag_register,
                                      ArgType... args) {
  bool success = TryInlineIntrinsicForHeavyOptimizer<kFunction, ResType, FlagRegister, ArgType...>(
      builder, result, flag_register, args...);
  CHECK(success);
}

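// Variants for intrinsics without a result: std::monostate is passed in
// place of the result register.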
template <auto kFunction, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                             FlagRegister flag_register,
                                             ArgType... args) {
  return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
                                                         std::monostate,
                                                         FlagRegister,
                                                         ArgType...>(
      builder, std::monostate{}, flag_register, args...);
}

template <auto kFunction, typename FlagRegister, typename... ArgType>
void InlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                          FlagRegister flag_register,
                                          ArgType... args) {
  bool success = TryInlineIntrinsicForHeavyOptimizerVoid<kFunction, FlagRegister, ArgType...>(
      builder, flag_register, args...);
  CHECK(success);
}

}  // namespace berberis

#endif  // BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_