1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_riscv64.h"
18 
19 #include "code_generator_riscv64.h"
20 #include "intrinsic_objects.h"
21 #include "intrinsics_utils.h"
22 #include "optimizing/locations.h"
23 #include "well_known_classes.h"
24 
25 namespace art HIDDEN {
26 namespace riscv64 {
27 
28 using IntrinsicSlowPathRISCV64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorRISCV64,
29                                                    SlowPathCodeRISCV64,
30                                                    Riscv64Assembler>;
31 
32 #define __ assembler->
33 
34 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
35 class ReadBarrierSystemArrayCopySlowPathRISCV64 : public SlowPathCodeRISCV64 {
36  public:
37   ReadBarrierSystemArrayCopySlowPathRISCV64(HInstruction* instruction, Location tmp)
38       : SlowPathCodeRISCV64(instruction), tmp_(tmp) {}
39 
40   void EmitNativeCode(CodeGenerator* codegen_in) override {
41     DCHECK(codegen_in->EmitBakerReadBarrier());
42     CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
43     Riscv64Assembler* assembler = codegen->GetAssembler();
44     LocationSummary* locations = instruction_->GetLocations();
45     DCHECK(locations->CanCall());
46     DCHECK(instruction_->IsInvokeStaticOrDirect())
47         << "Unexpected instruction in read barrier arraycopy slow path: "
48         << instruction_->DebugName();
49     DCHECK(instruction_->GetLocations()->Intrinsified());
50     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
51 
52     const int32_t element_size = DataType::Size(DataType::Type::kReference);
53 
54     XRegister src_curr_addr = locations->GetTemp(0).AsRegister<XRegister>();
55     XRegister dst_curr_addr = locations->GetTemp(1).AsRegister<XRegister>();
56     XRegister src_stop_addr = locations->GetTemp(2).AsRegister<XRegister>();
57     XRegister tmp_reg = tmp_.AsRegister<XRegister>();
58 
59     __ Bind(GetEntryLabel());
60     // The source range and destination pointer were initialized before entering the slow-path.
61     Riscv64Label slow_copy_loop;
62     __ Bind(&slow_copy_loop);
63     __ Loadwu(tmp_reg, src_curr_addr, 0);
64     codegen->MaybeUnpoisonHeapReference(tmp_reg);
65     // TODO: Inline the mark bit check before calling the runtime?
66     // tmp_reg = ReadBarrier::Mark(tmp_reg);
67     // No need to save live registers; it's taken care of by the
68     // entrypoint. Also, there is no need to update the stack mask,
69     // as this runtime call will not trigger a garbage collection.
70     // (See ReadBarrierMarkSlowPathRISCV64::EmitNativeCode for more
71     // explanations.)
72     int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(tmp_);
73     // This runtime call does not require a stack map.
74     codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
75     codegen->MaybePoisonHeapReference(tmp_reg);
76     __ Storew(tmp_reg, dst_curr_addr, 0);
77     __ Addi(src_curr_addr, src_curr_addr, element_size);
78     __ Addi(dst_curr_addr, dst_curr_addr, element_size);
79     __ Bne(src_curr_addr, src_stop_addr, &slow_copy_loop);
80     __ J(GetExitLabel());
81   }
82 
83   const char* GetDescription() const override {
84     return "ReadBarrierSystemArrayCopySlowPathRISCV64";
85   }
86 
87  private:
88   Location tmp_;
89 
90   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathRISCV64);
91 };
92 
93 // The MethodHandle.invokeExact intrinsic sets up arguments to match the target method call. If we
94 // need to go to the slow path, we call art_quick_invoke_polymorphic_with_hidden_receiver, which
95 // expects the MethodHandle object in a0 (in place of the actual ArtMethod).
96 class InvokePolymorphicSlowPathRISCV64 : public SlowPathCodeRISCV64 {
97  public:
98   InvokePolymorphicSlowPathRISCV64(HInstruction* instruction, XRegister method_handle)
99       : SlowPathCodeRISCV64(instruction), method_handle_(method_handle) {
100     DCHECK(instruction->IsInvokePolymorphic());
101   }
102 
103   void EmitNativeCode(CodeGenerator* codegen_in) override {
104     CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
105     Riscv64Assembler* assembler = codegen->GetAssembler();
106     __ Bind(GetEntryLabel());
107 
108     SaveLiveRegisters(codegen, instruction_->GetLocations());
109     // Passing `MethodHandle` object as hidden argument.
110     __ Mv(A0, method_handle_);
111     codegen->InvokeRuntime(QuickEntrypointEnum::kQuickInvokePolymorphicWithHiddenReceiver,
112                            instruction_);
113 
114     RestoreLiveRegisters(codegen, instruction_->GetLocations());
115     __ J(GetExitLabel());
116   }
117 
118   const char* GetDescription() const override { return "InvokePolymorphicSlowPathRISCV64"; }
119 
120  private:
121   const XRegister method_handle_;
122   DISALLOW_COPY_AND_ASSIGN(InvokePolymorphicSlowPathRISCV64);
123 };
124 
125 bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) {
126   Dispatch(invoke);
127   LocationSummary* res = invoke->GetLocations();
128   if (res == nullptr) {
129     return false;
130   }
131   return res->Intrinsified();
132 }
133 
134 Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() {
135   return codegen_->GetAssembler();
136 }
137 
138 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
139   LocationSummary* locations =
140       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
141   locations->SetInAt(0, Location::RequiresFpuRegister());
142   locations->SetOut(Location::RequiresRegister());
143 }
144 
145 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
146   LocationSummary* locations =
147       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
148   locations->SetInAt(0, Location::RequiresRegister());
149   locations->SetOut(Location::RequiresFpuRegister());
150 }
151 
152 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
153   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
154   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
155   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
156 
157   LocationSummary* const locations =
158       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
159   InvokeRuntimeCallingConvention calling_convention;
160 
161   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
162   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
163 }
164 
165 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
166   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
167   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
168   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
169   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
170 
171   LocationSummary* const locations =
172       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
173   InvokeRuntimeCallingConvention calling_convention;
174 
175   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
176   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
177   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
178 }
179 
180 static void CreateFpFpFpToFpNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
181   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
182   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
183   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
184   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
185   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
186 
187   LocationSummary* const locations =
188       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
189 
190   locations->SetInAt(0, Location::RequiresFpuRegister());
191   locations->SetInAt(1, Location::RequiresFpuRegister());
192   locations->SetInAt(2, Location::RequiresFpuRegister());
193   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
194 }
195 
196 static void CreateFPToFPLocations(ArenaAllocator* allocator,
197                                   HInvoke* invoke,
198                                   Location::OutputOverlap overlaps = Location::kOutputOverlap) {
199   LocationSummary* locations =
200       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
201   locations->SetInAt(0, Location::RequiresFpuRegister());
202   locations->SetOut(Location::RequiresFpuRegister(), overlaps);
203 }
204 
205 void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
206   CreateFPToIntLocations(allocator_, invoke);
207 }
208 
209 void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
210   LocationSummary* locations = invoke->GetLocations();
211   Riscv64Assembler* assembler = GetAssembler();
212   __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
213 }
214 
215 void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
216   CreateIntToFPLocations(allocator_, invoke);
217 }
218 
219 void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
220   LocationSummary* locations = invoke->GetLocations();
221   Riscv64Assembler* assembler = GetAssembler();
222   __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
223 }
224 
225 void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
226   CreateFPToIntLocations(allocator_, invoke);
227 }
228 
229 void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
230   LocationSummary* locations = invoke->GetLocations();
231   Riscv64Assembler* assembler = GetAssembler();
232   __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
233 }
234 
235 void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
236   CreateIntToFPLocations(allocator_, invoke);
237 }
238 
239 void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
240   LocationSummary* locations = invoke->GetLocations();
241   Riscv64Assembler* assembler = GetAssembler();
242   __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
243 }
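// The four bit-transfer intrinsics above lower directly to FMV.X.D / FMV.D.X / FMV.X.W /
// FMV.W.X, which move raw bits between integer and FP registers without conversion. As an
// illustrative sketch of the Java-level semantics being implemented (java.lang.Double/Float
// API, not ART helpers):
//   long bits = Double.doubleToRawLongBits(1.0);   // 0x3ff0000000000000L
//   double d  = Double.longBitsToDouble(bits);     // 1.0
//   int ibits = Float.floatToRawIntBits(1.0f);     // 0x3f800000
//   float f   = Float.intBitsToFloat(ibits);       // 1.0f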
244 
245 void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
246   CreateFPToIntLocations(allocator_, invoke);
247 }
248 
249 void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
250   LocationSummary* locations = invoke->GetLocations();
251   Riscv64Assembler* assembler = GetAssembler();
252   XRegister out = locations->Out().AsRegister<XRegister>();
253   __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>());
254   __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
255   __ Snez(out, out);
256 }
257 
258 void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
259   CreateFPToIntLocations(allocator_, invoke);
260 }
261 
262 void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
263   LocationSummary* locations = invoke->GetLocations();
264   Riscv64Assembler* assembler = GetAssembler();
265   XRegister out = locations->Out().AsRegister<XRegister>();
266   __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>());
267   __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
268   __ Snez(out, out);
269 }
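// The IsInfinite intrinsics rely on FCLASS.{S,D}, which produces a one-hot classification
// mask: per the RISC-V spec, bit 0 is set for -infinity and bit 7 for +infinity, which is
// what the `kNegativeInfinity | kPositiveInfinity` mask selects; `Snez` then collapses the
// masked result to a boolean. For example, FClassD on -0.0 sets only bit 3 (negative zero),
// so the AND yields zero and the intrinsic returns false.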
270 
271 static void CreateIntToIntNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
272   LocationSummary* locations =
273       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
274   locations->SetInAt(0, Location::RequiresRegister());
275   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
276 }
277 
278 template <typename EmitOp>
279 void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) {
280   LocationSummary* locations = invoke->GetLocations();
281   emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
282 }
283 
284 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
285   CreateIntToIntNoOverlapLocations(allocator_, invoke);
286 }
287 
288 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
289   Riscv64Assembler* assembler = GetAssembler();
290   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); });
291 }
292 
293 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
294   CreateIntToIntNoOverlapLocations(allocator_, invoke);
295 }
296 
297 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
298   Riscv64Assembler* assembler = GetAssembler();
299   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); });
300 }
301 
302 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
303   CreateIntToIntNoOverlapLocations(allocator_, invoke);
304 }
305 
306 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
307   Riscv64Assembler* assembler = GetAssembler();
308   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); });
309 }
310 
311 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
312   CreateIntToIntNoOverlapLocations(allocator_, invoke);
313 }
314 
315 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
316   Riscv64Assembler* assembler = GetAssembler();
317   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); });
318 }
319 
320 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
321   LocationSummary* locations =
322       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
323   locations->SetInAt(0, Location::RequiresRegister());
324   locations->SetInAt(1, Location::RequiresRegister());
325 }
326 
327 static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
328   LocationSummary* locations =
329       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
330   locations->SetInAt(0, Location::RequiresRegister());
331   locations->SetInAt(1, Location::RequiresRegister());
332   // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
333   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
334 }
335 
336 template <typename EmitOp>
337 void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) {
338   LocationSummary* locations = invoke->GetLocations();
339   emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
340 }
341 
342 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
343   CreateIntIntToVoidLocations(allocator_, invoke);
344 }
345 
346 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
347   Riscv64Assembler* assembler = GetAssembler();
348   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); });
349 }
350 
351 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
352   CreateIntIntToVoidLocations(allocator_, invoke);
353 }
354 
355 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
356   Riscv64Assembler* assembler = GetAssembler();
357   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); });
358 }
359 
360 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
361   CreateIntIntToVoidLocations(allocator_, invoke);
362 }
363 
364 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
365   Riscv64Assembler* assembler = GetAssembler();
366   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); });
367 }
368 
369 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
370   CreateIntIntToVoidLocations(allocator_, invoke);
371 }
372 
373 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
374   Riscv64Assembler* assembler = GetAssembler();
375   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); });
376 }
377 
378 static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
379                                  Location rd,
380                                  XRegister rs1,
381                                  DataType::Type type) {
382   Riscv64Assembler* assembler = codegen->GetAssembler();
383   switch (type) {
384     case DataType::Type::kUint16:
385       // There is no 16-bit reverse bytes instruction.
386       __ Rev8(rd.AsRegister<XRegister>(), rs1);
387       __ Srli(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
388       break;
389     case DataType::Type::kInt16:
390       // There is no 16-bit reverse bytes instruction.
391       __ Rev8(rd.AsRegister<XRegister>(), rs1);
392       __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
393       break;
394     case DataType::Type::kInt32:
395       // There is no 32-bit reverse bytes instruction.
396       __ Rev8(rd.AsRegister<XRegister>(), rs1);
397       __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 32);
398       break;
399     case DataType::Type::kInt64:
400       __ Rev8(rd.AsRegister<XRegister>(), rs1);
401       break;
402     case DataType::Type::kFloat32:
403       // There is no 32-bit reverse bytes instruction.
404       __ Rev8(rs1, rs1);  // Note: Clobbers `rs1`.
405       __ Srai(rs1, rs1, 32);
406       __ FMvWX(rd.AsFpuRegister<FRegister>(), rs1);
407       break;
408     case DataType::Type::kFloat64:
409       __ Rev8(rs1, rs1);  // Note: Clobbers `rs1`.
410       __ FMvDX(rd.AsFpuRegister<FRegister>(), rs1);
411       break;
412     default:
413       LOG(FATAL) << "Unexpected type: " << type;
414       UNREACHABLE();
415   }
416 }
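// Rev8 always reverses all eight bytes of the 64-bit register, so for the narrower types the
// reversed value lands in the upper bits and is shifted back down (arithmetic shift for the
// signed types, logical for Uint16). A sketch of the resulting Java-level semantics:
//   Integer.reverseBytes(0x12345678)   == 0x78563412
//   Short.reverseBytes((short) 0x1234) == (short) 0x3412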
417 
418 static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
419                                  HInvoke* invoke,
420                                  DataType::Type type) {
421   DCHECK_EQ(type, invoke->GetType());
422   LocationSummary* locations = invoke->GetLocations();
423   GenerateReverseBytes(codegen, locations->Out(), locations->InAt(0).AsRegister<XRegister>(), type);
424 }
425 
426 static void GenerateReverse(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
427   DCHECK_EQ(type, invoke->GetType());
428   Riscv64Assembler* assembler = codegen->GetAssembler();
429   LocationSummary* locations = invoke->GetLocations();
430   XRegister in = locations->InAt(0).AsRegister<XRegister>();
431   XRegister out = locations->Out().AsRegister<XRegister>();
432   ScratchRegisterScope srs(assembler);
433   XRegister temp1 = srs.AllocateXRegister();
434   XRegister temp2 = srs.AllocateXRegister();
435 
436   auto maybe_extend_mask = [type, assembler](XRegister mask, XRegister temp) {
437     if (type == DataType::Type::kInt64) {
438       __ Slli(temp, mask, 32);
439       __ Add(mask, mask, temp);
440     }
441   };
442 
443   // Swap bits in bit pairs.
444   __ Li(temp1, 0x55555555);
445   maybe_extend_mask(temp1, temp2);
446   __ Srli(temp2, in, 1);
447   __ And(out, in, temp1);
448   __ And(temp2, temp2, temp1);
449   __ Sh1Add(out, out, temp2);
450 
451   // Swap bit pairs in 4-bit groups.
452   __ Li(temp1, 0x33333333);
453   maybe_extend_mask(temp1, temp2);
454   __ Srli(temp2, out, 2);
455   __ And(out, out, temp1);
456   __ And(temp2, temp2, temp1);
457   __ Sh2Add(out, out, temp2);
458 
459   // Swap 4-bit groups in 8-bit groups.
460   __ Li(temp1, 0x0f0f0f0f);
461   maybe_extend_mask(temp1, temp2);
462   __ Srli(temp2, out, 4);
463   __ And(out, out, temp1);
464   __ And(temp2, temp2, temp1);
465   __ Slli(out, out, 4);
466   __ Add(out, out, temp2);
467 
468   GenerateReverseBytes(codegen, Location::RegisterLocation(out), out, type);
469 }
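// This is the classic divide-and-conquer bit reversal: swap adjacent bits, then 2-bit pairs,
// then nibbles, and let GenerateReverseBytes() finish with the byte swap. A rough C-level
// sketch of the 32-bit variant (the Sh1Add/Sh2Add/Slli+Add pairs stand in for the ORs, which
// is valid because the masked halves are disjoint):
//   x = ((x >> 1) & 0x55555555u) | ((x & 0x55555555u) << 1);
//   x = ((x >> 2) & 0x33333333u) | ((x & 0x33333333u) << 2);
//   x = ((x >> 4) & 0x0f0f0f0fu) | ((x & 0x0f0f0f0fu) << 4);
//   x = __builtin_bswap32(x);  // byte reversal completes the bit reversal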
470 
471 void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverse(HInvoke* invoke) {
472   CreateIntToIntNoOverlapLocations(allocator_, invoke);
473 }
474 
475 void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverse(HInvoke* invoke) {
476   GenerateReverse(codegen_, invoke, DataType::Type::kInt32);
477 }
478 
479 void IntrinsicLocationsBuilderRISCV64::VisitLongReverse(HInvoke* invoke) {
480   CreateIntToIntNoOverlapLocations(allocator_, invoke);
481 }
482 
483 void IntrinsicCodeGeneratorRISCV64::VisitLongReverse(HInvoke* invoke) {
484   GenerateReverse(codegen_, invoke, DataType::Type::kInt64);
485 }
486 
487 void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
488   CreateIntToIntNoOverlapLocations(allocator_, invoke);
489 }
490 
491 void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
492   GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt32);
493 }
494 
495 void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
496   CreateIntToIntNoOverlapLocations(allocator_, invoke);
497 }
498 
499 void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
500   GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt64);
501 }
502 
503 void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
504   CreateIntToIntNoOverlapLocations(allocator_, invoke);
505 }
506 
507 void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
508   GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt16);
509 }
510 
511 template <typename EmitOp>
512 void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) {
513   LocationSummary* locations = invoke->GetLocations();
514   emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
515 }
516 
517 void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
518   CreateIntToIntNoOverlapLocations(allocator_, invoke);
519 }
520 
521 void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
522   Riscv64Assembler* assembler = GetAssembler();
523   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); });
524 }
525 
526 void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) {
527   CreateIntToIntNoOverlapLocations(allocator_, invoke);
528 }
529 
530 void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) {
531   Riscv64Assembler* assembler = GetAssembler();
532   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); });
533 }
534 
535 void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
536   CreateIntToIntNoOverlapLocations(allocator_, invoke);
537 }
538 
539 void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
540   Riscv64Assembler* assembler = GetAssembler();
541   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
542     ScratchRegisterScope srs(assembler);
543     XRegister tmp = srs.AllocateXRegister();
544     XRegister tmp2 = srs.AllocateXRegister();
545     __ Clzw(tmp, rs1);
546     __ Li(tmp2, INT64_C(-0x80000000));
547     __ Srlw(tmp2, tmp2, tmp);
548     __ And(rd, rs1, tmp2);  // Make sure the result is zero if the input is zero.
549   });
550 }
551 
552 void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
553   CreateIntToIntNoOverlapLocations(allocator_, invoke);
554 }
555 
556 void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
557   Riscv64Assembler* assembler = GetAssembler();
558   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
559     ScratchRegisterScope srs(assembler);
560     XRegister tmp = srs.AllocateXRegister();
561     XRegister tmp2 = srs.AllocateXRegister();
562     __ Clz(tmp, rs1);
563     __ Li(tmp2, INT64_C(-0x8000000000000000));
564     __ Srl(tmp2, tmp2, tmp);
565     __ And(rd, rs1, tmp2);  // Make sure the result is zero if the input is zero.
566   });
567 }
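// Both HighestOneBit expansions use the same trick: count the leading zeros, shift the top
// bit of the operand width right by that count so it lands on the highest set bit, and AND
// with the input (as noted above, the final AND also makes a zero input produce zero).
// Java-level examples:
//   Integer.highestOneBit(0x00012345) == 0x00010000
//   Long.highestOneBit(0L)            == 0L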
568 
569 void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
570   CreateIntToIntNoOverlapLocations(allocator_, invoke);
571 }
572 
573 void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
574   Riscv64Assembler* assembler = GetAssembler();
575   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
576     ScratchRegisterScope srs(assembler);
577     XRegister tmp = srs.AllocateXRegister();
578     __ NegW(tmp, rs1);
579     __ And(rd, rs1, tmp);
580   });
581 }
582 
583 void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
584   CreateIntToIntNoOverlapLocations(allocator_, invoke);
585 }
586 
587 void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
588   Riscv64Assembler* assembler = GetAssembler();
589   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
590     ScratchRegisterScope srs(assembler);
591     XRegister tmp = srs.AllocateXRegister();
592     __ Neg(tmp, rs1);
593     __ And(rd, rs1, tmp);
594   });
595 }
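// LowestOneBit is simply `x & -x`: in two's complement, negating flips every bit above the
// lowest set bit while keeping that bit, so the AND isolates it (and zero stays zero).
// Java-level examples:
//   Integer.lowestOneBit(12) == 4    // 0b1100 & 0b...0100
//   Long.lowestOneBit(0L)    == 0L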
596 
597 void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
598   CreateIntToIntNoOverlapLocations(allocator_, invoke);
599 }
600 
601 void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
602   Riscv64Assembler* assembler = GetAssembler();
603   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); });
604 }
605 
606 void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
607   CreateIntToIntNoOverlapLocations(allocator_, invoke);
608 }
609 
610 void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
611   Riscv64Assembler* assembler = GetAssembler();
612   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); });
613 }
614 
615 void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
616   CreateIntToIntNoOverlapLocations(allocator_, invoke);
617 }
618 
619 void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
620   Riscv64Assembler* assembler = GetAssembler();
621   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); });
622 }
623 
624 void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
625   CreateIntToIntNoOverlapLocations(allocator_, invoke);
626 }
627 
628 void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
629   Riscv64Assembler* assembler = GetAssembler();
630   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); });
631 }
632 
633 static void GenerateDivRemUnsigned(HInvoke* invoke, bool is_div, CodeGeneratorRISCV64* codegen) {
634   LocationSummary* locations = invoke->GetLocations();
635   Riscv64Assembler* assembler = codegen->GetAssembler();
636   DataType::Type type = invoke->GetType();
637   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
638 
639   XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
640   XRegister divisor = locations->InAt(1).AsRegister<XRegister>();
641   XRegister out = locations->Out().AsRegister<XRegister>();
642 
643   // Check if divisor is zero, bail to managed implementation to handle.
644   SlowPathCodeRISCV64* slow_path =
645       new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
646   codegen->AddSlowPath(slow_path);
647   __ Beqz(divisor, slow_path->GetEntryLabel());
648 
649   if (is_div) {
650     if (type == DataType::Type::kInt32) {
651       __ Divuw(out, dividend, divisor);
652     } else {
653       __ Divu(out, dividend, divisor);
654     }
655   } else {
656     if (type == DataType::Type::kInt32) {
657       __ Remuw(out, dividend, divisor);
658     } else {
659       __ Remu(out, dividend, divisor);
660     }
661   }
662 
663   __ Bind(slow_path->GetExitLabel());
664 }
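// The explicit zero check above is required because Java's divideUnsigned()/remainderUnsigned()
// must throw ArithmeticException for a zero divisor, while RISC-V DIVU/REMU do not trap:
// DIVU by zero returns all ones and REMU by zero returns the dividend. The intrinsic slow
// path falls back to the managed implementation, which performs the throwing division.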
665 
666 void IntrinsicLocationsBuilderRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
667   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
668 }
669 
670 void IntrinsicCodeGeneratorRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
671   GenerateDivRemUnsigned(invoke, /*is_div=*/true, codegen_);
672 }
673 
674 void IntrinsicLocationsBuilderRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
675   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
676 }
677 
678 void IntrinsicCodeGeneratorRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
679   GenerateDivRemUnsigned(invoke, /*is_div=*/true, codegen_);
680 }
681 
682 void IntrinsicLocationsBuilderRISCV64::VisitIntegerRemainderUnsigned(HInvoke* invoke) {
683   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
684 }
685 
686 void IntrinsicCodeGeneratorRISCV64::VisitIntegerRemainderUnsigned(HInvoke* invoke) {
687   GenerateDivRemUnsigned(invoke, /*is_div=*/false, codegen_);
688 }
689 
690 void IntrinsicLocationsBuilderRISCV64::VisitLongRemainderUnsigned(HInvoke* invoke) {
691   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
692 }
693 
694 void IntrinsicCodeGeneratorRISCV64::VisitLongRemainderUnsigned(HInvoke* invoke) {
695   GenerateDivRemUnsigned(invoke, /*is_div=*/false, codegen_);
696 }
697 
698 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
699   void IntrinsicLocationsBuilderRISCV64::Visit##name##ValueOf(HInvoke* invoke) {         \
700     InvokeRuntimeCallingConvention calling_convention;                                   \
701     IntrinsicVisitor::ComputeValueOfLocations(                                           \
702         invoke,                                                                          \
703         codegen_,                                                                        \
704         low,                                                                             \
705         (high) - (low) + 1,                                                              \
706         calling_convention.GetReturnLocation(DataType::Type::kReference),                \
707         Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
708   }                                                                                      \
709   void IntrinsicCodeGeneratorRISCV64::Visit##name##ValueOf(HInvoke* invoke) {            \
710     IntrinsicVisitor::ValueOfInfo info =                                                 \
711         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
712                                              codegen_->GetCompilerOptions(),             \
713                                              WellKnownClasses::java_lang_##name##_value, \
714                                              low,                                        \
715                                              (high) - (low) + 1,                         \
716                                              start_index);                               \
717     HandleValueOf(invoke, info, type);                                                   \
718   }
719   BOXED_TYPES(VISIT_INTRINSIC)
720 #undef VISIT_INTRINSIC
721 
722 void IntrinsicCodeGeneratorRISCV64::HandleValueOf(HInvoke* invoke,
723                                                   const IntrinsicVisitor::ValueOfInfo& info,
724                                                   DataType::Type type) {
725   Riscv64Assembler* assembler = codegen_->GetAssembler();
726   LocationSummary* locations = invoke->GetLocations();
727   XRegister out = locations->Out().AsRegister<XRegister>();
728   ScratchRegisterScope srs(assembler);
729   XRegister temp = srs.AllocateXRegister();
730   auto allocate_instance = [&]() {
731     DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
732     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
733     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke);
734     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
735   };
736   if (invoke->InputAt(0)->IsIntConstant()) {
737     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
738     if (static_cast<uint32_t>(value - info.low) < info.length) {
739       // Just embed the object in the code.
740       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
741       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
742     } else {
743       DCHECK(locations->CanCall());
744       // Allocate and initialize a new object.
745       // TODO: If we JIT, we could allocate the object now, and store it in the
746       // JIT object table.
747       allocate_instance();
748       __ Li(temp, value);
749       codegen_->GetInstructionVisitor()->Store(
750           Location::RegisterLocation(temp), out, info.value_offset, type);
751       // Class pointer and `value` final field stores require a barrier before publication.
752       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
753     }
754   } else {
755     DCHECK(locations->CanCall());
756     XRegister in = locations->InAt(0).AsRegister<XRegister>();
757     Riscv64Label allocate, done;
758     // Check bounds of our cache.
759     __ AddConst32(out, in, -info.low);
760     __ Li(temp, info.length);
761     __ Bgeu(out, temp, &allocate);
762     // If the value is within the bounds, load the object directly from the array.
763     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
764     __ Sh2Add(temp, out, temp);
765     __ Loadwu(out, temp, 0);
766     codegen_->MaybeUnpoisonHeapReference(out);
767     __ J(&done);
768     __ Bind(&allocate);
769     // Otherwise allocate and initialize a new object.
770     allocate_instance();
771     codegen_->GetInstructionVisitor()->Store(
772         Location::RegisterLocation(in), out, info.value_offset, type);
773     // Class pointer and `value` final field stores require a barrier before publication.
774     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
775     __ Bind(&done);
776   }
777 }
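// For reference, this implements the java.lang.<Boxed>.valueOf() caching contract: values in
// the cached range resolve to a pre-allocated boot-image object, everything else allocates a
// fresh box. A sketch of the observable behavior (assuming the default Integer cache range of
// -128..127):
//   Integer.valueOf(100) == Integer.valueOf(100)     // true, same cached instance
//   Integer.valueOf(1000) == Integer.valueOf(1000)   // false, two distinct allocations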
778 
779 void IntrinsicLocationsBuilderRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
780   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
781 
782   if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) {
783     invoke->GetLocations()->AddTemp(Location::RequiresRegister());
784   }
785 }
786 
787 void IntrinsicCodeGeneratorRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
788   Riscv64Assembler* assembler = GetAssembler();
789   LocationSummary* locations = invoke->GetLocations();
790   Location obj = locations->InAt(0);
791   Location out = locations->Out();
792 
793   SlowPathCodeRISCV64* slow_path =
794       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
795   codegen_->AddSlowPath(slow_path);
796 
797   if (codegen_->EmitReadBarrier()) {
798     // Check self->GetWeakRefAccessEnabled().
799     ScratchRegisterScope srs(assembler);
800     XRegister temp = srs.AllocateXRegister();
801     __ Loadwu(temp, TR, Thread::WeakRefAccessEnabledOffset<kRiscv64PointerSize>().Int32Value());
802     static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0);
803     __ Bnez(temp, slow_path->GetEntryLabel());
804   }
805 
806   {
807     // Load the java.lang.ref.Reference class.
808     ScratchRegisterScope srs(assembler);
809     XRegister temp = srs.AllocateXRegister();
810     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
811 
812     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
813     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
814     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
815     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
816               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
817     __ Loadhu(temp, temp, disable_intrinsic_offset.Int32Value());
818     __ Bnez(temp, slow_path->GetEntryLabel());
819   }
820 
821   // Load the value from the field.
822   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
823   if (codegen_->EmitBakerReadBarrier()) {
824     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
825                                                     out,
826                                                     obj.AsRegister<XRegister>(),
827                                                     referent_offset,
828                                                     /*temp=*/locations->GetTemp(0),
829                                                     /*needs_null_check=*/false);
830   } else {
831     codegen_->GetInstructionVisitor()->Load(
832         out, obj.AsRegister<XRegister>(), referent_offset, DataType::Type::kReference);
833     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
834   }
835   // Emit memory barrier for load-acquire.
836   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
837   __ Bind(slow_path->GetExitLabel());
838 }
839 
840 void IntrinsicLocationsBuilderRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
841   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
842 }
843 
844 void IntrinsicCodeGeneratorRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
845   Riscv64Assembler* assembler = GetAssembler();
846   LocationSummary* locations = invoke->GetLocations();
847   XRegister obj = locations->InAt(0).AsRegister<XRegister>();
848   XRegister other = locations->InAt(1).AsRegister<XRegister>();
849   XRegister out = locations->Out().AsRegister<XRegister>();
850 
851   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
852   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
853 
854   codegen_->GetInstructionVisitor()->Load(
855       Location::RegisterLocation(out), obj, referent_offset, DataType::Type::kReference);
856   codegen_->MaybeRecordImplicitNullCheck(invoke);
857   codegen_->MaybeUnpoisonHeapReference(out);
858 
859   // Emit memory barrier for load-acquire.
860   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
861 
862   if (codegen_->EmitReadBarrier()) {
863     DCHECK(kUseBakerReadBarrier);
864 
865     Riscv64Label calculate_result;
866 
867     // If equal to `other`, the loaded reference is final (it cannot be a from-space reference).
868     __ Beq(out, other, &calculate_result);
869 
870     // If the GC is not marking, the loaded reference is final.
871     ScratchRegisterScope srs(assembler);
872     XRegister tmp = srs.AllocateXRegister();
873     __ Loadwu(tmp, TR, Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value());
874     __ Beqz(tmp, &calculate_result);
875 
876     // Check if the loaded reference is null.
877     __ Beqz(out, &calculate_result);
878 
879     // For correct memory visibility, we need a barrier before loading the lock word to
880     // synchronize with the publishing of `other` by the CC GC. However, as long as the
881     // load-acquire above is implemented as a plain load followed by a barrier (rather
882     // than an atomic load-acquire instruction which synchronizes only with other
883     // instructions on the same memory location), that barrier is sufficient.
884 
885     // Load the lockword and check if it is a forwarding address.
886     static_assert(LockWord::kStateShift == 30u);
887     static_assert(LockWord::kStateForwardingAddress == 3u);
888     // Load the lock word sign-extended. Comparing it to the sign-extended forwarding
889     // address bits as unsigned is the same as comparing both zero-extended.
890     __ Loadw(tmp, out, monitor_offset);
891     // Materialize sign-extended forwarding address bits. This is a single LUI instruction.
892     XRegister tmp2 = srs.AllocateXRegister();
893     __ Li(tmp2, INT64_C(-1) & ~static_cast<int64_t>((1 << LockWord::kStateShift) - 1));
894     // If we do not have a forwarding address, the loaded reference cannot be the same as `other`,
895     // so we proceed to calculate the result with `out != other`.
896     __ Bltu(tmp, tmp2, &calculate_result);
897 
898     // Extract the forwarding address for comparison with `other`.
899     // Note that the high 32 bits shall not be used for the result calculation.
900     __ Slliw(out, tmp, LockWord::kForwardingAddressShift);
901 
902     __ Bind(&calculate_result);
903   }
904 
905   // Calculate the result `out == other`.
906   __ Subw(out, out, other);
907   __ Seqz(out, out);
908 }
909 
910 static void GenerateVisitStringIndexOf(HInvoke* invoke,
911                                        Riscv64Assembler* assembler,
912                                        CodeGeneratorRISCV64* codegen,
913                                        bool start_at_zero) {
914   LocationSummary* locations = invoke->GetLocations();
915 
916   // Note that the null check must have been done earlier.
917   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
918 
919   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
920   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
921   SlowPathCodeRISCV64* slow_path = nullptr;
922   HInstruction* code_point = invoke->InputAt(1);
923   if (code_point->IsIntConstant()) {
924     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
925       // Always needs the slow-path. We could directly dispatch to it, but this case should be
926       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
927       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
928       codegen->AddSlowPath(slow_path);
929       __ J(slow_path->GetEntryLabel());
930       __ Bind(slow_path->GetExitLabel());
931       return;
932     }
933   } else if (code_point->GetType() != DataType::Type::kUint16) {
934     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
935     codegen->AddSlowPath(slow_path);
936     ScratchRegisterScope srs(assembler);
937     XRegister tmp = srs.AllocateXRegister();
938     __ Srliw(tmp, locations->InAt(1).AsRegister<XRegister>(), 16);
939     __ Bnez(tmp, slow_path->GetEntryLabel());
940   }
941 
942   if (start_at_zero) {
943     // Start-index = 0.
944     XRegister tmp_reg = locations->GetTemp(0).AsRegister<XRegister>();
945     __ Li(tmp_reg, 0);
946   }
947 
948   codegen->InvokeRuntime(kQuickIndexOf, invoke, slow_path);
949   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
950 
951   if (slow_path != nullptr) {
952     __ Bind(slow_path->GetExitLabel());
953   }
954 }
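// Note on the code point check above: the kQuickIndexOf stub searches for a single 16-bit
// char value, so only code points that fit in the BMP can use it; a supplementary code point
// (> 0xFFFF) would have to be matched as a surrogate pair, which is left to the managed
// String.indexOf() via the slow path.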
955 
956 void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOf(HInvoke* invoke) {
957   LocationSummary* locations = new (allocator_) LocationSummary(
958       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
959   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
960   // best to align the inputs accordingly.
961   InvokeRuntimeCallingConvention calling_convention;
962   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
963   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
964   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
965 
966   // Need to send start_index=0.
967   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
968 }
969 
970 void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOf(HInvoke* invoke) {
971   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
972 }
973 
974 void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
975   LocationSummary* locations = new (allocator_) LocationSummary(
976       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
977   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
978   // best to align the inputs accordingly.
979   InvokeRuntimeCallingConvention calling_convention;
980   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
981   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
982   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
983   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
984 }
985 
986 void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
987   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
988 }
989 
990 void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromBytes(HInvoke* invoke) {
991   LocationSummary* locations = new (allocator_) LocationSummary(
992       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
993   InvokeRuntimeCallingConvention calling_convention;
994   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
995   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
996   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
997   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
998   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
999 }
1000 
1001 void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1002   Riscv64Assembler* assembler = GetAssembler();
1003   LocationSummary* locations = invoke->GetLocations();
1004   XRegister byte_array = locations->InAt(0).AsRegister<XRegister>();
1005 
1006   SlowPathCodeRISCV64* slow_path =
1007       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
1008   codegen_->AddSlowPath(slow_path);
1009   __ Beqz(byte_array, slow_path->GetEntryLabel());
1010 
1011   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, slow_path);
1012   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1013   __ Bind(slow_path->GetExitLabel());
1014 }
1015 
1016 void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromChars(HInvoke* invoke) {
1017   LocationSummary* locations =
1018       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1019   InvokeRuntimeCallingConvention calling_convention;
1020   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1021   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1022   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1023   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1024 }
1025 
1026 void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromChars(HInvoke* invoke) {
1027   // No need to emit code checking whether `locations->InAt(2)` is a null
1028   // pointer, as callers of the native method
1029   //
1030   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1031   //
1032   // all include a null check on `data` before calling that method.
1033   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke);
1034   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1035 }
1036 
1037 void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromString(HInvoke* invoke) {
1038   LocationSummary* locations = new (allocator_) LocationSummary(
1039       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1040   InvokeRuntimeCallingConvention calling_convention;
1041   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1042   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1043 }
1044 
1045 void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromString(HInvoke* invoke) {
1046   Riscv64Assembler* assembler = GetAssembler();
1047   LocationSummary* locations = invoke->GetLocations();
1048   XRegister string_to_copy = locations->InAt(0).AsRegister<XRegister>();
1049 
1050   SlowPathCodeRISCV64* slow_path =
1051       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
1052   codegen_->AddSlowPath(slow_path);
1053   __ Beqz(string_to_copy, slow_path->GetEntryLabel());
1054 
1055   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, slow_path);
1056   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1057   __ Bind(slow_path->GetExitLabel());
1058 }
1059 
1060 static void GenerateSet(CodeGeneratorRISCV64* codegen,
1061                         std::memory_order order,
1062                         Location value,
1063                         XRegister rs1,
1064                         int32_t offset,
1065                         DataType::Type type) {
1066   if (order == std::memory_order_seq_cst) {
1067     codegen->GetInstructionVisitor()->StoreSeqCst(value, rs1, offset, type);
1068   } else {
1069     if (order == std::memory_order_release) {
1070       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
1071     } else {
1072       DCHECK(order == std::memory_order_relaxed);
1073     }
1074     codegen->GetInstructionVisitor()->Store(value, rs1, offset, type);
1075   }
1076 }
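// Rough mapping of memory orders to the emitted store sequence (a sketch; the exact barrier
// encoding is up to GenerateMemoryBarrier() and StoreSeqCst()):
//   relaxed: plain store
//   release: kAnyStore barrier (orders all prior accesses before the store), then plain store
//   seq_cst: StoreSeqCst(), which additionally orders the store against subsequent loads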
1077 
1078 std::pair<AqRl, AqRl> GetLrScAqRl(std::memory_order order) {
1079   AqRl load_aqrl = AqRl::kNone;
1080   AqRl store_aqrl = AqRl::kNone;
1081   if (order == std::memory_order_acquire) {
1082     load_aqrl = AqRl::kAcquire;
1083   } else if (order == std::memory_order_release) {
1084     store_aqrl = AqRl::kRelease;
1085   } else if (order == std::memory_order_seq_cst) {
1086     load_aqrl = AqRl::kAqRl;
1087     store_aqrl = AqRl::kRelease;
1088   } else {
1089     DCHECK(order == std::memory_order_relaxed);
1090   }
1091   return {load_aqrl, store_aqrl};
1092 }
1093 
1094 AqRl GetAmoAqRl(std::memory_order order) {
1095   AqRl amo_aqrl = AqRl::kNone;
1096   if (order == std::memory_order_acquire) {
1097     amo_aqrl = AqRl::kAcquire;
1098   } else if (order == std::memory_order_release) {
1099     amo_aqrl = AqRl::kRelease;
1100   } else {
1101     DCHECK(order == std::memory_order_seq_cst);
1102     amo_aqrl = AqRl::kAqRl;
1103   }
1104   return amo_aqrl;
1105 }
1106 
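// Emit a load-reserved for the given type. References use LR.W followed by a zero-extension
// because heap references are unsigned 32-bit values held in 64-bit registers.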
1107 static void EmitLoadReserved(Riscv64Assembler* assembler,
1108                              DataType::Type type,
1109                              XRegister ptr,
1110                              XRegister old_value,
1111                              AqRl aqrl) {
1112   switch (type) {
1113     case DataType::Type::kInt32:
1114       __ LrW(old_value, ptr, aqrl);
1115       break;
1116     case DataType::Type::kReference:
1117       __ LrW(old_value, ptr, aqrl);
1118       // TODO(riscv64): The `ZextW()` macro currently emits `SLLI+SRLI` which are from the
1119       // base "I" instruction set. When the assembler is updated to use a single-instruction
1120       // `ZextW()` macro, either the ADD.UW, or the C.ZEXT.W (16-bit encoding), we need to
1121       // rewrite this to avoid these non-"I" instructions. We could, for example, sign-extend
1122       // the reference and do the CAS as `Int32`.
1123       __ ZextW(old_value, old_value);
1124       break;
1125     case DataType::Type::kInt64:
1126       __ LrD(old_value, ptr, aqrl);
1127       break;
1128     default:
1129       LOG(FATAL) << "Unexpected type: " << type;
1130       UNREACHABLE();
1131   }
1132 }
1133 
1134 void IntrinsicLocationsBuilderRISCV64::VisitStringEquals(HInvoke* invoke) {
1135   LocationSummary* locations =
1136       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1137   locations->SetInAt(0, Location::RequiresRegister());
1138   locations->SetInAt(1, Location::RequiresRegister());
1139   locations->AddTemp(Location::RequiresRegister());
1140   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1141   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1142   // Then we shall need an extra temporary register instead of the output register.
1143   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1144 }
1145 
1146 void IntrinsicCodeGeneratorRISCV64::VisitStringEquals(HInvoke* invoke) {
1147   Riscv64Assembler* assembler = GetAssembler();
1148   LocationSummary* locations = invoke->GetLocations();
1149 
1150   // Get offsets of count, value, and class fields within a string object.
1151   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1152   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1153   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1154 
1155   XRegister str = locations->InAt(0).AsRegister<XRegister>();
1156   XRegister arg = locations->InAt(1).AsRegister<XRegister>();
1157   XRegister out = locations->Out().AsRegister<XRegister>();
1158 
1159   ScratchRegisterScope srs(assembler);
1160   XRegister temp = srs.AllocateXRegister();
1161   XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
1162 
1163   Riscv64Label loop;
1164   Riscv64Label end;
1165   Riscv64Label return_true;
1166   Riscv64Label return_false;
1167 
1168   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1169 
1170   StringEqualsOptimizations optimizations(invoke);
1171   if (!optimizations.GetArgumentNotNull()) {
1172     // Check if input is null, return false if it is.
1173     __ Beqz(arg, &return_false);
1174   }
1175 
1176   // Reference equality check, return true if same reference.
1177   __ Beq(str, arg, &return_true);
1178 
1179   if (!optimizations.GetArgumentIsString()) {
1180     // Instanceof check for the argument by comparing class fields.
1181     // All string objects must have the same type since String cannot be subclassed.
1182     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1183     // If the argument is a string object, its class field must be equal to receiver's class field.
1184     //
1185     // As the String class is expected to be non-movable, we can read the class
1186     // field from String.equals' arguments without read barriers.
1187     AssertNonMovableStringClass();
1188     // /* HeapReference<Class> */ temp = str->klass_
1189     __ Loadwu(temp, str, class_offset);
1190     // /* HeapReference<Class> */ temp1 = arg->klass_
1191     __ Loadwu(temp1, arg, class_offset);
1192     // Also, because we use the previously loaded class references only in the
1193     // following comparison, we don't need to unpoison them.
1194     __ Bne(temp, temp1, &return_false);
1195   }
1196 
1197   // Load `count` fields of this and argument strings.
1198   __ Loadwu(temp, str, count_offset);
1199   __ Loadwu(temp1, arg, count_offset);
1200   // Check if `count` fields are equal, return false if they're not.
1201   // Also compares the compression style, if differs return false.
1202   __ Bne(temp, temp1, &return_false);
1203 
1204   // Assertions that must hold in order to compare strings 8 bytes at a time.
1205   // Ok to do this because strings are zero-padded to kObjectAlignment.
1206   DCHECK_ALIGNED(value_offset, 8);
1207   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1208 
1209   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1210   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1211                 "Expecting 0=compressed, 1=uncompressed");
1212   __ Beqz(temp, &return_true);
1213 
1214   if (mirror::kUseStringCompression) {
1215     // For string compression, calculate the number of bytes to compare (not chars).
1216     // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1217     __ Andi(temp1, temp, 1);     // Extract compression flag.
1218     __ Srliw(temp, temp, 1u);    // Extract length.
1219     __ Sllw(temp, temp, temp1);  // Calculate number of bytes to compare.
1220   }
1221 
1222   // Store the offset of the string value field in preparation for the comparison loop.
1223   __ Li(temp1, value_offset);
1224 
1225   XRegister temp2 = srs.AllocateXRegister();
1226   // Loop to compare strings 8 bytes at a time starting at the front of the string.
1227   __ Bind(&loop);
1228   __ Add(out, str, temp1);
1229   __ Ld(out, out, 0);
1230   __ Add(temp2, arg, temp1);
1231   __ Ld(temp2, temp2, 0);
1232   __ Addi(temp1, temp1, sizeof(uint64_t));
1233   __ Bne(out, temp2, &return_false);
1234   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1235   __ Addi(temp, temp, mirror::kUseStringCompression ? -8 : -4);
1236   __ Bgt(temp, Zero, &loop);
1237 
1238   // Return true and exit the function.
1239   // If loop does not result in returning false, we return true.
1240   __ Bind(&return_true);
1241   __ Li(out, 1);
1242   __ J(&end);
1243 
1244   // Return false and exit the function.
1245   __ Bind(&return_false);
1246   __ Li(out, 0);
1247   __ Bind(&end);
1248 }
1249 
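// Emit a store-conditional for the given type: SC.W for 32-bit types and references, SC.D for
// 64-bit types. `store_result` is set to zero on success and non-zero on failure.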
1250 static void EmitStoreConditional(Riscv64Assembler* assembler,
1251                                  DataType::Type type,
1252                                  XRegister ptr,
1253                                  XRegister store_result,
1254                                  XRegister to_store,
1255                                  AqRl aqrl) {
1256   switch (type) {
1257     case DataType::Type::kInt32:
1258     case DataType::Type::kReference:
1259       __ ScW(store_result, to_store, ptr, aqrl);
1260       break;
1261     case DataType::Type::kInt64:
1262       __ ScD(store_result, to_store, ptr, aqrl);
1263       break;
1264     default:
1265       LOG(FATAL) << "Unexpected type: " << type;
1266       UNREACHABLE();
1267   }
1268 }
1269 
1270 static void GenerateCompareAndSet(Riscv64Assembler* assembler,
1271                                   DataType::Type type,
1272                                   std::memory_order order,
1273                                   bool strong,
1274                                   Riscv64Label* cmp_failure,
1275                                   XRegister ptr,
1276                                   XRegister new_value,
1277                                   XRegister old_value,
1278                                   XRegister mask,
1279                                   XRegister masked,
1280                                   XRegister store_result,
1281                                   XRegister expected,
1282                                   XRegister expected2 = kNoXRegister) {
1283   DCHECK(!DataType::IsFloatingPointType(type));
1284   DCHECK_GE(DataType::Size(type), 4u);
1285 
1286   // The `expected2` is valid only for the reference slow path and represents the unmarked old
1287   // value from the main path's CAS attempt whose marked old value matched `expected`.
1288   DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference);
1289 
1290   auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
1291 
1292   // repeat: {
1293   //   old_value = [ptr];  // Load exclusive.
1294   //   cmp_value = old_value & mask;  // Extract relevant bits if applicable.
1295   //   if (cmp_value != expected && cmp_value != expected2) goto cmp_failure;
1296   //   store_result = failed([ptr] <- new_value);  // Store exclusive.
1297   // }
1298   // if (strong) {
1299   //   if (store_result) goto repeat;  // Repeat until compare fails or store exclusive succeeds.
1300   // } else {
1301   //   store_result = store_result ^ 1;  // Report success as 1, failure as 0.
1302   // }
1303   //
1304   // (If `mask` is not valid, `expected` is compared with `old_value` instead of `cmp_value`.)
1305   // (If `expected2` is not valid, the `cmp_value == expected2` part is not emitted.)
1306 
1307   // Note: We're using "bare" local branches to enforce that they shall not be expanded
1308   // and the scratch register `TMP` shall not be clobbered if taken. Taking the branch to
1309   // `cmp_failure` can theoretically clobber `TMP` (if outside the 1 MiB range).
1310   Riscv64Label loop;
1311   if (strong) {
1312     __ Bind(&loop);
1313   }
1314   EmitLoadReserved(assembler, type, ptr, old_value, load_aqrl);
1315   XRegister to_store = new_value;
1316   {
1317     ScopedLrScExtensionsRestriction slser(assembler);
1318     if (mask != kNoXRegister) {
1319       DCHECK_EQ(expected2, kNoXRegister);
1320       DCHECK_NE(masked, kNoXRegister);
1321       __ And(masked, old_value, mask);
1322       __ Bne(masked, expected, cmp_failure);
1323       // The `old_value` does not need to be preserved as the caller shall use `masked`
1324       // to return the old value if needed.
1325       to_store = old_value;
1326       // TODO(riscv64): We could XOR the old and new value before the loop and use a single XOR here
1327       // instead of the XOR+OR. (The `new_value` is either Zero or a temporary we can clobber.)
1328       __ Xor(to_store, old_value, masked);
1329       __ Or(to_store, to_store, new_value);
1330     } else if (expected2 != kNoXRegister) {
1331       Riscv64Label match2;
1332       __ Beq(old_value, expected2, &match2, /*is_bare=*/ true);
1333       __ Bne(old_value, expected, cmp_failure);
1334       __ Bind(&match2);
1335     } else {
1336       __ Bne(old_value, expected, cmp_failure);
1337     }
1338   }
1339   EmitStoreConditional(assembler, type, ptr, store_result, to_store, store_aqrl);
1340   if (strong) {
1341     __ Bnez(store_result, &loop, /*is_bare=*/ true);
1342   } else {
1343     // Flip the `store_result` register to indicate success by 1 and failure by 0.
1344     __ Xori(store_result, store_result, 1);
1345   }
1346 }
1347 
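// Slow path for a reference compare-and-set with read barriers. It marks the old value read by
// the main path and, if the marked value matches `expected_`, retries the CAS accepting both the
// to-space (`expected_`) and the from-space (unmarked old value) reference of the same object.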
1348 class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 {
1349  public:
1350   ReadBarrierCasSlowPathRISCV64(HInvoke* invoke,
1351                                 std::memory_order order,
1352                                 bool strong,
1353                                 XRegister base,
1354                                 XRegister offset,
1355                                 XRegister expected,
1356                                 XRegister new_value,
1357                                 XRegister old_value,
1358                                 XRegister old_value_temp,
1359                                 XRegister store_result,
1360                                 bool update_old_value,
1361                                 CodeGeneratorRISCV64* riscv64_codegen)
1362       : SlowPathCodeRISCV64(invoke),
1363         order_(order),
1364         strong_(strong),
1365         base_(base),
1366         offset_(offset),
1367         expected_(expected),
1368         new_value_(new_value),
1369         old_value_(old_value),
1370         old_value_temp_(old_value_temp),
1371         store_result_(store_result),
1372         update_old_value_(update_old_value),
1373         mark_old_value_slow_path_(nullptr),
1374         update_old_value_slow_path_(nullptr) {
1375     // We need to add slow paths now, it is too late when emitting slow path code.
1376     Location old_value_loc = Location::RegisterLocation(old_value);
1377     Location old_value_temp_loc = Location::RegisterLocation(old_value_temp);
1378     if (kUseBakerReadBarrier) {
1379       mark_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
1380           invoke, old_value_temp_loc, kBakerReadBarrierTemp);
1381       if (update_old_value_) {
1382         update_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
1383             invoke, old_value_loc, kBakerReadBarrierTemp);
1384       }
1385     } else {
1386       Location base_loc = Location::RegisterLocation(base);
1387       Location index = Location::RegisterLocation(offset);
1388       mark_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
1389           invoke, old_value_temp_loc, old_value_loc, base_loc, /*offset=*/ 0u, index);
1390       if (update_old_value_) {
1391         update_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
1392             invoke, old_value_loc, old_value_temp_loc, base_loc, /*offset=*/ 0u, index);
1393       }
1394     }
1395   }
1396 
1397   const char* GetDescription() const override { return "ReadBarrierCasSlowPathRISCV64"; }
1398 
1399   // We return to a different label on success for a strong CAS that does not return old value.
1400   Riscv64Label* GetSuccessExitLabel() {
1401     return &success_exit_label_;
1402   }
1403 
1404   void EmitNativeCode(CodeGenerator* codegen) override {
1405     CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
1406     Riscv64Assembler* assembler = riscv64_codegen->GetAssembler();
1407     __ Bind(GetEntryLabel());
1408 
1409     // Mark the `old_value_` from the main path and compare with `expected_`.
1410     DCHECK(mark_old_value_slow_path_ != nullptr);
1411     if (kUseBakerReadBarrier) {
1412       __ Mv(old_value_temp_, old_value_);
1413       riscv64_codegen->EmitBakerReadBarierMarkingCheck(mark_old_value_slow_path_,
1414                                                        Location::RegisterLocation(old_value_temp_),
1415                                                        kBakerReadBarrierTemp);
1416     } else {
1417       __ J(mark_old_value_slow_path_->GetEntryLabel());
1418       __ Bind(mark_old_value_slow_path_->GetExitLabel());
1419     }
1420     Riscv64Label move_marked_old_value;
1421     __ Bne(old_value_temp_, expected_, update_old_value_ ? &move_marked_old_value : GetExitLabel());
1422 
1423     // The `old_value` we have read did not match `expected` (which is always a to-space
1424     // reference) but after the read barrier the marked to-space value matched, so the
1425     // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1426     // as the main path but check for both `expected` and the unmarked old value
1427     // representing the to-space and from-space references for the same object.
1428 
1429     ScratchRegisterScope srs(assembler);
1430     XRegister tmp_ptr = srs.AllocateXRegister();
1431     XRegister store_result =
1432         store_result_ != kNoXRegister ? store_result_ : srs.AllocateXRegister();
1433 
1434     // Recalculate the `tmp_ptr` from main path potentially clobbered by the read barrier above
1435     // or by an expanded conditional branch (clobbers `TMP` if beyond 1MiB).
1436     __ Add(tmp_ptr, base_, offset_);
1437 
1438     Riscv64Label mark_old_value;
1439     GenerateCompareAndSet(riscv64_codegen->GetAssembler(),
1440                           DataType::Type::kReference,
1441                           order_,
1442                           strong_,
1443                           /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1444                           tmp_ptr,
1445                           new_value_,
1446                           /*old_value=*/ old_value_temp_,
1447                           /*mask=*/ kNoXRegister,
1448                           /*masked=*/ kNoXRegister,
1449                           store_result,
1450                           expected_,
1451                           /*expected2=*/ old_value_);
1452     if (update_old_value_) {
1453       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1454       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1455       __ Mv(old_value_, expected_);
1456     }
1457     if (!update_old_value_ && strong_) {
1458       // Load success value to the result register.
1459       // We must jump to the instruction that loads the success value in the main path.
1460       // Note that a SC failure in the CAS loop sets the `store_result` to 1, so the main
1461       // path must not use the `store_result` as an indication of success.
1462       __ J(GetSuccessExitLabel());
1463     } else {
1464       __ J(GetExitLabel());
1465     }
1466 
1467     if (update_old_value_) {
1468       // TODO(riscv64): If we initially saw a from-space reference and then saw
1469       // a different reference, can the latter be also a from-space reference?
1470       // (Shouldn't every reference write store a to-space reference?)
1471       DCHECK(update_old_value_slow_path_ != nullptr);
1472       __ Bind(&mark_old_value);
1473       if (kUseBakerReadBarrier) {
1474         __ Mv(old_value_, old_value_temp_);
1475         riscv64_codegen->EmitBakerReadBarierMarkingCheck(update_old_value_slow_path_,
1476                                                          Location::RegisterLocation(old_value_),
1477                                                          kBakerReadBarrierTemp);
1478       } else {
1479         // Note: We could redirect the `failure` above directly to the entry label and bind
1480         // the exit label in the main path, but the main path would need to access the
1481         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1482         __ J(update_old_value_slow_path_->GetEntryLabel());
1483         __ Bind(update_old_value_slow_path_->GetExitLabel());
1484       }
1485       __ J(GetExitLabel());
1486 
1487       __ Bind(&move_marked_old_value);
1488       __ Mv(old_value_, old_value_temp_);
1489       __ J(GetExitLabel());
1490     }
1491   }
1492 
1493  private:
1494   // Use RA as temp. It is clobbered in the slow path anyway.
1495   static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
1496 
1497   std::memory_order order_;
1498   bool strong_;
1499   XRegister base_;
1500   XRegister offset_;
1501   XRegister expected_;
1502   XRegister new_value_;
1503   XRegister old_value_;
1504   XRegister old_value_temp_;
1505   XRegister store_result_;
1506   bool update_old_value_;
1507   SlowPathCodeRISCV64* mark_old_value_slow_path_;
1508   SlowPathCodeRISCV64* update_old_value_slow_path_;
1509   Riscv64Label success_exit_label_;
1510 };
1511 
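// Emit a branch to `label` if `rs1` is less than `rs2` (signed comparison of 32-bit values).
// A constant `rs2` is first materialized into `temp`.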
1512 static void EmitBlt32(Riscv64Assembler* assembler,
1513                       XRegister rs1,
1514                       Location rs2,
1515                       Riscv64Label* label,
1516                       XRegister temp) {
1517   if (rs2.IsConstant()) {
1518     __ Li(temp, rs2.GetConstant()->AsIntConstant()->GetValue());
1519     __ Blt(rs1, temp, label);
1520   } else {
1521     __ Blt(rs1, rs2.AsRegister<XRegister>(), label);
1522   }
1523 }
1524 
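// Emit position checks for one side of a System.arraycopy call: verify that `pos` is
// non-negative (unless the caller already knows it is) and that `length(array) - pos >= length`,
// branching to `slow_path` if either check fails. Some checks are elided for constant inputs or
// when `length` is known to be the array length.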
1525 static void CheckSystemArrayCopyPosition(Riscv64Assembler* assembler,
1526                                          XRegister array,
1527                                          Location pos,
1528                                          Location length,
1529                                          SlowPathCodeRISCV64* slow_path,
1530                                          XRegister temp1,
1531                                          XRegister temp2,
1532                                          bool length_is_array_length,
1533                                          bool position_sign_checked) {
1534   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
1535   if (pos.IsConstant()) {
1536     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
1537     DCHECK_GE(pos_const, 0);  // Checked in location builder.
1538     if (pos_const == 0) {
1539       if (!length_is_array_length) {
1540         // Check that length(array) >= length.
1541         __ Loadw(temp1, array, length_offset);
1542         EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1543       }
1544     } else {
1545       // Calculate length(array) - pos.
1546       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1547       // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1548       __ Loadw(temp1, array, length_offset);
1549       __ AddConst32(temp1, temp1, -pos_const);
1550 
1551       // Check that (length(array) - pos) >= length.
1552       EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1553     }
1554   } else if (length_is_array_length) {
1555     // The only way the copy can succeed is if pos is zero.
1556     __ Bnez(pos.AsRegister<XRegister>(), slow_path->GetEntryLabel());
1557   } else {
1558     // Check that pos >= 0.
1559     XRegister pos_reg = pos.AsRegister<XRegister>();
1560     if (!position_sign_checked) {
1561       __ Bltz(pos_reg, slow_path->GetEntryLabel());
1562     }
1563 
1564     // Calculate length(array) - pos.
1565     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1566     // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1567     __ Loadw(temp1, array, length_offset);
1568     __ Sub(temp1, temp1, pos_reg);
1569 
1570     // Check that (length(array) - pos) >= length.
1571     EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1572   }
1573 }
1574 
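// Compute the address of the element at index `pos`, i.e. `base + pos * element_size +
// data_offset`. A constant `pos` is folded into a single immediate addition; otherwise the
// index register is scaled and added with ShNAdd().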
1575 static void GenArrayAddress(CodeGeneratorRISCV64* codegen,
1576                             XRegister dest,
1577                             XRegister base,
1578                             Location pos,
1579                             DataType::Type type,
1580                             int32_t data_offset) {
1581   Riscv64Assembler* assembler = codegen->GetAssembler();
1582   if (pos.IsConstant()) {
1583     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
1584     __ AddConst64(dest, base, DataType::Size(type) * constant + data_offset);
1585   } else {
1586     codegen->GetInstructionVisitor()->ShNAdd(dest, pos.AsRegister<XRegister>(), base, type);
1587     if (data_offset != 0) {
1588       __ AddConst64(dest, dest, data_offset);
1589     }
1590   }
1591 }
1592 
1593 // Compute base source address, base destination address, and end
1594 // source address for System.arraycopy* intrinsics in `src_base`,
1595 // `dst_base` and `src_end` respectively.
1596 static void GenSystemArrayCopyAddresses(CodeGeneratorRISCV64* codegen,
1597                                         DataType::Type type,
1598                                         XRegister src,
1599                                         Location src_pos,
1600                                         XRegister dst,
1601                                         Location dst_pos,
1602                                         Location copy_length,
1603                                         XRegister src_base,
1604                                         XRegister dst_base,
1605                                         XRegister src_end) {
1606   // This routine is used by the SystemArrayCopyX intrinsics.
1607   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kInt8 ||
1608          type == DataType::Type::kUint16 || type == DataType::Type::kInt32)
1609       << "Unexpected element type: " << type;
1610   const int32_t element_size = DataType::Size(type);
1611   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1612 
1613   GenArrayAddress(codegen, src_base, src, src_pos, type, data_offset);
1614   GenArrayAddress(codegen, dst_base, dst, dst_pos, type, data_offset);
1615   GenArrayAddress(codegen, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
1616 }
1617 
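// Request a constant location for position and length inputs that fit in a 12-bit immediate;
// larger or non-constant inputs are requested in a register.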
1618 static Location LocationForSystemArrayCopyInput(HInstruction* input) {
1619   HIntConstant* const_input = input->AsIntConstantOrNull();
1620   if (const_input != nullptr && IsInt<12>(const_input->GetValue())) {
1621     return Location::ConstantLocation(const_input);
1622   } else {
1623     return Location::RequiresRegister();
1624   }
1625 }
1626 
1627 // For copy lengths at or above this threshold we currently prefer the native implementation.
1628 static constexpr int32_t kSystemArrayCopyThreshold = 128;
1629 
1630 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
1631   // The only read barrier implementation supporting the
1632   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1633   if (codegen_->EmitNonBakerReadBarrier()) {
1634     return;
1635   }
1636 
1637   size_t num_temps = codegen_->EmitBakerReadBarrier() ? 4u : 2u;
1638   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
1639       invoke, kSystemArrayCopyThreshold, num_temps);
1640   if (locations != nullptr) {
1641     // We request position and length as constants only for small integral values.
1642     locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
1643     locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
1644     locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
1645   }
1646 }
1647 
1648 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
1649   // The only read barrier implementation supporting the
1650   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1651   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1652 
1653   Riscv64Assembler* assembler = GetAssembler();
1654   LocationSummary* locations = invoke->GetLocations();
1655 
1656   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1657   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1658   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1659   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1660   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1661 
1662   XRegister src = locations->InAt(0).AsRegister<XRegister>();
1663   Location src_pos = locations->InAt(1);
1664   XRegister dest = locations->InAt(2).AsRegister<XRegister>();
1665   Location dest_pos = locations->InAt(3);
1666   Location length = locations->InAt(4);
1667   XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
1668   XRegister temp2 = locations->GetTemp(1).AsRegister<XRegister>();
1669 
1670   SlowPathCodeRISCV64* intrinsic_slow_path =
1671       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
1672   codegen_->AddSlowPath(intrinsic_slow_path);
1673 
1674   Riscv64Label conditions_on_positions_validated;
1675   SystemArrayCopyOptimizations optimizations(invoke);
1676 
1677   // If source and destination are the same, we go to slow path if we need to do forward copying.
1678   // We do not need to do this check if the source and destination positions are the same.
1679   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
1680     if (src_pos.IsConstant()) {
1681       int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1682       if (dest_pos.IsConstant()) {
1683         int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1684         if (optimizations.GetDestinationIsSource()) {
1685           // Checked when building locations.
1686           DCHECK_GE(src_pos_constant, dest_pos_constant);
1687         } else if (src_pos_constant < dest_pos_constant) {
1688           __ Beq(src, dest, intrinsic_slow_path->GetEntryLabel());
1689         }
1690       } else {
1691         if (!optimizations.GetDestinationIsSource()) {
1692           __ Bne(src, dest, &conditions_on_positions_validated);
1693         }
1694         __ Li(temp1, src_pos_constant);
1695         __ Bgt(dest_pos.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
1696       }
1697     } else {
1698       if (!optimizations.GetDestinationIsSource()) {
1699         __ Bne(src, dest, &conditions_on_positions_validated);
1700       }
1701       XRegister src_pos_reg = src_pos.AsRegister<XRegister>();
1702       EmitBlt32(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel(), temp2);
1703     }
1704   }
1705 
1706   __ Bind(&conditions_on_positions_validated);
1707 
1708   if (!optimizations.GetSourceIsNotNull()) {
1709     // Bail out if the source is null.
1710     __ Beqz(src, intrinsic_slow_path->GetEntryLabel());
1711   }
1712 
1713   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1714     // Bail out if the destination is null.
1715     __ Beqz(dest, intrinsic_slow_path->GetEntryLabel());
1716   }
1717 
1718   // We have already checked in the LocationsBuilder for the constant case.
1719   if (!length.IsConstant()) {
1720     // Merge the following two comparisons into one:
1721     //   If the length is negative, bail out (delegate to libcore's native implementation).
1722     //   If the length >= 128 then (currently) prefer native implementation.
1723     __ Li(temp1, kSystemArrayCopyThreshold);
1724     __ Bgeu(length.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
1725   }
1726   // Validity checks: source.
1727   CheckSystemArrayCopyPosition(assembler,
1728                                src,
1729                                src_pos,
1730                                length,
1731                                intrinsic_slow_path,
1732                                temp1,
1733                                temp2,
1734                                optimizations.GetCountIsSourceLength(),
1735                                /*position_sign_checked=*/ false);
1736 
1737   // Validity checks: dest.
1738   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1739   CheckSystemArrayCopyPosition(assembler,
1740                                dest,
1741                                dest_pos,
1742                                length,
1743                                intrinsic_slow_path,
1744                                temp1,
1745                                temp2,
1746                                optimizations.GetCountIsDestinationLength(),
1747                                dest_position_sign_checked);
1748 
1749   auto check_non_primitive_array_class = [&](XRegister klass, XRegister temp) {
1750     // No read barrier is needed for reading a chain of constant references for comparing
1751     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1752     // /* HeapReference<Class> */ temp = klass->component_type_
1753     __ Loadwu(temp, klass, component_offset);
1754     codegen_->MaybeUnpoisonHeapReference(temp);
1755     // Check that the component type is not null.
1756     __ Beqz(temp, intrinsic_slow_path->GetEntryLabel());
1757     // Check that the component type is not a primitive.
1758     // /* uint16_t */ temp = static_cast<uint16_t>(klass->primitive_type_);
1759     __ Loadhu(temp, temp, primitive_offset);
1760     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1761     __ Bnez(temp, intrinsic_slow_path->GetEntryLabel());
1762   };
1763 
1764   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1765     // Check whether all elements of the source array are assignable to the component
1766     // type of the destination array. We do two checks: the classes are the same,
1767     // or the destination is Object[]. If none of these checks succeed, we go to the
1768     // slow path.
1769 
1770     if (codegen_->EmitBakerReadBarrier()) {
1771       XRegister temp3 = locations->GetTemp(2).AsRegister<XRegister>();
1772       // /* HeapReference<Class> */ temp1 = dest->klass_
1773       codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
1774                                                       Location::RegisterLocation(temp1),
1775                                                       dest,
1776                                                       class_offset,
1777                                                       Location::RegisterLocation(temp3),
1778                                                       /* needs_null_check= */ false);
1779       // /* HeapReference<Class> */ temp2 = src->klass_
1780       codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
1781                                                       Location::RegisterLocation(temp2),
1782                                                       src,
1783                                                       class_offset,
1784                                                       Location::RegisterLocation(temp3),
1785                                                       /* needs_null_check= */ false);
1786     } else {
1787       // /* HeapReference<Class> */ temp1 = dest->klass_
1788       __ Loadwu(temp1, dest, class_offset);
1789       codegen_->MaybeUnpoisonHeapReference(temp1);
1790       // /* HeapReference<Class> */ temp2 = src->klass_
1791       __ Loadwu(temp2, src, class_offset);
1792       codegen_->MaybeUnpoisonHeapReference(temp2);
1793     }
1794 
1795     if (optimizations.GetDestinationIsTypedObjectArray()) {
1796       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1797       Riscv64Label do_copy;
1798       // For class match, we can skip the source type check regardless of the optimization flag.
1799       __ Beq(temp1, temp2, &do_copy);
1800       // No read barrier is needed for reading a chain of constant references
1801       // for comparing with null, see `ReadBarrierOption`.
1802       // /* HeapReference<Class> */ temp1 = temp1->component_type_
1803       __ Loadwu(temp1, temp1, component_offset);
1804       codegen_->MaybeUnpoisonHeapReference(temp1);
1805       // /* HeapReference<Class> */ temp1 = temp1->super_class_
1806       __ Loadwu(temp1, temp1, super_offset);
1807       // No need to unpoison the result, we're comparing against null.
1808       __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
1809       // Bail out if the source is not a non primitive array.
1810       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1811         check_non_primitive_array_class(temp2, temp2);
1812       }
1813       __ Bind(&do_copy);
1814     } else {
1815       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1816       // For class match, we can skip the array type check completely if at least one of source
1817       // and destination is known to be a non primitive array, otherwise one check is enough.
1818       __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
1819       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1820           !optimizations.GetSourceIsNonPrimitiveArray()) {
1821         check_non_primitive_array_class(temp2, temp2);
1822       }
1823     }
1824   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1825     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1826     // Bail out if the source is not a non primitive array.
1827     // No read barrier is needed for reading a chain of constant references for comparing
1828     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1829     // /* HeapReference<Class> */ temp2 = src->klass_
1830     __ Loadwu(temp2, src, class_offset);
1831     codegen_->MaybeUnpoisonHeapReference(temp2);
1832     check_non_primitive_array_class(temp2, temp2);
1833   }
1834 
1835   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1836     // Zero constant length: no need to emit the loop code at all.
1837   } else {
1838     Riscv64Label skip_copy_and_write_barrier;
1839     if (length.IsRegister()) {
1840       // Don't enter the copy loop if the length is zero.
1841       __ Beqz(length.AsRegister<XRegister>(), &skip_copy_and_write_barrier);
1842     }
1843 
1844     {
1845       // We use a block to end the scratch scope before the write barrier, thus
1846       // freeing the scratch registers so they can be used in `MarkGCCard`.
1847       ScratchRegisterScope srs(assembler);
1848       bool emit_rb = codegen_->EmitBakerReadBarrier();
1849       XRegister temp3 =
1850           emit_rb ? locations->GetTemp(2).AsRegister<XRegister>() : srs.AllocateXRegister();
1851 
1852       XRegister src_curr_addr = temp1;
1853       XRegister dst_curr_addr = temp2;
1854       XRegister src_stop_addr = temp3;
1855       const DataType::Type type = DataType::Type::kReference;
1856       const int32_t element_size = DataType::Size(type);
1857 
1858       XRegister tmp = kNoXRegister;
1859       SlowPathCodeRISCV64* read_barrier_slow_path = nullptr;
1860       if (emit_rb) {
1861         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1862 
1863         // SystemArrayCopy implementation for Baker read barriers (see
1864         // also CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier):
1865         //
1866         //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1867         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1868         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
1869         //   if (is_gray) {
1870         //     // Slow-path copy.
1871         //     do {
1872         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1873         //     } while (src_ptr != end_ptr)
1874         //   } else {
1875         //     // Fast-path copy.
1876         //     do {
1877         //       *dest_ptr++ = *src_ptr++;
1878         //     } while (src_ptr != end_ptr)
1879         //   }
1880 
1881         // /* uint32_t */ monitor = src->monitor_
1882         tmp = locations->GetTemp(3).AsRegister<XRegister>();
1883         __ Loadwu(tmp, src, monitor_offset);
1884         // /* LockWord */ lock_word = LockWord(monitor)
1885         static_assert(sizeof(LockWord) == sizeof(int32_t),
1886                       "art::LockWord and int32_t have different sizes.");
1887 
1888         // Shift the RB state bit to the sign bit while also clearing the low 32 bits
1889         // for the fake dependency below.
1890         static_assert(LockWord::kReadBarrierStateShift < 31);
1891         __ Slli(tmp, tmp, 63 - LockWord::kReadBarrierStateShift);
1892 
1893         // Introduce a dependency on the lock_word including rb_state, to prevent load-load
1894         // reordering, and without using a memory barrier (which would be more expensive).
1895         // `src` is unchanged by this operation (since Adduw adds low 32 bits
1896         // which are zero after left shift), but its value now depends on `tmp`.
1897         __ AddUw(src, tmp, src);
1898 
1899         // Slow path used to copy array when `src` is gray.
1900         read_barrier_slow_path = new (codegen_->GetScopedAllocator())
1901             ReadBarrierSystemArrayCopySlowPathRISCV64(invoke, Location::RegisterLocation(tmp));
1902         codegen_->AddSlowPath(read_barrier_slow_path);
1903       }
1904 
1905       // Compute base source address, base destination address, and end source address for
1906       // System.arraycopy* intrinsics in `src_base`, `dst_base` and `src_end` respectively.
1907       // Note that `src_curr_addr` is computed from `src` (and `src_pos`) here, and
1908       // thus honors the artificial dependency of `src` on `tmp` for read barriers.
1909       GenSystemArrayCopyAddresses(codegen_,
1910                                   type,
1911                                   src,
1912                                   src_pos,
1913                                   dest,
1914                                   dest_pos,
1915                                   length,
1916                                   src_curr_addr,
1917                                   dst_curr_addr,
1918                                   src_stop_addr);
1919 
1920       if (emit_rb) {
1921         // Given the numeric representation, it's enough to check the low bit of the RB state.
1922         static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1923         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1924         DCHECK_NE(tmp, kNoXRegister);
1925         __ Bltz(tmp, read_barrier_slow_path->GetEntryLabel());
1926       } else {
1927         // After allocating the last scratch register, we cannot use macro load/store instructions
1928         // such as `Loadwu()` and need to use raw instructions. However, all offsets below are 0.
1929         DCHECK_EQ(tmp, kNoXRegister);
1930         tmp = srs.AllocateXRegister();
1931       }
1932 
1933       // Iterate over the arrays and do a raw copy of the objects. We don't need to
1934       // poison/unpoison.
1935       Riscv64Label loop;
1936       __ Bind(&loop);
1937       __ Lwu(tmp, src_curr_addr, 0);
1938       __ Sw(tmp, dst_curr_addr, 0);
1939       __ Addi(src_curr_addr, src_curr_addr, element_size);
1940       __ Addi(dst_curr_addr, dst_curr_addr, element_size);
1941       // Bare: `TMP` shall not be clobbered.
1942       __ Bne(src_curr_addr, src_stop_addr, &loop, /*is_bare=*/ true);
1943 
1944       if (emit_rb) {
1945         DCHECK(read_barrier_slow_path != nullptr);
1946         __ Bind(read_barrier_slow_path->GetExitLabel());
1947       }
1948     }
1949 
1950     // We only need one card marking on the destination array.
1951     codegen_->MarkGCCard(dest);
1952 
1953     __ Bind(&skip_copy_and_write_barrier);
1954   }
1955 
1956   __ Bind(intrinsic_slow_path->GetExitLabel());
1957 }
1958 
1959 // This value is in bytes and greater than ARRAYCOPY_SHORT_XXX_ARRAY_THRESHOLD
1960 // in libcore, so if we choose to jump to the slow path we will end up
1961 // in the native implementation.
1962 static constexpr int32_t kSystemArrayCopyPrimThreshold = 384;
1963 
1964 static void CreateSystemArrayCopyLocations(HInvoke* invoke, DataType::Type type) {
1965   int32_t copy_threshold = kSystemArrayCopyPrimThreshold / DataType::Size(type);
1966 
1967   // Check to see if we have known failures that will cause us to have to bail out
1968   // to the runtime, and just generate the runtime call directly.
1969   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
1970   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull();
1971 
1972   // The positions must be non-negative.
1973   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
1974       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
1975     // We will have to fail anyways.
1976     return;
1977   }
1978 
1979   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
1980   // native implementation.
1981   HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
1982   if (length != nullptr) {
1983     int32_t len = length->GetValue();
1984     if (len < 0 || len > copy_threshold) {
1985       // Just call as normal.
1986       return;
1987     }
1988   }
1989 
1990   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
1991   LocationSummary* locations =
1992       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
1993   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
1994   locations->SetInAt(0, Location::RequiresRegister());
1995   locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
1996   locations->SetInAt(2, Location::RequiresRegister());
1997   locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
1998   locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
1999 
2000   locations->AddRegisterTemps(3);
2001 }
2002 
2003 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyByte(HInvoke* invoke) {
2004   CreateSystemArrayCopyLocations(invoke, DataType::Type::kInt8);
2005 }
2006 
2007 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2008   CreateSystemArrayCopyLocations(invoke, DataType::Type::kUint16);
2009 }
2010 
2011 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyInt(HInvoke* invoke) {
2012   CreateSystemArrayCopyLocations(invoke, DataType::Type::kInt32);
2013 }
2014 
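// Emit a zero-extending load of `type_size` bytes (1, 2, 4 or 8) from `rs1 + offset` into `rd`.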
2015 static void GenerateUnsignedLoad(
2016     Riscv64Assembler* assembler, XRegister rd, XRegister rs1, int32_t offset, size_t type_size) {
2017   switch (type_size) {
2018     case 1:
2019       __ Lbu(rd, rs1, offset);
2020       break;
2021     case 2:
2022       __ Lhu(rd, rs1, offset);
2023       break;
2024     case 4:
2025       __ Lwu(rd, rs1, offset);
2026       break;
2027     case 8:
2028       __ Ld(rd, rs1, offset);
2029       break;
2030     default:
2031       LOG(FATAL) << "Unexpected data type";
2032   }
2033 }
2034 
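// Emit a store of the low `type_size` bytes (1, 2, 4 or 8) of `rs2` to `rs1 + offset`.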
2035 static void GenerateStore(
2036     Riscv64Assembler* assembler, XRegister rs2, XRegister rs1, int32_t offset, size_t type_size) {
2037   switch (type_size) {
2038     case 1:
2039       __ Sb(rs2, rs1, offset);
2040       break;
2041     case 2:
2042       __ Sh(rs2, rs1, offset);
2043       break;
2044     case 4:
2045       __ Sw(rs2, rs1, offset);
2046       break;
2047     case 8:
2048       __ Sd(rs2, rs1, offset);
2049       break;
2050     default:
2051       LOG(FATAL) << "Unexpected data type";
2052   }
2053 }
2054 
2055 static void SystemArrayCopyPrimitive(HInvoke* invoke,
2056                                      CodeGeneratorRISCV64* codegen,
2057                                      DataType::Type type) {
2058   Riscv64Assembler* assembler = codegen->GetAssembler();
2059   LocationSummary* locations = invoke->GetLocations();
2060   XRegister src = locations->InAt(0).AsRegister<XRegister>();
2061   Location src_pos = locations->InAt(1);
2062   XRegister dst = locations->InAt(2).AsRegister<XRegister>();
2063   Location dst_pos = locations->InAt(3);
2064   Location length = locations->InAt(4);
2065 
2066   SlowPathCodeRISCV64* slow_path =
2067       new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
2068   codegen->AddSlowPath(slow_path);
2069 
2070   SystemArrayCopyOptimizations optimizations(invoke);
2071 
2072   // If source and destination are the same, take the slow path. Overlapping copy regions must be
2073   // copied in reverse and we can't know in all cases if it's needed.
2074   __ Beq(src, dst, slow_path->GetEntryLabel());
2075 
2076   if (!optimizations.GetSourceIsNotNull()) {
2077     // Bail out if the source is null.
2078     __ Beqz(src, slow_path->GetEntryLabel());
2079   }
2080 
2081   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2082     // Bail out if the destination is null.
2083     __ Beqz(dst, slow_path->GetEntryLabel());
2084   }
2085 
2086   int32_t copy_threshold = kSystemArrayCopyPrimThreshold / DataType::Size(type);
2087   XRegister tmp = locations->GetTemp(0).AsRegister<XRegister>();
2088   if (!length.IsConstant()) {
2089     // Merge the following two comparisons into one:
2090     //   If the length is negative, bail out (delegate to libcore's native implementation).
2091     //   If the length >= kSystemArrayCopyPrimThreshold then (currently) prefer libcore's
2092     //   native implementation.
2093     __ Li(tmp, copy_threshold);
2094     __ Bgeu(length.AsRegister<XRegister>(), tmp, slow_path->GetEntryLabel());
2095   } else {
2096     // We have already checked in the LocationsBuilder for the constant case.
2097     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2098     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), copy_threshold);
2099   }
2100 
2101   XRegister src_curr_addr = locations->GetTemp(1).AsRegister<XRegister>();
2102   XRegister dst_curr_addr = locations->GetTemp(2).AsRegister<XRegister>();
2103 
2104   CheckSystemArrayCopyPosition(assembler,
2105                                src,
2106                                src_pos,
2107                                length,
2108                                slow_path,
2109                                src_curr_addr,
2110                                dst_curr_addr,
2111                                /*length_is_array_length=*/ false,
2112                                /*position_sign_checked=*/ false);
2113 
2114   CheckSystemArrayCopyPosition(assembler,
2115                                dst,
2116                                dst_pos,
2117                                length,
2118                                slow_path,
2119                                src_curr_addr,
2120                                dst_curr_addr,
2121                                /*length_is_array_length=*/ false,
2122                                /*position_sign_checked=*/ false);
2123 
2124   const int32_t element_size = DataType::Size(type);
2125   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2126 
2127   GenArrayAddress(codegen, src_curr_addr, src, src_pos, type, data_offset);
2128   GenArrayAddress(codegen, dst_curr_addr, dst, dst_pos, type, data_offset);
2129 
2130   // We split processing of the array in two parts: head and tail.
2131   // A first loop handles the head by copying a block of elements per
2132   // iteration (see: elements_per_block).
2133   // A second loop handles the tail by copying the remaining elements.
2134   // If the copy length is not constant, we copy them one-by-one.
2135   //
2136   // Both loops are inverted for better performance, meaning they are
2137   // implemented as conditional do-while loops.
2138   // Here, the loop condition is first checked to determine if there are
2139   // sufficient elements to run an iteration, then we enter the do-while: an
2140   // iteration is performed followed by a conditional branch only if another
2141   // iteration is necessary. As opposed to a standard while-loop, this inversion
2142   // can save some branching (e.g. we don't branch back to the initial condition
2143   // at the end of every iteration only to potentially immediately branch
2144   // again).
2145   //
2146   // A full block of elements is subtracted and added before and after the head
2147   // loop, respectively. This ensures that any remaining length after each
2148   // head loop iteration means there is a full block remaining, reducing the
2149   // number of conditional checks required on every iteration.
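  // For example, for a char[] copy (element_size == 2, elements_per_block == 8), a constant
  // length of 19 runs the head loop twice (copying 16 elements) and then an unrolled tail
  // that copies the remaining 3 elements with one 4-byte and one 2-byte access.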
2150   ScratchRegisterScope temps(assembler);
2151   constexpr int32_t bytes_copied_per_iteration = 16;
2152   DCHECK_EQ(bytes_copied_per_iteration % element_size, 0);
2153   int32_t elements_per_block = bytes_copied_per_iteration / element_size;
2154   Riscv64Label done;
2155 
2156   XRegister length_tmp = temps.AllocateXRegister();
2157 
2158   auto emit_head_loop = [&]() {
2159     ScratchRegisterScope local_temps(assembler);
2160     XRegister tmp2 = local_temps.AllocateXRegister();
2161 
2162     Riscv64Label loop;
2163     __ Bind(&loop);
2164     __ Ld(tmp, src_curr_addr, 0);
2165     __ Ld(tmp2, src_curr_addr, 8);
2166     __ Sd(tmp, dst_curr_addr, 0);
2167     __ Sd(tmp2, dst_curr_addr, 8);
2168     __ Addi(length_tmp, length_tmp, -elements_per_block);
2169     __ Addi(src_curr_addr, src_curr_addr, bytes_copied_per_iteration);
2170     __ Addi(dst_curr_addr, dst_curr_addr, bytes_copied_per_iteration);
2171     __ Bgez(length_tmp, &loop);
2172   };
2173 
2174   auto emit_tail_loop = [&]() {
2175     Riscv64Label loop;
2176     __ Bind(&loop);
2177     GenerateUnsignedLoad(assembler, tmp, src_curr_addr, 0, element_size);
2178     GenerateStore(assembler, tmp, dst_curr_addr, 0, element_size);
2179     __ Addi(length_tmp, length_tmp, -1);
2180     __ Addi(src_curr_addr, src_curr_addr, element_size);
2181     __ Addi(dst_curr_addr, dst_curr_addr, element_size);
2182     __ Bgtz(length_tmp, &loop);
2183   };
2184 
2185   auto emit_unrolled_tail_loop = [&](int32_t tail_length) {
2186     DCHECK_LT(tail_length, elements_per_block);
2187 
2188     int32_t length_in_bytes = tail_length * element_size;
2189     size_t offset = 0;
2190     for (size_t operation_size = 8; operation_size > 0; operation_size >>= 1) {
2191       if ((length_in_bytes & operation_size) != 0) {
2192         GenerateUnsignedLoad(assembler, tmp, src_curr_addr, offset, operation_size);
2193         GenerateStore(assembler, tmp, dst_curr_addr, offset, operation_size);
2194         offset += operation_size;
2195       }
2196     }
2197   };
2198 
2199   if (length.IsConstant()) {
2200     const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue();
2201     if (constant_length >= elements_per_block) {
2202       __ Li(length_tmp, constant_length - elements_per_block);
2203       emit_head_loop();
2204     }
2205     emit_unrolled_tail_loop(constant_length % elements_per_block);
2206   } else {
2207     Riscv64Label tail_loop;
2208     XRegister length_reg = length.AsRegister<XRegister>();
2209     __ Addi(length_tmp, length_reg, -elements_per_block);
2210     __ Bltz(length_tmp, &tail_loop);
2211 
2212     emit_head_loop();
2213 
2214     __ Bind(&tail_loop);
2215     __ Addi(length_tmp, length_tmp, elements_per_block);
2216     __ Beqz(length_tmp, &done);
2217 
2218     emit_tail_loop();
2219   }
2220 
2221   __ Bind(&done);
2222   __ Bind(slow_path->GetExitLabel());
2223 }
2224 
VisitSystemArrayCopyByte(HInvoke * invoke)2225 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyByte(HInvoke* invoke) {
2226   SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kInt8);
2227 }
2228 
VisitSystemArrayCopyChar(HInvoke * invoke)2229 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2230   SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kUint16);
2231 }
2232 
VisitSystemArrayCopyInt(HInvoke * invoke)2233 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyInt(HInvoke* invoke) {
2234   SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kInt32);
2235 }
2236 
2237 enum class GetAndUpdateOp {
2238   kSet,
2239   kAdd,
2240   kAnd,
2241   kOr,
2242   kXor
2243 };
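// For 32-bit and 64-bit types, each operation maps to a single RISC-V AMO instruction
// (AmoSwapW/D, AmoAddW/D, AmoAndW/D, AmoOrW/D, AmoXorW/D); GenerateGetAndUpdate() below
// also provides a masked LR/SC sequence for `kSet` and `kAdd`, used for sub-word accesses.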
2244 
2245 // Generate a GetAndUpdate operation.
2246 //
2247 // Only 32-bit and 64-bit atomics are currently supported, therefore smaller types need
2248 // special handling. The caller emits code to prepare aligned `ptr` and adjusted `arg`
2249 // and extract the needed bits from `old_value`. For bitwise operations, no extra
2250 // handling is needed here. For `GetAndUpdateOp::kSet` and `GetAndUpdateOp::kAdd` we
2251 // also use a special LR/SC sequence that uses a `mask` to update only the desired bits.
2252 // Note: The `mask` must contain the bits to keep for `GetAndUpdateOp::kSet` and
2253 // the bits to replace for `GetAndUpdateOp::kAdd`.
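// Illustrative example of the mask convention (an assumed caller setup, not taken from
// this file): to update a 16-bit value stored in the upper half of an aligned 32-bit
// word, the caller shifts `arg` into bits 16..31 and passes mask = 0x0000ffff for
// `kSet` (the bits to keep) or mask = 0xffff0000 for `kAdd` (the bits to replace).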
GenerateGetAndUpdate(CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op,DataType::Type type,std::memory_order order,XRegister ptr,XRegister arg,XRegister old_value,XRegister mask,XRegister temp)2254 static void GenerateGetAndUpdate(CodeGeneratorRISCV64* codegen,
2255                                  GetAndUpdateOp get_and_update_op,
2256                                  DataType::Type type,
2257                                  std::memory_order order,
2258                                  XRegister ptr,
2259                                  XRegister arg,
2260                                  XRegister old_value,
2261                                  XRegister mask,
2262                                  XRegister temp) {
2263   DCHECK_EQ(mask != kNoXRegister, temp != kNoXRegister);
2264   DCHECK_IMPLIES(mask != kNoXRegister, type == DataType::Type::kInt32);
2265   DCHECK_IMPLIES(
2266       mask != kNoXRegister,
2267       (get_and_update_op == GetAndUpdateOp::kSet) || (get_and_update_op == GetAndUpdateOp::kAdd));
2268   Riscv64Assembler* assembler = codegen->GetAssembler();
2269   AqRl amo_aqrl = GetAmoAqRl(order);
2270   switch (get_and_update_op) {
2271     case GetAndUpdateOp::kSet:
2272       if (type == DataType::Type::kInt64) {
2273         __ AmoSwapD(old_value, arg, ptr, amo_aqrl);
2274       } else if (mask == kNoXRegister) {
2275         DCHECK_EQ(type, DataType::Type::kInt32);
2276         __ AmoSwapW(old_value, arg, ptr, amo_aqrl);
2277       } else {
2278         DCHECK_EQ(type, DataType::Type::kInt32);
2279         DCHECK_NE(temp, kNoXRegister);
2280         auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
2281         Riscv64Label retry;
2282         __ Bind(&retry);
2283         __ LrW(old_value, ptr, load_aqrl);
2284         {
2285           ScopedLrScExtensionsRestriction slser(assembler);
2286           __ And(temp, old_value, mask);
2287           __ Or(temp, temp, arg);
2288         }
2289         __ ScW(temp, temp, ptr, store_aqrl);
2290         __ Bnez(temp, &retry, /*is_bare=*/ true);  // Bare: `TMP` shall not be clobbered.
2291       }
2292       break;
2293     case GetAndUpdateOp::kAdd:
2294       if (type == DataType::Type::kInt64) {
2295         __ AmoAddD(old_value, arg, ptr, amo_aqrl);
2296       } else if (mask == kNoXRegister) {
2297         DCHECK_EQ(type, DataType::Type::kInt32);
2298         __ AmoAddW(old_value, arg, ptr, amo_aqrl);
2299       } else {
2300         DCHECK_EQ(type, DataType::Type::kInt32);
2301         DCHECK_NE(temp, kNoXRegister);
2302         auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
2303         Riscv64Label retry;
2304         __ Bind(&retry);
2305         __ LrW(old_value, ptr, load_aqrl);
2306         {
2307           ScopedLrScExtensionsRestriction slser(assembler);
2308           __ Add(temp, old_value, arg);
2309           // We use `(A ^ B) ^ A == B` and with the masking `((A ^ B) & mask) ^ A`, the result
2310           // contains bits from `B` for bits specified in `mask` and bits from `A` elsewhere.
2311           // Note: These instructions directly depend on each other, so this is not
2312           // necessarily the fastest approach, but the alternative `(A & ~mask) | (B & mask)`
2313           // would need an extra register for `~mask` because ANDN is not in the "I"
2314           // instruction set as required for a LR/SC sequence.
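          // Concrete illustration: with mask == 0x0000ff00, A == old_value and
          // S == old_value + arg, the three instructions below compute
          // ((S ^ A) & 0xff00) ^ A, taking bits 8..15 from S and all other bits from A.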
2315           __ Xor(temp, temp, old_value);
2316           __ And(temp, temp, mask);
2317           __ Xor(temp, temp, old_value);
2318         }
2319         __ ScW(temp, temp, ptr, store_aqrl);
2320         __ Bnez(temp, &retry, /*is_bare=*/ true);  // Bare: `TMP` shall not be clobbered.
2321       }
2322       break;
2323     case GetAndUpdateOp::kAnd:
2324       if (type == DataType::Type::kInt64) {
2325         __ AmoAndD(old_value, arg, ptr, amo_aqrl);
2326       } else {
2327         DCHECK_EQ(type, DataType::Type::kInt32);
2328         __ AmoAndW(old_value, arg, ptr, amo_aqrl);
2329       }
2330       break;
2331     case GetAndUpdateOp::kOr:
2332       if (type == DataType::Type::kInt64) {
2333         __ AmoOrD(old_value, arg, ptr, amo_aqrl);
2334       } else {
2335         DCHECK_EQ(type, DataType::Type::kInt32);
2336         __ AmoOrW(old_value, arg, ptr, amo_aqrl);
2337       }
2338       break;
2339     case GetAndUpdateOp::kXor:
2340       if (type == DataType::Type::kInt64) {
2341         __ AmoXorD(old_value, arg, ptr, amo_aqrl);
2342       } else {
2343         DCHECK_EQ(type, DataType::Type::kInt32);
2344         __ AmoXorW(old_value, arg, ptr, amo_aqrl);
2345       }
2346       break;
2347   }
2348 }
2349 
CreateUnsafeGetLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorRISCV64 * codegen)2350 static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
2351                                      HInvoke* invoke,
2352                                      CodeGeneratorRISCV64* codegen) {
2353   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
2354   LocationSummary* locations = new (allocator) LocationSummary(
2355       invoke,
2356       can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
2357       kIntrinsified);
2358   if (can_call && kUseBakerReadBarrier) {
2359     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2360   }
2361   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2362   locations->SetInAt(1, Location::RequiresRegister());
2363   locations->SetInAt(2, Location::RequiresRegister());
2364   locations->SetOut(Location::RequiresRegister(),
2365                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2366 }
2367 
CreateUnsafeGetAbsoluteLocations(ArenaAllocator * allocator,HInvoke * invoke)2368 static void CreateUnsafeGetAbsoluteLocations(ArenaAllocator* allocator,
2369                                              HInvoke* invoke) {
2370   LocationSummary* locations =
2371       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2372   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2373   locations->SetInAt(1, Location::RequiresRegister());
2374   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2375 }
2376 
GenUnsafeGet(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2377 static void GenUnsafeGet(HInvoke* invoke,
2378                          CodeGeneratorRISCV64* codegen,
2379                          std::memory_order order,
2380                          DataType::Type type) {
2381   DCHECK((type == DataType::Type::kInt8) ||
2382          (type == DataType::Type::kInt32) ||
2383          (type == DataType::Type::kInt64) ||
2384          (type == DataType::Type::kReference));
2385   LocationSummary* locations = invoke->GetLocations();
2386   Location object_loc = locations->InAt(1);
2387   XRegister object = object_loc.AsRegister<XRegister>();  // Object pointer.
2388   Location offset_loc = locations->InAt(2);
2389   XRegister offset = offset_loc.AsRegister<XRegister>();  // Long offset.
2390   Location out_loc = locations->Out();
2391   XRegister out = out_loc.AsRegister<XRegister>();
2392 
2393   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2394   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2395   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2396 
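  // Barrier placement: a seq_cst access is bracketed by a kAnyAny fence before the load
  // and a kLoadAny fence after it; an acquire access only needs the trailing kLoadAny
  // fence; a relaxed access needs no fences.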
2397   if (seq_cst_barrier) {
2398     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2399   }
2400 
2401   if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
2402     // JdkUnsafeGetReference/JdkUnsafeGetReferenceVolatile with Baker's read barrier case.
2403     // TODO(riscv64): Revisit when we add checking if the holder is black.
2404     Location temp = Location::NoLocation();
2405     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
2406                                                        out_loc,
2407                                                        object,
2408                                                        /*offset=*/ 0,
2409                                                        /*index=*/ offset_loc,
2410                                                        temp,
2411                                                        /*needs_null_check=*/ false);
2412   } else {
2413     // Other cases.
2414     Riscv64Assembler* assembler = codegen->GetAssembler();
2415     __ Add(out, object, offset);
2416     codegen->GetInstructionVisitor()->Load(out_loc, out, /*offset=*/ 0, type);
2417 
2418     if (type == DataType::Type::kReference) {
2419       codegen->MaybeGenerateReadBarrierSlow(
2420           invoke, out_loc, out_loc, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
2421     }
2422   }
2423 
2424   if (acquire_barrier) {
2425     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2426   }
2427 }
2428 
GenUnsafeGetAbsolute(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2429 static void GenUnsafeGetAbsolute(HInvoke* invoke,
2430                                  CodeGeneratorRISCV64* codegen,
2431                                  std::memory_order order,
2432                                  DataType::Type type) {
2433   DCHECK((type == DataType::Type::kInt8) ||
2434          (type == DataType::Type::kInt32) ||
2435          (type == DataType::Type::kInt64));
2436   LocationSummary* locations = invoke->GetLocations();
2437   Location address_loc = locations->InAt(1);
2438   XRegister address = address_loc.AsRegister<XRegister>();
2439   Location out_loc = locations->Out();
2440 
2441   bool seq_cst_barrier = order == std::memory_order_seq_cst;
2442   bool acquire_barrier = seq_cst_barrier || order == std::memory_order_acquire;
2443   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2444 
2445   if (seq_cst_barrier) {
2446     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2447   }
2448 
2449   codegen->GetInstructionVisitor()->Load(out_loc, address, /*offset=*/ 0, type);
2450 
2451   if (acquire_barrier) {
2452     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2453   }
2454 }
2455 
VisitUnsafeGet(HInvoke * invoke)2456 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGet(HInvoke* invoke) {
2457   VisitJdkUnsafeGet(invoke);
2458 }
2459 
VisitUnsafeGetAbsolute(HInvoke * invoke)2460 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2461   VisitJdkUnsafeGetAbsolute(invoke);
2462 }
2463 
VisitUnsafeGet(HInvoke * invoke)2464 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGet(HInvoke* invoke) {
2465   VisitJdkUnsafeGet(invoke);
2466 }
2467 
VisitUnsafeGetAbsolute(HInvoke * invoke)2468 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2469   VisitJdkUnsafeGetAbsolute(invoke);
2470 }
2471 
VisitUnsafeGetVolatile(HInvoke * invoke)2472 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2473   VisitJdkUnsafeGetVolatile(invoke);
2474 }
2475 
VisitUnsafeGetVolatile(HInvoke * invoke)2476 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2477   VisitJdkUnsafeGetVolatile(invoke);
2478 }
2479 
VisitUnsafeGetObject(HInvoke * invoke)2480 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
2481   VisitJdkUnsafeGetReference(invoke);
2482 }
2483 
VisitUnsafeGetObject(HInvoke * invoke)2484 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
2485   VisitJdkUnsafeGetReference(invoke);
2486 }
2487 
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2488 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2489   VisitJdkUnsafeGetReferenceVolatile(invoke);
2490 }
2491 
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2492 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2493   VisitJdkUnsafeGetReferenceVolatile(invoke);
2494 }
2495 
VisitUnsafeGetLong(HInvoke * invoke)2496 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
2497   VisitJdkUnsafeGetLong(invoke);
2498 }
2499 
VisitUnsafeGetLong(HInvoke * invoke)2500 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
2501   VisitJdkUnsafeGetLong(invoke);
2502 }
2503 
VisitUnsafeGetLongVolatile(HInvoke * invoke)2504 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2505   VisitJdkUnsafeGetLongVolatile(invoke);
2506 }
2507 
VisitUnsafeGetLongVolatile(HInvoke * invoke)2508 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2509   VisitJdkUnsafeGetLongVolatile(invoke);
2510 }
2511 
VisitUnsafeGetByte(HInvoke * invoke)2512 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
2513   VisitJdkUnsafeGetByte(invoke);
2514 }
2515 
VisitUnsafeGetByte(HInvoke * invoke)2516 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
2517   VisitJdkUnsafeGetByte(invoke);
2518 }
2519 
VisitJdkUnsafeGet(HInvoke * invoke)2520 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
2521   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2522 }
2523 
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2524 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2525   CreateUnsafeGetAbsoluteLocations(allocator_, invoke);
2526 }
2527 
VisitJdkUnsafeGet(HInvoke * invoke)2528 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
2529   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2530 }
2531 
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2532 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2533   GenUnsafeGetAbsolute(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2534 }
2535 
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2536 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2537   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2538 }
2539 
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2540 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2541   GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt32);
2542 }
2543 
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2544 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2545   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2546 }
2547 
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2548 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2549   GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
2550 }
2551 
VisitJdkUnsafeGetReference(HInvoke * invoke)2552 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2553   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2554 }
2555 
VisitJdkUnsafeGetReference(HInvoke * invoke)2556 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2557   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
2558 }
2559 
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2560 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2561   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2562 }
2563 
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2564 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2565   GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kReference);
2566 }
2567 
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2568 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2569   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2570 }
2571 
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2572 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2573   GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
2574 }
2575 
VisitJdkUnsafeGetLong(HInvoke * invoke)2576 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2577   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2578 }
2579 
VisitJdkUnsafeGetLong(HInvoke * invoke)2580 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2581   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
2582 }
2583 
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2584 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2585   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2586 }
2587 
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2588 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2589   GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt64);
2590 }
2591 
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2592 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2593   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2594 }
2595 
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2596 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2597   GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
2598 }
2599 
VisitJdkUnsafeGetByte(HInvoke * invoke)2600 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2601   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2602 }
2603 
VisitJdkUnsafeGetByte(HInvoke * invoke)2604 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2605   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
2606 }
2607 
CreateUnsafePutLocations(ArenaAllocator * allocator,HInvoke * invoke)2608 static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2609   LocationSummary* locations =
2610       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2611   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2612   locations->SetInAt(1, Location::RequiresRegister());
2613   locations->SetInAt(2, Location::RequiresRegister());
2614   locations->SetInAt(3, Location::RequiresRegister());
2615   if (kPoisonHeapReferences && invoke->InputAt(3)->GetType() == DataType::Type::kReference) {
2616     locations->AddTemp(Location::RequiresRegister());
2617   }
2618 }
2619 
CreateUnsafePutAbsoluteLocations(ArenaAllocator * allocator,HInvoke * invoke)2620 static void CreateUnsafePutAbsoluteLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2621   LocationSummary* locations =
2622       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2623   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2624   locations->SetInAt(1, Location::RequiresRegister());
2625   locations->SetInAt(2, Location::RequiresRegister());
2626 }
2627 
GenUnsafePut(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2628 static void GenUnsafePut(HInvoke* invoke,
2629                          CodeGeneratorRISCV64* codegen,
2630                          std::memory_order order,
2631                          DataType::Type type) {
2632   Riscv64Assembler* assembler = codegen->GetAssembler();
2633   LocationSummary* locations = invoke->GetLocations();
2634   XRegister base = locations->InAt(1).AsRegister<XRegister>();    // Object pointer.
2635   XRegister offset = locations->InAt(2).AsRegister<XRegister>();  // Long offset.
2636   Location value = locations->InAt(3);
2637 
2638   {
2639     // We use a block to end the scratch scope before the write barrier, thus
2640     // freeing the temporary registers so they can be used in `MaybeMarkGCCard()`.
2641     ScratchRegisterScope srs(assembler);
2642     // Heap poisoning needs two scratch registers in `Store()`.
2643     XRegister address = (kPoisonHeapReferences && type == DataType::Type::kReference)
2644         ? locations->GetTemp(0).AsRegister<XRegister>()
2645         : srs.AllocateXRegister();
2646     __ Add(address, base, offset);
2647     GenerateSet(codegen, order, value, address, /*offset=*/ 0, type);
2648   }
2649 
2650   if (type == DataType::Type::kReference) {
2651     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2652     codegen->MaybeMarkGCCard(base, value.AsRegister<XRegister>(), value_can_be_null);
2653   }
2654 }
2655 
GenUnsafePutAbsolute(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2656 static void GenUnsafePutAbsolute(HInvoke* invoke,
2657                                  CodeGeneratorRISCV64* codegen,
2658                                  std::memory_order order,
2659                                  DataType::Type type) {
2660   LocationSummary* locations = invoke->GetLocations();
2661   XRegister address = locations->InAt(1).AsRegister<XRegister>();
2662   Location value = locations->InAt(2);
2663 
2664   GenerateSet(codegen, order, value, address, /*offset=*/ 0, type);
2665 }
2666 
VisitUnsafePut(HInvoke * invoke)2667 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePut(HInvoke* invoke) {
2668   VisitJdkUnsafePut(invoke);
2669 }
2670 
VisitUnsafePutAbsolute(HInvoke * invoke)2671 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2672   VisitJdkUnsafePutAbsolute(invoke);
2673 }
2674 
VisitUnsafePut(HInvoke * invoke)2675 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePut(HInvoke* invoke) {
2676   VisitJdkUnsafePut(invoke);
2677 }
2678 
VisitUnsafePutAbsolute(HInvoke * invoke)2679 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2680   VisitJdkUnsafePutAbsolute(invoke);
2681 }
2682 
VisitUnsafePutOrderedInt(HInvoke * invoke)2683 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutOrderedInt(HInvoke* invoke) {
2684   VisitJdkUnsafePutOrderedInt(invoke);
2685 }
2686 
VisitUnsafePutOrderedInt(HInvoke * invoke)2687 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutOrderedInt(HInvoke* invoke) {
2688   VisitJdkUnsafePutOrderedInt(invoke);
2689 }
2690 
VisitUnsafePutVolatile(HInvoke * invoke)2691 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
2692   VisitJdkUnsafePutVolatile(invoke);
2693 }
2694 
VisitUnsafePutVolatile(HInvoke * invoke)2695 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
2696   VisitJdkUnsafePutVolatile(invoke);
2697 }
2698 
VisitUnsafePutObject(HInvoke * invoke)2699 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
2700   VisitJdkUnsafePutReference(invoke);
2701 }
2702 
VisitUnsafePutObject(HInvoke * invoke)2703 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
2704   VisitJdkUnsafePutReference(invoke);
2705 }
2706 
VisitUnsafePutOrderedObject(HInvoke * invoke)2707 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutOrderedObject(HInvoke* invoke) {
2708   VisitJdkUnsafePutOrderedObject(invoke);
2709 }
2710 
VisitUnsafePutOrderedObject(HInvoke * invoke)2711 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutOrderedObject(HInvoke* invoke) {
2712   VisitJdkUnsafePutOrderedObject(invoke);
2713 }
2714 
VisitUnsafePutObjectVolatile(HInvoke * invoke)2715 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2716   VisitJdkUnsafePutReferenceVolatile(invoke);
2717 }
2718 
VisitUnsafePutObjectVolatile(HInvoke * invoke)2719 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2720   VisitJdkUnsafePutReferenceVolatile(invoke);
2721 }
2722 
VisitUnsafePutLong(HInvoke * invoke)2723 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
2724   VisitJdkUnsafePutLong(invoke);
2725 }
2726 
VisitUnsafePutLong(HInvoke * invoke)2727 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
2728   VisitJdkUnsafePutLong(invoke);
2729 }
2730 
VisitUnsafePutLongOrdered(HInvoke * invoke)2731 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2732   VisitJdkUnsafePutLongOrdered(invoke);
2733 }
2734 
VisitUnsafePutLongOrdered(HInvoke * invoke)2735 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2736   VisitJdkUnsafePutLongOrdered(invoke);
2737 }
2738 
VisitUnsafePutLongVolatile(HInvoke * invoke)2739 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2740   VisitJdkUnsafePutLongVolatile(invoke);
2741 }
2742 
VisitUnsafePutLongVolatile(HInvoke * invoke)2743 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2744   VisitJdkUnsafePutLongVolatile(invoke);
2745 }
2746 
VisitUnsafePutByte(HInvoke * invoke)2747 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
2748   VisitJdkUnsafePutByte(invoke);
2749 }
2750 
VisitUnsafePutByte(HInvoke * invoke)2751 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
2752   VisitJdkUnsafePutByte(invoke);
2753 }
2754 
VisitJdkUnsafePut(HInvoke * invoke)2755 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
2756   CreateUnsafePutLocations(allocator_, invoke);
2757 }
2758 
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2759 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2760   CreateUnsafePutAbsoluteLocations(allocator_, invoke);
2761 }
2762 
VisitJdkUnsafePut(HInvoke * invoke)2763 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
2764   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2765 }
2766 
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2767 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2768   GenUnsafePutAbsolute(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2769 }
2770 
VisitJdkUnsafePutOrderedInt(HInvoke * invoke)2771 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutOrderedInt(HInvoke* invoke) {
2772   CreateUnsafePutLocations(allocator_, invoke);
2773 }
2774 
VisitJdkUnsafePutOrderedInt(HInvoke * invoke)2775 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutOrderedInt(HInvoke* invoke) {
2776   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
2777 }
2778 
VisitJdkUnsafePutRelease(HInvoke * invoke)2779 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2780   CreateUnsafePutLocations(allocator_, invoke);
2781 }
2782 
VisitJdkUnsafePutRelease(HInvoke * invoke)2783 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2784   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
2785 }
2786 
VisitJdkUnsafePutVolatile(HInvoke * invoke)2787 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2788   CreateUnsafePutLocations(allocator_, invoke);
2789 }
2790 
VisitJdkUnsafePutVolatile(HInvoke * invoke)2791 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2792   GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
2793 }
2794 
VisitJdkUnsafePutReference(HInvoke * invoke)2795 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2796   CreateUnsafePutLocations(allocator_, invoke);
2797 }
2798 
VisitJdkUnsafePutReference(HInvoke * invoke)2799 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2800   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
2801 }
2802 
VisitJdkUnsafePutOrderedObject(HInvoke * invoke)2803 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutOrderedObject(HInvoke* invoke) {
2804   CreateUnsafePutLocations(allocator_, invoke);
2805 }
2806 
VisitJdkUnsafePutOrderedObject(HInvoke * invoke)2807 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutOrderedObject(HInvoke* invoke) {
2808   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
2809 }
2810 
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2811 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2812   CreateUnsafePutLocations(allocator_, invoke);
2813 }
2814 
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2815 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2816   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
2817 }
2818 
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2819 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2820   CreateUnsafePutLocations(allocator_, invoke);
2821 }
2822 
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2823 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2824   GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
2825 }
2826 
VisitJdkUnsafePutLong(HInvoke * invoke)2827 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2828   CreateUnsafePutLocations(allocator_, invoke);
2829 }
2830 
VisitJdkUnsafePutLong(HInvoke * invoke)2831 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2832   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
2833 }
2834 
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2835 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2836   CreateUnsafePutLocations(allocator_, invoke);
2837 }
2838 
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2839 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2840   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
2841 }
2842 
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2843 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2844   CreateUnsafePutLocations(allocator_, invoke);
2845 }
2846 
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2847 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2848   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
2849 }
2850 
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2851 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2852   CreateUnsafePutLocations(allocator_, invoke);
2853 }
2854 
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2855 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2856   GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
2857 }
2858 
VisitJdkUnsafePutByte(HInvoke * invoke)2859 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2860   CreateUnsafePutLocations(allocator_, invoke);
2861 }
2862 
VisitJdkUnsafePutByte(HInvoke * invoke)2863 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2864   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
2865 }
2866 
CreateUnsafeCASLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorRISCV64 * codegen)2867 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2868                                      HInvoke* invoke,
2869                                      CodeGeneratorRISCV64* codegen) {
2870   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
2871   LocationSummary* locations = new (allocator) LocationSummary(
2872       invoke,
2873       can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
2874       kIntrinsified);
2875   if (can_call && kUseBakerReadBarrier) {
2876     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2877   }
2878   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2879   locations->SetInAt(1, Location::RequiresRegister());
2880   locations->SetInAt(2, Location::RequiresRegister());
2881   locations->SetInAt(3, Location::RequiresRegister());
2882   locations->SetInAt(4, Location::RequiresRegister());
2883 
2884   locations->SetOut(Location::RequiresRegister());
2885 }
2886 
GenUnsafeCas(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,DataType::Type type)2887 static void GenUnsafeCas(HInvoke* invoke, CodeGeneratorRISCV64* codegen, DataType::Type type) {
2888   Riscv64Assembler* assembler = codegen->GetAssembler();
2889   LocationSummary* locations = invoke->GetLocations();
2890   XRegister out = locations->Out().AsRegister<XRegister>();            // Boolean result.
2891   XRegister object = locations->InAt(1).AsRegister<XRegister>();       // Object pointer.
2892   XRegister offset = locations->InAt(2).AsRegister<XRegister>();       // Long offset.
2893   XRegister expected = locations->InAt(3).AsRegister<XRegister>();     // Expected.
2894   XRegister new_value = locations->InAt(4).AsRegister<XRegister>();    // New value.
2895 
2896   // This must come before allocating the scratch registers, as MaybeMarkGCCard() also uses them.
2897   if (type == DataType::Type::kReference) {
2898     // Mark card for object assuming new value is stored.
2899     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
2900     codegen->MaybeMarkGCCard(object, new_value, new_value_can_be_null);
2901   }
2902 
2903   ScratchRegisterScope srs(assembler);
2904   XRegister tmp_ptr = srs.AllocateXRegister();                         // Pointer to actual memory.
2905   XRegister old_value;                                                 // Value in memory.
2906 
2907   Riscv64Label exit_loop_label;
2908   Riscv64Label* exit_loop = &exit_loop_label;
2909   Riscv64Label* cmp_failure = &exit_loop_label;
2910 
2911   ReadBarrierCasSlowPathRISCV64* slow_path = nullptr;
2912   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
2913     // We need to store the `old_value` in a non-scratch register to make sure
2914     // the read barrier in the slow path does not clobber it.
2915     old_value = locations->GetTemp(0).AsRegister<XRegister>();  // The old value from main path.
2916     // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
2917     // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
2918     // as we do have marking entrypoints on riscv64 even for scratch registers.
2919     XRegister old_value_temp = srs.AllocateXRegister();
2920     slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
2921         invoke,
2922         std::memory_order_seq_cst,
2923         /*strong=*/ true,
2924         object,
2925         offset,
2926         expected,
2927         new_value,
2928         old_value,
2929         old_value_temp,
2930         /*store_result=*/ old_value_temp,  // Let the SC result clobber the reloaded old_value.
2931         /*update_old_value=*/ false,
2932         codegen);
2933     codegen->AddSlowPath(slow_path);
2934     exit_loop = slow_path->GetExitLabel();
2935     cmp_failure = slow_path->GetEntryLabel();
2936   } else {
2937     old_value = srs.AllocateXRegister();
2938   }
2939 
2940   __ Add(tmp_ptr, object, offset);
2941 
2942   // Pre-populate the result register with failure.
2943   __ Li(out, 0);
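  // On comparison failure, `GenerateCompareAndSet()` branches to `cmp_failure`: either
  // directly to `exit_loop`, so the `Li(out, 1)` below is skipped and `out` keeps 0, or
  // to the read barrier slow path, which re-checks with a marked old value and exits
  // through its success label or `exit_loop`.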
2944 
2945   GenerateCompareAndSet(assembler,
2946                         type,
2947                         std::memory_order_seq_cst,
2948                         /*strong=*/ true,
2949                         cmp_failure,
2950                         tmp_ptr,
2951                         new_value,
2952                         old_value,
2953                         /*mask=*/ kNoXRegister,
2954                         /*masked=*/ kNoXRegister,
2955                         /*store_result=*/ old_value,  // Let the SC result clobber the `old_value`.
2956                         expected);
2957 
2958   DCHECK_EQ(slow_path != nullptr, type == DataType::Type::kReference && codegen->EmitReadBarrier());
2959   if (slow_path != nullptr) {
2960     __ Bind(slow_path->GetSuccessExitLabel());
2961   }
2962 
2963   // Indicate success; this point is reached only if the store-conditional succeeded.
2964   __ Li(out, 1);
2965 
2966   __ Bind(exit_loop);
2967 }
2968 
VisitUnsafeCASInt(HInvoke * invoke)2969 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
2970   VisitJdkUnsafeCASInt(invoke);
2971 }
2972 
VisitUnsafeCASInt(HInvoke * invoke)2973 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
2974   VisitJdkUnsafeCASInt(invoke);
2975 }
2976 
VisitUnsafeCASLong(HInvoke * invoke)2977 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
2978   VisitJdkUnsafeCASLong(invoke);
2979 }
2980 
VisitUnsafeCASLong(HInvoke * invoke)2981 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
2982   VisitJdkUnsafeCASLong(invoke);
2983 }
2984 
VisitUnsafeCASObject(HInvoke * invoke)2985 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
2986   VisitJdkUnsafeCASObject(invoke);
2987 }
2988 
VisitUnsafeCASObject(HInvoke * invoke)2989 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
2990   VisitJdkUnsafeCASObject(invoke);
2991 }
2992 
VisitJdkUnsafeCASInt(HInvoke * invoke)2993 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2994   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2995   VisitJdkUnsafeCompareAndSetInt(invoke);
2996 }
2997 
VisitJdkUnsafeCASInt(HInvoke * invoke)2998 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2999   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3000   VisitJdkUnsafeCompareAndSetInt(invoke);
3001 }
3002 
VisitJdkUnsafeCASLong(HInvoke * invoke)3003 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
3004   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
3005   VisitJdkUnsafeCompareAndSetLong(invoke);
3006 }
3007 
VisitJdkUnsafeCASLong(HInvoke * invoke)3008 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
3009   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
3010   VisitJdkUnsafeCompareAndSetLong(invoke);
3011 }
3012 
VisitJdkUnsafeCASObject(HInvoke * invoke)3013 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3014   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3015   VisitJdkUnsafeCompareAndSetReference(invoke);
3016 }
3017 
VisitJdkUnsafeCASObject(HInvoke * invoke)3018 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3019   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3020   VisitJdkUnsafeCompareAndSetReference(invoke);
3021 }
3022 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)3023 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3024   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
3025 }
3026 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)3027 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3028   GenUnsafeCas(invoke, codegen_, DataType::Type::kInt32);
3029 }
3030 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)3031 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
3032   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
3033 }
3034 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)3035 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
3036   GenUnsafeCas(invoke, codegen_, DataType::Type::kInt64);
3037 }
3038 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3039 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3040   // The only supported read barrier implementation is the Baker-style read barriers.
3041   if (codegen_->EmitNonBakerReadBarrier()) {
3042     return;
3043   }
3044 
3045   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
3046   if (kPoisonHeapReferences) {
3047     return;
3048   }
3049 
3050   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
3051   if (codegen_->EmitReadBarrier()) {
3052     DCHECK(kUseBakerReadBarrier);
3053     // We need one non-scratch temporary register for read barrier.
3054     LocationSummary* locations = invoke->GetLocations();
3055     locations->AddTemp(Location::RequiresRegister());
3056   }
3057 }
3058 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3059 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3060   GenUnsafeCas(invoke, codegen_, DataType::Type::kReference);
3061 }
3062 
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorRISCV64 * codegen)3063 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
3064                                               HInvoke* invoke,
3065                                               CodeGeneratorRISCV64* codegen) {
3066   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
3067   LocationSummary* locations = new (allocator) LocationSummary(
3068       invoke,
3069       can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
3070       kIntrinsified);
3071   if (can_call && kUseBakerReadBarrier) {
3072     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3073   }
3074   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3075   locations->SetInAt(1, Location::RequiresRegister());
3076   locations->SetInAt(2, Location::RequiresRegister());
3077   locations->SetInAt(3, Location::RequiresRegister());
3078 
3079   // Request another temporary register for methods that don't return a value.
3080   DataType::Type return_type = invoke->GetType();
3081   const bool is_void = return_type == DataType::Type::kVoid;
3082   if (is_void) {
3083     locations->AddTemp(Location::RequiresRegister());
3084   } else {
3085     locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3086   }
3087 }
3088 
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op)3089 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
3090                                   DataType::Type type,
3091                                   CodeGeneratorRISCV64* codegen,
3092                                   GetAndUpdateOp get_and_update_op) {
3093   // Currently only used for these GetAndUpdateOp. Might be fine for other ops but double check
3094   // before using.
3095   DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
3096 
3097   Riscv64Assembler* assembler = codegen->GetAssembler();
3098   LocationSummary* locations = invoke->GetLocations();
3099   DataType::Type return_type = invoke->GetType();
3100   const bool is_void = return_type == DataType::Type::kVoid;
3101   // We use a temporary for void methods, as we don't return the value.
3102   Location out_or_temp_loc =
3103       is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
3104   XRegister out_or_temp = out_or_temp_loc.AsRegister<XRegister>();  // Result.
3105   XRegister base = locations->InAt(1).AsRegister<XRegister>();      // Object pointer.
3106   XRegister offset = locations->InAt(2).AsRegister<XRegister>();    // Long offset.
3107   XRegister arg = locations->InAt(3).AsRegister<XRegister>();       // New value or addend.
3108 
3109   // This must come before allocating the scratch registers, as MaybeMarkGCCard() also uses them.
3110   if (type == DataType::Type::kReference) {
3111     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3112     // Mark card for object assuming a new value is stored.
3113     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
3114     codegen->MaybeMarkGCCard(base, /*value=*/arg, new_value_can_be_null);
3115   }
3116 
3117   ScratchRegisterScope srs(assembler);
3118   XRegister tmp_ptr = srs.AllocateXRegister();                        // Pointer to actual memory.
3119   __ Add(tmp_ptr, base, offset);
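  // Heap references are 32-bit, so a reference swap is performed as a 32-bit AMO; when
  // the result is used, it is zero-extended below before any read barrier handling.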
3120   GenerateGetAndUpdate(codegen,
3121                        get_and_update_op,
3122                        (type == DataType::Type::kReference) ? DataType::Type::kInt32 : type,
3123                        std::memory_order_seq_cst,
3124                        tmp_ptr,
3125                        arg,
3126                        /*old_value=*/ out_or_temp,
3127                        /*mask=*/ kNoXRegister,
3128                        /*temp=*/ kNoXRegister);
3129 
3130   if (!is_void && type == DataType::Type::kReference) {
3131     __ ZextW(out_or_temp, out_or_temp);
3132     if (codegen->EmitReadBarrier()) {
3133       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3134       if (kUseBakerReadBarrier) {
3135         // Use RA as temp. It is clobbered in the slow path anyway.
3136         static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
3137         SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
3138             invoke, out_or_temp_loc, kBakerReadBarrierTemp);
3139         codegen->EmitBakerReadBarierMarkingCheck(
3140             rb_slow_path, out_or_temp_loc, kBakerReadBarrierTemp);
3141       } else {
3142         codegen->GenerateReadBarrierSlow(invoke,
3143                                          out_or_temp_loc,
3144                                          out_or_temp_loc,
3145                                          Location::RegisterLocation(base),
3146                                          /*offset=*/ 0u,
3147                                          /*index=*/ Location::RegisterLocation(offset));
3148       }
3149     }
3150   }
3151 }
3152 
VisitUnsafeGetAndAddInt(HInvoke * invoke)3153 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3154   VisitJdkUnsafeGetAndAddInt(invoke);
3155 }
3156 
VisitUnsafeGetAndAddInt(HInvoke * invoke)3157 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3158   VisitJdkUnsafeGetAndAddInt(invoke);
3159 }
3160 
VisitUnsafeGetAndAddLong(HInvoke * invoke)3161 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3162   VisitJdkUnsafeGetAndAddLong(invoke);
3163 }
3164 
VisitUnsafeGetAndAddLong(HInvoke * invoke)3165 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3166   VisitJdkUnsafeGetAndAddLong(invoke);
3167 }
3168 
VisitUnsafeGetAndSetInt(HInvoke * invoke)3169 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3170   VisitJdkUnsafeGetAndSetInt(invoke);
3171 }
3172 
VisitUnsafeGetAndSetInt(HInvoke * invoke)3173 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3174   VisitJdkUnsafeGetAndSetInt(invoke);
3175 }
3176 
VisitUnsafeGetAndSetLong(HInvoke * invoke)3177 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3178   VisitJdkUnsafeGetAndSetLong(invoke);
3179 }
3180 
VisitUnsafeGetAndSetLong(HInvoke * invoke)3181 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3182   VisitJdkUnsafeGetAndSetLong(invoke);
3183 }
3184 
VisitUnsafeGetAndSetObject(HInvoke * invoke)3185 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3186   VisitJdkUnsafeGetAndSetReference(invoke);
3187 }
3188 
VisitUnsafeGetAndSetObject(HInvoke * invoke)3189 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3190   VisitJdkUnsafeGetAndSetReference(invoke);
3191 }
3192 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)3193 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3194   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3195 }
3196 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)3197 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3198   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
3199 }
3200 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)3201 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3202   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3203 }
3204 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)3205 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3206   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
3207 }
3208 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)3209 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3210   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3211 }
3212 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)3213 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3214   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
3215 }
3216 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)3217 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3218   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3219 }
3220 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)3221 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3222   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
3223 }
3224 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)3225 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3226   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
3227   if (kPoisonHeapReferences) {
3228     return;
3229   }
3230 
3231   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3232 }
3233 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)3234 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3235   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
3236 }
3237 
VisitStringCompareTo(HInvoke * invoke)3238 void IntrinsicLocationsBuilderRISCV64::VisitStringCompareTo(HInvoke* invoke) {
3239   LocationSummary* locations =
3240       new (allocator_) LocationSummary(invoke,
3241                                        invoke->InputAt(1)->CanBeNull()
3242                                            ? LocationSummary::kCallOnSlowPath
3243                                            : LocationSummary::kNoCall,
3244                                        kIntrinsified);
3245   locations->SetInAt(0, Location::RequiresRegister());
3246   locations->SetInAt(1, Location::RequiresRegister());
3247   locations->AddRegisterTemps(3);
3248   // Need a temporary register for the String compression feature.
3249   if (mirror::kUseStringCompression) {
3250     locations->AddTemp(Location::RequiresRegister());
3251   }
3252   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3253 }
3254 
VisitStringCompareTo(HInvoke * invoke)3255 void IntrinsicCodeGeneratorRISCV64::VisitStringCompareTo(HInvoke* invoke) {
3256   Riscv64Assembler* assembler = GetAssembler();
3257   DCHECK(assembler->IsExtensionEnabled(Riscv64Extension::kZbb));
3258   LocationSummary* locations = invoke->GetLocations();
3259 
3260   XRegister str = locations->InAt(0).AsRegister<XRegister>();
3261   XRegister arg = locations->InAt(1).AsRegister<XRegister>();
3262   XRegister out = locations->Out().AsRegister<XRegister>();
3263 
3264   XRegister temp0 = locations->GetTemp(0).AsRegister<XRegister>();
3265   XRegister temp1 = locations->GetTemp(1).AsRegister<XRegister>();
3266   XRegister temp2 = locations->GetTemp(2).AsRegister<XRegister>();
3267   XRegister temp3 = kNoXRegister;
3268   if (mirror::kUseStringCompression) {
3269     temp3 = locations->GetTemp(3).AsRegister<XRegister>();
3270   }
3271 
3272   Riscv64Label loop;
3273   Riscv64Label find_char_diff;
3274   Riscv64Label end;
3275   Riscv64Label different_compression;
3276 
3277   // Get offsets of count and value fields within a string object.
3278   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
3279   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
3280 
3281   // Note that the null check must have been done earlier.
3282   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
3283 
3284   // Take the slow path and throw if the argument can be null and actually is null.
3285   SlowPathCodeRISCV64* slow_path = nullptr;
3286   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
3287   if (can_slow_path) {
3288     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
3289     codegen_->AddSlowPath(slow_path);
3290     __ Beqz(arg, slow_path->GetEntryLabel());
3291   }
3292 
3293   // Reference equality check, return 0 if same reference.
3294   __ Sub(out, str, arg);
3295   __ Beqz(out, &end);
3296 
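  // With string compression enabled, the `count` field encodes (char count << 1) | flag,
  // where flag == 0 marks compressed (8-bit) data and flag == 1 marks uncompressed
  // (UTF-16) data; the shifts below strip the flag and the later `Andi` instructions
  // extract it.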
3297   if (mirror::kUseStringCompression) {
3298     // Load `count` fields of this and argument strings.
3299     __ Loadwu(temp3, str, count_offset);
3300     __ Loadwu(temp2, arg, count_offset);
3301     // Clean out compression flag from lengths.
3302     __ Srliw(temp0, temp3, 1u);
3303     __ Srliw(temp1, temp2, 1u);
3304   } else {
3305     // Load lengths of this and argument strings.
3306     __ Loadwu(temp0, str, count_offset);
3307     __ Loadwu(temp1, arg, count_offset);
3308   }
3309   // out = length diff.
3310   __ Subw(out, temp0, temp1);
3311 
3312   // Find the length of the shorter string
3313   __ Minu(temp0, temp0, temp1);
3314   // Shorter string is empty?
3315   __ Beqz(temp0, &end);
3316 
3317   if (mirror::kUseStringCompression) {
3318     // Extract both compression flags
3319     __ Andi(temp3, temp3, 1);
3320     __ Andi(temp2, temp2, 1);
3321     __ Bne(temp2, temp3, &different_compression);
3322   }
3323   // Store offset of string value in preparation for comparison loop.
3324   __ Li(temp1, value_offset);
3325   if (mirror::kUseStringCompression) {
3326     // For string compression, calculate the number of bytes to compare (not chars).
3327     __ Sll(temp0, temp0, temp3);
3328   }
3329 
3330   // Assertions that must hold in order to compare strings 8 bytes at a time.
3331   DCHECK_ALIGNED(value_offset, 8);
3332   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
3333 
3334   constexpr size_t char_size = DataType::Size(DataType::Type::kUint16);
3335   static_assert(char_size == 2u, "Char expected to be 2 bytes wide");
3336 
3337   ScratchRegisterScope scratch_scope(assembler);
3338   XRegister temp4 = scratch_scope.AllocateXRegister();
3339 
3340   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
3341   __ Bind(&loop);
3342   __ Add(temp4, str, temp1);
3343   __ Ld(temp4, temp4, 0);
3344   __ Add(temp2, arg, temp1);
3345   __ Ld(temp2, temp2, 0);
3346   __ Bne(temp4, temp2, &find_char_diff);
3347   __ Addi(temp1, temp1, char_size * 4);
3348   // With string compression, we have compared 8 bytes, otherwise 4 chars.
3349   __ Addi(temp0, temp0, (mirror::kUseStringCompression) ? -8 : -4);
3350   __ Bgtz(temp0, &loop);
3351   __ J(&end);
3352 
3353   // Find the single character difference.
3354   __ Bind(&find_char_diff);
3355   // Get the bit position of the first character that differs.
3356   __ Xor(temp1, temp2, temp4);
3357   __ Ctz(temp1, temp1);
3358 
3359   // If the number of chars remaining <= the index where the difference occurs (0-3), then
3360   // the difference occurs outside the remaining string data, so just return length diff (out).
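  // Convert the bit index to an element index: with string compression `temp0` counts bytes, so
  // shift right by 3; otherwise it counts 16-bit chars, so shift right by 4.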
3361   __ Srliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
3362   __ Ble(temp0, temp1, &end);
3363 
3364   // Extract the characters and calculate the difference.
3365   __ Slliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
3366   if (mirror::kUseStringCompression) {
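    // `temp3` holds the shared compression flag (0 = compressed, 1 = uncompressed). For
    // uncompressed strings, clear bit 3 of the bit offset so that it is aligned to a whole
    // 16-bit character rather than to the differing byte.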
3367     __ Slliw(temp3, temp3, 3u);
3368     __ Andn(temp1, temp1, temp3);
3369   }
3370   __ Srl(temp2, temp2, temp1);
3371   __ Srl(temp4, temp4, temp1);
3372   if (mirror::kUseStringCompression) {
3373     __ Li(temp0, -256);           // ~0xff
3374     __ Sllw(temp0, temp0, temp3);  // temp3 = 0 or 8, temp0 := ~0xff or ~0xffff
3375     __ Andn(temp4, temp4, temp0);  // Extract 8 or 16 bits.
3376     __ Andn(temp2, temp2, temp0);  // Extract 8 or 16 bits.
3377   } else {
3378     __ ZextH(temp4, temp4);
3379     __ ZextH(temp2, temp2);
3380   }
3381 
3382   __ Subw(out, temp4, temp2);
3383 
3384   if (mirror::kUseStringCompression) {
3385     __ J(&end);
3386     __ Bind(&different_compression);
3387 
3388     // Comparison for different compression style.
3389     constexpr size_t c_char_size = DataType::Size(DataType::Type::kInt8);
3390     static_assert(c_char_size == 1u, "Compressed char expected to be 1 byte wide");
3391 
3392     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
3393     __ Xor(temp4, str, arg);
3394     __ Addi(temp3, temp3, -1);    // -1 if str is compressed, 0 otherwise
3395     __ And(temp2, temp4, temp3);  // str^arg if str is compressed, 0 otherwise
3396     __ Xor(temp1, temp2, arg);    // str if str is compressed, arg otherwise
3397     __ Xor(temp2, temp2, str);    // arg if str is compressed, str otherwise
3398 
3399     // We want to free up the temp3, currently holding `str` compression flag, for comparison.
3400     // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
3401     // as unsigned. This will allow `addi temp0, temp0, -2; bgtz different_compression_loop`
3402     // to serve as the loop condition.
3403     __ Sh1Add(temp0, temp0, temp3);
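    // `temp0` is now 2 * count - 1 (odd) if "str" is the compressed string, or 2 * count (even)
    // if "arg" is, so the bottom bit records which string is compressed.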
3404 
3405     // Adjust temp1 and temp2 from string pointers to data pointers.
3406     __ Addi(temp1, temp1, value_offset);
3407     __ Addi(temp2, temp2, value_offset);
3408 
3409     Riscv64Label different_compression_loop;
3410     Riscv64Label different_compression_diff;
3411 
3412     __ Bind(&different_compression_loop);
3413     __ Lbu(temp4, temp1, 0);
3414     __ Addiw(temp1, temp1, c_char_size);
3415     __ Lhu(temp3, temp2, 0);
3416     __ Addi(temp2, temp2, char_size);
3417     __ Sub(temp4, temp4, temp3);
3418     __ Bnez(temp4, &different_compression_diff);
3419     __ Addi(temp0, temp0, -2);
3420     __ Bgtz(temp0, &different_compression_loop);
3421     __ J(&end);
3422 
3423     // Calculate the difference.
3424     __ Bind(&different_compression_diff);
3425     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
3426                   "Expecting 0=compressed, 1=uncompressed");
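    // `temp4` holds (char from the compressed string) - (char from the uncompressed string).
    // If "arg" is the compressed one (bottom bit of `temp0` is 0), negate the difference with
    // the xor/sub by -1 below; if "str" is compressed, xor/sub by 0 leaves it unchanged.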
3427     __ Andi(temp0, temp0, 1);
3428     __ Addi(temp0, temp0, -1);
3429     __ Xor(out, temp4, temp0);
3430     __ Sub(out, out, temp0);
3431   }
3432 
3433   __ Bind(&end);
3434 
3435   if (can_slow_path) {
3436     __ Bind(slow_path->GetExitLabel());
3437   }
3438 }
3439 
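// Slow path shared by the VarHandle intrinsics. Besides the usual runtime call, it can also emit
// the out-of-line code for the byte array view case: if the main path branched to
// GetByteArrayViewCheckLabel(), EmitNativeCode() first emits that code (EmitByteArrayViewCode())
// using the memory order and operation arguments recorded below.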
3440 class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 {
3441  public:
3442   VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
3443       : IntrinsicSlowPathRISCV64(invoke),
3444         order_(order),
3445         return_success_(false),
3446         strong_(false),
3447         get_and_update_op_(GetAndUpdateOp::kAdd) {
3448   }
3449 
3450   Riscv64Label* GetByteArrayViewCheckLabel() {
3451     return &byte_array_view_check_label_;
3452   }
3453 
3454   Riscv64Label* GetNativeByteOrderLabel() {
3455     return &native_byte_order_label_;
3456   }
3457 
3458   void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3459     if (return_success) {
3460       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3461     } else {
3462       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3463     }
3464     return_success_ = return_success;
3465     strong_ = strong;
3466   }
3467 
3468   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3469     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3470     get_and_update_op_ = get_and_update_op;
3471   }
3472 
3473   void EmitNativeCode(CodeGenerator* codegen_in) override {
3474     if (GetByteArrayViewCheckLabel()->IsLinked()) {
3475       EmitByteArrayViewCode(codegen_in);
3476     }
3477     IntrinsicSlowPathRISCV64::EmitNativeCode(codegen_in);
3478   }
3479 
3480  private:
3481   HInvoke* GetInvoke() const {
3482     return GetInstruction()->AsInvoke();
3483   }
3484 
3485   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3486     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3487   }
3488 
3489   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
3490 
3491   Riscv64Label byte_array_view_check_label_;
3492   Riscv64Label native_byte_order_label_;
3493   // Shared parameter for all VarHandle intrinsics.
3494   std::memory_order order_;
3495   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
3496   bool return_success_;
3497   bool strong_;
3498   // Extra argument for GenerateVarHandleGetAndUpdate().
3499   GetAndUpdateOp get_and_update_op_;
3500 };
3501 
3502 // Generate subtype check without read barriers.
3503 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorRISCV64* codegen,
3504                                                     SlowPathCodeRISCV64* slow_path,
3505                                                     XRegister object,
3506                                                     XRegister type,
3507                                                     bool object_can_be_null = true) {
3508   Riscv64Assembler* assembler = codegen->GetAssembler();
3509 
3510   const MemberOffset class_offset = mirror::Object::ClassOffset();
3511   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3512 
3513   Riscv64Label success;
3514   if (object_can_be_null) {
3515     __ Beqz(object, &success);
3516   }
3517 
3518   ScratchRegisterScope srs(assembler);
3519   XRegister temp = srs.AllocateXRegister();
3520 
3521   // Note: The `type` can be `TMP`. We're using "bare" local branches to enforce that they shall
3522   // not be expanded and the scratch register `TMP` shall not be clobbered if taken. Taking the
3523   // branch to the slow path can theoretically clobber `TMP` (if outside the 1 MiB range).
3524   __ Loadwu(temp, object, class_offset.Int32Value());
3525   codegen->MaybeUnpoisonHeapReference(temp);
3526   Riscv64Label loop;
3527   __ Bind(&loop);
3528   __ Beq(type, temp, &success, /*is_bare=*/ true);
3529   // We may not have another scratch register for `Loadwu()`. Use `Lwu()` directly.
3530   DCHECK(IsInt<12>(super_class_offset.Int32Value()));
3531   __ Lwu(temp, temp, super_class_offset.Int32Value());
3532   codegen->MaybeUnpoisonHeapReference(temp);
3533   __ Beqz(temp, slow_path->GetEntryLabel());
3534   __ J(&loop, /*is_bare=*/ true);
3535   __ Bind(&success);
3536 }
3537 
3538 // Check access mode and the primitive type from VarHandle.varType.
3539 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3540 // check without read barrier, so it can have false negatives which we handle in the slow path.
3541 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3542                                                         CodeGeneratorRISCV64* codegen,
3543                                                         SlowPathCodeRISCV64* slow_path,
3544                                                         DataType::Type type) {
3545   mirror::VarHandle::AccessMode access_mode =
3546       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3547   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3548 
3549   Riscv64Assembler* assembler = codegen->GetAssembler();
3550   LocationSummary* locations = invoke->GetLocations();
3551   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3552 
3553   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3554   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3555   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3556 
3557   ScratchRegisterScope srs(assembler);
3558   XRegister temp = srs.AllocateXRegister();
3559   XRegister temp2 = srs.AllocateXRegister();
3560 
3561   // Check that the operation is permitted.
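  // Bit `access_mode` of the bit mask is set iff that mode is supported. Shifting it into the
  // sign bit lets a single Bgez (sign bit clear) branch to the slow path when it is not.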
3562   __ Loadw(temp, varhandle, access_mode_bit_mask_offset.Int32Value());
3563   DCHECK_LT(enum_cast<uint32_t>(access_mode), 31u);  // We cannot avoid the shift below.
3564   __ Slliw(temp, temp, 31 - enum_cast<uint32_t>(access_mode));  // Shift tested bit to sign bit.
3565   __ Bgez(temp, slow_path->GetEntryLabel());  // If not permitted, go to slow path.
3566 
3567   // For primitive types, we do not need a read barrier when loading the varType reference, as
3568   // it is only used to load a constant field through that reference. For reference types, we
3569   // deliberately avoid the read barrier, letting the slow path handle the false negatives.
3570   __ Loadwu(temp, varhandle, var_type_offset.Int32Value());
3571   codegen->MaybeUnpoisonHeapReference(temp);
3572 
3573   // Check the varType.primitiveType field against the type we're trying to use.
3574   __ Loadhu(temp2, temp, primitive_type_offset.Int32Value());
3575   if (primitive_type == Primitive::kPrimNot) {
3576     static_assert(Primitive::kPrimNot == 0);
3577     __ Bnez(temp2, slow_path->GetEntryLabel());
3578   } else {
3579     __ Li(temp, enum_cast<int32_t>(primitive_type));  // `temp` can be clobbered.
3580     __ Bne(temp2, temp, slow_path->GetEntryLabel());
3581   }
3582 
3583   srs.FreeXRegister(temp2);
3584 
3585   if (type == DataType::Type::kReference) {
3586     // Check reference arguments against the varType.
3587     // False negatives due to varType being an interface or array type
3588     // or due to the missing read barrier are handled by the slow path.
3589     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3590     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3591     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3592     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3593       HInstruction* arg = invoke->InputAt(arg_index);
3594       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3595       if (!arg->IsNullConstant()) {
3596         XRegister arg_reg = locations->InAt(arg_index).AsRegister<XRegister>();
3597         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp);
3598       }
3599     }
3600   }
3601 }
3602 
3603 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3604                                               CodeGeneratorRISCV64* codegen,
3605                                               SlowPathCodeRISCV64* slow_path) {
3606   Riscv64Assembler* assembler = codegen->GetAssembler();
3607   XRegister varhandle = invoke->GetLocations()->InAt(0).AsRegister<XRegister>();
3608 
3609   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3610 
3611   ScratchRegisterScope srs(assembler);
3612   XRegister temp = srs.AllocateXRegister();
3613 
3614   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3615   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3616   __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3617   __ Bnez(temp, slow_path->GetEntryLabel());
3618 }
3619 
3620 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3621                                                  CodeGeneratorRISCV64* codegen,
3622                                                  SlowPathCodeRISCV64* slow_path) {
3623   VarHandleOptimizations optimizations(invoke);
3624   Riscv64Assembler* assembler = codegen->GetAssembler();
3625   LocationSummary* locations = invoke->GetLocations();
3626   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3627   XRegister object = locations->InAt(1).AsRegister<XRegister>();
3628 
3629   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3630   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3631 
3632   // Null-check the object.
3633   if (!optimizations.GetSkipObjectNullCheck()) {
3634     __ Beqz(object, slow_path->GetEntryLabel());
3635   }
3636 
3637   if (!optimizations.GetUseKnownImageVarHandle()) {
3638     ScratchRegisterScope srs(assembler);
3639     XRegister temp = srs.AllocateXRegister();
3640 
3641     // Check that the VarHandle references an instance field by checking that
3642     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3643     // type compatibility check with the source object's type, which will fail for null.
3644     __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
3645     // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3646     __ Bnez(temp, slow_path->GetEntryLabel());
3647 
3648     // Check that the object has the correct type.
3649     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3650     __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3651     codegen->MaybeUnpoisonHeapReference(temp);
3652     GenerateSubTypeObjectCheckNoReadBarrier(
3653         codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
3654   }
3655 }
3656 
3657 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3658                                          CodeGeneratorRISCV64* codegen,
3659                                          VarHandleSlowPathRISCV64* slow_path) {
3660   VarHandleOptimizations optimizations(invoke);
3661   Riscv64Assembler* assembler = codegen->GetAssembler();
3662   LocationSummary* locations = invoke->GetLocations();
3663   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3664   XRegister object = locations->InAt(1).AsRegister<XRegister>();
3665   XRegister index = locations->InAt(2).AsRegister<XRegister>();
3666   DataType::Type value_type =
3667       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3668   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3669 
3670   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3671   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3672   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3673   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3674   const MemberOffset class_offset = mirror::Object::ClassOffset();
3675   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3676 
3677   // Null-check the object.
3678   if (!optimizations.GetSkipObjectNullCheck()) {
3679     __ Beqz(object, slow_path->GetEntryLabel());
3680   }
3681 
3682   ScratchRegisterScope srs(assembler);
3683   XRegister temp = srs.AllocateXRegister();
3684   XRegister temp2 = srs.AllocateXRegister();
3685 
3686   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3687   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3688   // coordinateType0 shall not be null but we do not explicitly verify that.
3689   __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
3690   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3691   __ Beqz(temp, slow_path->GetEntryLabel());
3692 
3693   // Check the object's class against coordinateType0.
3694   //
3695   // This is an exact check and we defer other cases to the runtime. This includes
3696   // conversion to array of superclass references, which is valid but subsequently
3697   // requires all update operations to check that the value can indeed be stored.
3698   // We do not want to perform such extra checks in the intrinsified code.
3699   //
3700   // We do this check without read barrier, so there can be false negatives which we
3701   // defer to the slow path. There shall be no false negatives for array classes in the
3702   // boot image (including Object[] and primitive arrays) because they are non-movable.
3703   __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3704   __ Loadwu(temp2, object, class_offset.Int32Value());
3705   __ Bne(temp, temp2, slow_path->GetEntryLabel());
3706 
3707   // Check that the coordinateType0 is an array type. We do not need a read barrier
3708   // for loading constant reference fields (or chains of them) for comparison with null,
3709   // nor for finally loading a constant primitive field (primitive type) below.
3710   codegen->MaybeUnpoisonHeapReference(temp);
3711   __ Loadwu(temp2, temp, component_type_offset.Int32Value());
3712   codegen->MaybeUnpoisonHeapReference(temp2);
3713   __ Beqz(temp2, slow_path->GetEntryLabel());
3714 
3715   // Check that the array component type matches the primitive type.
3716   __ Loadhu(temp, temp2, primitive_type_offset.Int32Value());
3717   if (primitive_type == Primitive::kPrimNot) {
3718     static_assert(Primitive::kPrimNot == 0);
3719     __ Bnez(temp, slow_path->GetEntryLabel());
3720   } else {
3721     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3722     // we shall check for a byte array view in the slow path.
3723     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3724     // so we cannot emit that if we're JITting without boot image.
3725     bool boot_image_available =
3726         codegen->GetCompilerOptions().IsBootImage() ||
3727         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3728     bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3729     Riscv64Label* slow_path_label =
3730         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3731     __ Li(temp2, enum_cast<int32_t>(primitive_type));
3732     __ Bne(temp, temp2, slow_path_label);
3733   }
3734 
3735   // Check for array index out of bounds.
3736   __ Loadw(temp, object, array_length_offset.Int32Value());
3737   __ Bgeu(index, temp, slow_path->GetEntryLabel());
3738 }
3739 
3740 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3741                                               CodeGeneratorRISCV64* codegen,
3742                                               VarHandleSlowPathRISCV64* slow_path) {
3743   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3744   if (expected_coordinates_count == 0u) {
3745     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3746   } else if (expected_coordinates_count == 1u) {
3747     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3748   } else {
3749     DCHECK_EQ(expected_coordinates_count, 2u);
3750     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
3751   }
3752 }
3753 
3754 static VarHandleSlowPathRISCV64* GenerateVarHandleChecks(HInvoke* invoke,
3755                                                          CodeGeneratorRISCV64* codegen,
3756                                                          std::memory_order order,
3757                                                          DataType::Type type) {
3758   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3759   VarHandleOptimizations optimizations(invoke);
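  // For a VarHandle known to be in the boot image, the access mode and varType checks are
  // skipped; if no null check is needed for the coordinate either (static field, or the object
  // is known to be non-null), no slow path is required at all.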
3760   if (optimizations.GetUseKnownImageVarHandle()) {
3761     DCHECK_NE(expected_coordinates_count, 2u);
3762     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
3763       return nullptr;
3764     }
3765   }
3766 
3767   VarHandleSlowPathRISCV64* slow_path =
3768       new (codegen->GetScopedAllocator()) VarHandleSlowPathRISCV64(invoke, order);
3769   codegen->AddSlowPath(slow_path);
3770 
3771   if (!optimizations.GetUseKnownImageVarHandle()) {
3772     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
3773   }
3774   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
3775 
3776   return slow_path;
3777 }
3778 
3779 struct VarHandleTarget {
3780   XRegister object;  // The object holding the value to operate on.
3781   XRegister offset;  // The offset of the value to operate on.
3782 };
3783 
3784 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
3785   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3786   LocationSummary* locations = invoke->GetLocations();
3787 
3788   VarHandleTarget target;
3789   // The temporary allocated for loading the offset.
3790   target.offset = locations->GetTemp(0u).AsRegister<XRegister>();
3791   // The reference to the object that holds the value to operate on.
3792   target.object = (expected_coordinates_count == 0u)
3793       ? locations->GetTemp(1u).AsRegister<XRegister>()
3794       : locations->InAt(1).AsRegister<XRegister>();
3795   return target;
3796 }
3797 
3798 static void GenerateVarHandleTarget(HInvoke* invoke,
3799                                     const VarHandleTarget& target,
3800                                     CodeGeneratorRISCV64* codegen) {
3801   Riscv64Assembler* assembler = codegen->GetAssembler();
3802   LocationSummary* locations = invoke->GetLocations();
3803   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3804   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3805 
3806   if (expected_coordinates_count <= 1u) {
3807     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
3808       ScopedObjectAccess soa(Thread::Current());
3809       ArtField* target_field = GetImageVarHandleField(invoke);
3810       if (expected_coordinates_count == 0u) {
3811         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
3812         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
3813           uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
3814           codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
3815         } else {
3816           codegen->LoadTypeForBootImageIntrinsic(
3817               target.object,
3818               TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
3819         }
3820       }
3821       __ Li(target.offset, target_field->GetOffset().Uint32Value());
3822     } else {
3823       // For static fields, we need to fill `target.object` with the declaring class, so we
3824       // can use `target.object` as a temporary for the `ArtField*`. For instance fields, we do
3825       // not need the declaring class and can discard the `ArtField*` once `target.offset` has
3826       // been loaded, so use `target.offset` to hold the `ArtField*`.
3827       XRegister field = (expected_coordinates_count == 0) ? target.object : target.offset;
3828 
3829       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
3830       const MemberOffset offset_offset = ArtField::OffsetOffset();
3831 
3832       // Load the ArtField*, the offset and, if needed, declaring class.
3833       __ Loadd(field, varhandle, art_field_offset.Int32Value());
3834       __ Loadwu(target.offset, field, offset_offset.Int32Value());
3835       if (expected_coordinates_count == 0u) {
3836         codegen->GenerateGcRootFieldLoad(
3837             invoke,
3838             Location::RegisterLocation(target.object),
3839             field,
3840             ArtField::DeclaringClassOffset().Int32Value(),
3841             codegen->GetCompilerReadBarrierOption());
3842       }
3843     }
3844   } else {
3845     DCHECK_EQ(expected_coordinates_count, 2u);
3846     DataType::Type value_type =
3847         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3848     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
3849 
3850     XRegister index = locations->InAt(2).AsRegister<XRegister>();
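    // Compute offset = Array::DataOffset(element size) + (index << log2(element size));
    // ShNAdd() picks the shift amount from `value_type`.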
3851     __ Li(target.offset, data_offset.Int32Value());
3852     codegen->GetInstructionVisitor()->ShNAdd(target.offset, index, target.offset, value_type);
3853   }
3854 }
3855 
3856 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
3857                                                        CodeGeneratorRISCV64* codegen) {
3858   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3859   DataType::Type return_type = invoke->GetType();
3860 
3861   ArenaAllocator* allocator = codegen->GetGraph()->GetAllocator();
3862   LocationSummary* locations =
3863       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3864   locations->SetInAt(0, Location::RequiresRegister());
3865   // Require coordinates in registers. These are the object holding the value
3866   // to operate on (except for static fields) and index (for arrays and views).
3867   for (size_t i = 0; i != expected_coordinates_count; ++i) {
3868     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
3869   }
3870   if (return_type != DataType::Type::kVoid) {
3871     if (DataType::IsFloatingPointType(return_type)) {
3872       locations->SetOut(Location::RequiresFpuRegister());
3873     } else {
3874       locations->SetOut(Location::RequiresRegister());
3875     }
3876   }
3877   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3878   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3879   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3880     HInstruction* arg = invoke->InputAt(arg_index);
3881     if (IsZeroBitPattern(arg)) {
3882       locations->SetInAt(arg_index, Location::ConstantLocation(arg));
3883     } else if (DataType::IsFloatingPointType(arg->GetType())) {
3884       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
3885     } else {
3886       locations->SetInAt(arg_index, Location::RequiresRegister());
3887     }
3888   }
3889 
3890   // Add a temporary for offset.
3891   if (codegen->EmitNonBakerReadBarrier() &&
3892       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
3893     // To preserve the offset value across the non-Baker read barrier slow path
3894     // for loading the declaring class, use a fixed callee-save register.
3895     constexpr int first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
3896     locations->AddTemp(Location::RegisterLocation(first_callee_save));
3897   } else {
3898     locations->AddTemp(Location::RequiresRegister());
3899   }
3900   if (expected_coordinates_count == 0u) {
3901     // Add a temporary to hold the declaring class.
3902     locations->AddTemp(Location::RequiresRegister());
3903   }
3904 
3905   return locations;
3906 }
3907 
3908 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
3909   VarHandleOptimizations optimizations(invoke);
3910   if (optimizations.GetDoNotIntrinsify()) {
3911     return;
3912   }
3913 
3914   if (codegen->EmitNonBakerReadBarrier() &&
3915       invoke->GetType() == DataType::Type::kReference &&
3916       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
3917       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
3918     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
3919     // the passed reference and reloads it from the field. This gets the memory visibility
3920     // wrong for Acquire/Volatile operations. b/173104084
3921     return;
3922   }
3923 
3924   CreateVarHandleCommonLocations(invoke, codegen);
3925 }
3926 
3927 DataType::Type IntTypeForFloatingPointType(DataType::Type fp_type) {
3928   DCHECK(DataType::IsFloatingPointType(fp_type));
3929   return (fp_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64;
3930 }
3931 
3932 static void GenerateVarHandleGet(HInvoke* invoke,
3933                                  CodeGeneratorRISCV64* codegen,
3934                                  std::memory_order order,
3935                                  bool byte_swap = false) {
3936   DataType::Type type = invoke->GetType();
3937   DCHECK_NE(type, DataType::Type::kVoid);
3938 
3939   LocationSummary* locations = invoke->GetLocations();
3940   Riscv64Assembler* assembler = codegen->GetAssembler();
3941   Location out = locations->Out();
3942 
3943   VarHandleTarget target = GetVarHandleTarget(invoke);
3944   VarHandleSlowPathRISCV64* slow_path = nullptr;
3945   if (!byte_swap) {
3946     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
3947     GenerateVarHandleTarget(invoke, target, codegen);
3948     if (slow_path != nullptr) {
3949       __ Bind(slow_path->GetNativeByteOrderLabel());
3950     }
3951   }
3952 
3953   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
3954   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
3955   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
3956 
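  // getVolatile (seq_cst) emits a full fence before the load plus the load-acquire fence after
  // it; getAcquire emits only the trailing fence; get/getOpaque (relaxed) emit no fences.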
3957   if (seq_cst_barrier) {
3958     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3959   }
3960 
3961   // Load the value from the target location.
3962   if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
3963     Location index = Location::RegisterLocation(target.offset);
3964     // TODO(riscv64): Revisit when we add checking if the holder is black.
3965     Location temp = Location::NoLocation();
3966     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
3967                                                        out,
3968                                                        target.object,
3969                                                        /*offset=*/ 0,
3970                                                        index,
3971                                                        temp,
3972                                                        /*needs_null_check=*/ false);
3973     DCHECK(!byte_swap);
3974   } else {
3975     ScratchRegisterScope srs(assembler);
3976     XRegister address = srs.AllocateXRegister();
3977     __ Add(address, target.object, target.offset);
3978     Location load_loc = out;
3979     DataType::Type load_type = type;
3980     if (byte_swap && DataType::IsFloatingPointType(type)) {
3981       load_loc = Location::RegisterLocation(target.offset);  // Load to the offset temporary.
3982       load_type = IntTypeForFloatingPointType(type);
3983     }
3984     codegen->GetInstructionVisitor()->Load(load_loc, address, /*offset=*/ 0, load_type);
3985     if (type == DataType::Type::kReference) {
3986       DCHECK(!byte_swap);
3987       Location object_loc = Location::RegisterLocation(target.object);
3988       Location offset_loc = Location::RegisterLocation(target.offset);
3989       codegen->MaybeGenerateReadBarrierSlow(
3990           invoke, out, out, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
3991     } else if (byte_swap) {
3992       GenerateReverseBytes(codegen, out, load_loc.AsRegister<XRegister>(), type);
3993     }
3994   }
3995 
3996   if (acquire_barrier) {
3997     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3998   }
3999 
4000   if (slow_path != nullptr) {
4001     DCHECK(!byte_swap);
4002     __ Bind(slow_path->GetExitLabel());
4003   }
4004 }
4005 
4006 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGet(HInvoke* invoke) {
4007   CreateVarHandleGetLocations(invoke, codegen_);
4008 }
4009 
4010 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGet(HInvoke* invoke) {
4011   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4012 }
4013 
4014 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4015   CreateVarHandleGetLocations(invoke, codegen_);
4016 }
4017 
4018 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4019   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4020 }
4021 
4022 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4023   CreateVarHandleGetLocations(invoke, codegen_);
4024 }
4025 
4026 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4027   GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
4028 }
4029 
4030 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4031   CreateVarHandleGetLocations(invoke, codegen_);
4032 }
4033 
4034 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4035   GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
4036 }
4037 
4038 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
4039   VarHandleOptimizations optimizations(invoke);
4040   if (optimizations.GetDoNotIntrinsify()) {
4041     return;
4042   }
4043 
4044   CreateVarHandleCommonLocations(invoke, codegen);
4045   if (kPoisonHeapReferences && invoke->GetLocations() != nullptr) {
4046     LocationSummary* locations = invoke->GetLocations();
4047     uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4048     DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4049     if (value_type == DataType::Type::kReference && !locations->InAt(value_index).IsConstant()) {
4050       locations->AddTemp(Location::RequiresRegister());
4051     }
4052   }
4053 }
4054 
4055 static void GenerateVarHandleSet(HInvoke* invoke,
4056                                  CodeGeneratorRISCV64* codegen,
4057                                  std::memory_order order,
4058                                  bool byte_swap = false) {
4059   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4060   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4061 
4062   Riscv64Assembler* assembler = codegen->GetAssembler();
4063   Location value = invoke->GetLocations()->InAt(value_index);
4064 
4065   VarHandleTarget target = GetVarHandleTarget(invoke);
4066   VarHandleSlowPathRISCV64* slow_path = nullptr;
4067   if (!byte_swap) {
4068     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4069     GenerateVarHandleTarget(invoke, target, codegen);
4070     if (slow_path != nullptr) {
4071       __ Bind(slow_path->GetNativeByteOrderLabel());
4072     }
4073   }
4074 
4075   {
4076     ScratchRegisterScope srs(assembler);
4077     // Heap poisoning needs two scratch registers in `Store()`, except for null constants.
4078     XRegister address =
4079         (kPoisonHeapReferences && value_type == DataType::Type::kReference && !value.IsConstant())
4080             ? invoke->GetLocations()->GetTemp(0).AsRegister<XRegister>()
4081             : srs.AllocateXRegister();
4082     __ Add(address, target.object, target.offset);
4083 
4084     if (byte_swap) {
4085       DCHECK(!value.IsConstant());  // Zero uses the main path as it does not need a byte swap.
4086       // The offset is no longer needed, so reuse the offset temporary for the byte-swapped value.
4087       Location new_value = Location::RegisterLocation(target.offset);
4088       if (DataType::IsFloatingPointType(value_type)) {
4089         value_type = IntTypeForFloatingPointType(value_type);
4090         codegen->MoveLocation(new_value, value, value_type);
4091         value = new_value;
4092       }
4093       GenerateReverseBytes(codegen, new_value, value.AsRegister<XRegister>(), value_type);
4094       value = new_value;
4095     }
4096 
4097     GenerateSet(codegen, order, value, address, /*offset=*/ 0, value_type);
4098   }
4099 
4100   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4101     codegen->MaybeMarkGCCard(
4102         target.object, value.AsRegister<XRegister>(), /* emit_null_check= */ true);
4103   }
4104 
4105   if (slow_path != nullptr) {
4106     DCHECK(!byte_swap);
4107     __ Bind(slow_path->GetExitLabel());
4108   }
4109 }
4110 
4111 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSet(HInvoke* invoke) {
4112   CreateVarHandleSetLocations(invoke, codegen_);
4113 }
4114 
4115 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSet(HInvoke* invoke) {
4116   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4117 }
4118 
4119 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4120   CreateVarHandleSetLocations(invoke, codegen_);
4121 }
4122 
4123 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4124   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4125 }
4126 
4127 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
4128   CreateVarHandleSetLocations(invoke, codegen_);
4129 }
4130 
4131 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
4132   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
4133 }
4134 
4135 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4136   CreateVarHandleSetLocations(invoke, codegen_);
4137 }
4138 
4139 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4140   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
4141 }
4142 
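// A non-zero input needs a scratch X register when it has to be adjusted before the exclusive
// load/store loop: moved out of an FP register, shifted and masked for a sub-word access, or
// byte-swapped.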
4143 static bool ScratchXRegisterNeeded(Location loc, DataType::Type type, bool byte_swap) {
4144   if (loc.IsConstant()) {
4145     DCHECK(loc.GetConstant()->IsZeroBitPattern());
4146     return false;
4147   }
4148   return DataType::IsFloatingPointType(type) || DataType::Size(type) < 4u || byte_swap;
4149 }
4150 
4151 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4152                                                             CodeGeneratorRISCV64* codegen,
4153                                                             bool return_success) {
4154   VarHandleOptimizations optimizations(invoke);
4155   if (optimizations.GetDoNotIntrinsify()) {
4156     return;
4157   }
4158 
4159   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4160   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4161   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4162   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4163 
4164   bool is_reference = (value_type == DataType::Type::kReference);
4165   if (is_reference && codegen->EmitNonBakerReadBarrier()) {
4166     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4167     // the passed reference and reloads it from the field. This breaks the read barriers
4168     // in slow path in different ways. The marked old value may not actually be a to-space
4169     // reference to the same object as `old_value`, breaking slow path assumptions. And
4170     // for CompareAndExchange, marking the old value after comparison failure may actually
4171     // return the reference to `expected`, erroneously indicating success even though we
4172     // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4173     return;
4174   }
4175 
4176   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
4177   if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4178     return;
4179   }
4180 
4181   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4182   DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4183 
4184   if (codegen->EmitNonBakerReadBarrier()) {
4185     // We need callee-save registers for both the class object and offset instead of
4186     // the temporaries reserved in CreateVarHandleCommonLocations().
4187     static_assert(POPCOUNT(kRiscv64CalleeSaveRefSpills) >= 2u);
4188     uint32_t first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
4189     uint32_t second_callee_save = CTZ(kRiscv64CalleeSaveRefSpills ^ (1u << first_callee_save));
4190     if (expected_index == 1u) {  // For static fields.
4191       DCHECK_EQ(locations->GetTempCount(), 2u);
4192       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4193       DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4194       locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4195     } else {
4196       DCHECK_EQ(locations->GetTempCount(), 1u);
4197       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4198       locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4199     }
4200   }
4201 
4202   size_t old_temp_count = locations->GetTempCount();
4203   DCHECK_EQ(old_temp_count, (expected_index == 1u) ? 2u : 1u);
4204   Location expected = locations->InAt(expected_index);
4205   Location new_value = locations->InAt(new_value_index);
4206   size_t data_size = DataType::Size(value_type);
4207   bool is_small = (data_size < 4u);
4208   bool can_byte_swap =
4209       (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
4210   bool is_fp = DataType::IsFloatingPointType(value_type);
4211   size_t temps_needed =
4212       // The offset temp is used for the `tmp_ptr`, except for the read barrier case. For read
4213       // barrier we must preserve the offset and class pointer (if any) for the slow path and
4214       // use a separate temp for `tmp_ptr` and we also need another temp for `old_value_temp`.
4215       ((is_reference && codegen->EmitReadBarrier()) ? old_temp_count + 2u : 1u) +
4216       // For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`.
4217       (is_small ? (return_success ? 2u : 3u) : 0u) +
4218       // Some cases need modified copies of `new_value` and `expected`.
4219       (ScratchXRegisterNeeded(expected, value_type, can_byte_swap) ? 1u : 0u) +
4220       (ScratchXRegisterNeeded(new_value, value_type, can_byte_swap) ? 1u : 0u) +
4221       // We need a scratch register either for the old value or for the result of SC.
4222       // If we need to return a floating point old value, we need a temp for each.
4223       ((!return_success && is_fp) ? 2u : 1u);
4224   size_t scratch_registers_available = 2u;
4225   DCHECK_EQ(scratch_registers_available,
4226             ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
4227   if (temps_needed > old_temp_count + scratch_registers_available) {
4228     locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
4229   }
4230 }
4231 
4232 static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
4233                                   Location loc,
4234                                   DataType::Type type,
4235                                   XRegister shift,
4236                                   XRegister mask,
4237                                   bool byte_swap,
4238                                   ScratchRegisterScope* srs) {
4239   DCHECK_IMPLIES(mask != kNoXRegister, shift != kNoXRegister);
4240   DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u);
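  // Returns the X register holding the value to use in the exclusive load/store loop: zero
  // constants use the Zero register, FP values are first moved to a scratch X register, values
  // are byte-swapped if requested, and sub-word values are shifted into their lane (and masked
  // when the shifted value can have bits outside the lane).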
4241   if (loc.IsConstant()) {
4242     // The `shift`/`mask` and `byte_swap` are irrelevant for zero input.
4243     DCHECK(loc.GetConstant()->IsZeroBitPattern());
4244     return Zero;
4245   }
4246 
4247   Location result = loc;
4248   if (DataType::IsFloatingPointType(type)) {
4249     type = IntTypeForFloatingPointType(type);
4250     result = Location::RegisterLocation(srs->AllocateXRegister());
4251     codegen->MoveLocation(result, loc, type);
4252     loc = result;
4253   } else if (byte_swap || shift != kNoXRegister) {
4254     result = Location::RegisterLocation(srs->AllocateXRegister());
4255   }
4256   if (byte_swap) {
4257     if (type == DataType::Type::kInt16) {
4258       type = DataType::Type::kUint16;  // Do the masking as part of the byte swap.
4259     }
4260     GenerateReverseBytes(codegen, result, loc.AsRegister<XRegister>(), type);
4261     loc = result;
4262   }
4263   if (shift != kNoXRegister) {
4264     Riscv64Assembler* assembler = codegen->GetAssembler();
4265     __ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift);
4266     DCHECK_NE(type, DataType::Type::kUint8);
4267     if (mask != kNoXRegister && type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
4268       __ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask);
4269     }
4270   }
4271   return result.AsRegister<XRegister>();
4272 }
4273 
4274 static void GenerateByteSwapAndExtract(CodeGeneratorRISCV64* codegen,
4275                                        Location rd,
4276                                        XRegister rs1,
4277                                        XRegister shift,
4278                                        DataType::Type type) {
4279   // Apply shift before `GenerateReverseBytes()` for small types.
4280   DCHECK_EQ(shift != kNoXRegister, DataType::Size(type) < 4u);
4281   if (shift != kNoXRegister) {
4282     Riscv64Assembler* assembler = codegen->GetAssembler();
4283     __ Srlw(rd.AsRegister<XRegister>(), rs1, shift);
4284     rs1 = rd.AsRegister<XRegister>();
4285   }
4286   // Also handles moving to FP registers.
4287   GenerateReverseBytes(codegen, rd, rs1, type);
4288 }
4289 
4290 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4291                                                      CodeGeneratorRISCV64* codegen,
4292                                                      std::memory_order order,
4293                                                      bool return_success,
4294                                                      bool strong,
4295                                                      bool byte_swap = false) {
4296   DCHECK(return_success || strong);
4297 
4298   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4299   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4300   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4301   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4302 
4303   Riscv64Assembler* assembler = codegen->GetAssembler();
4304   LocationSummary* locations = invoke->GetLocations();
4305   Location expected = locations->InAt(expected_index);
4306   Location new_value = locations->InAt(new_value_index);
4307   Location out = locations->Out();
4308 
4309   VarHandleTarget target = GetVarHandleTarget(invoke);
4310   VarHandleSlowPathRISCV64* slow_path = nullptr;
4311   if (!byte_swap) {
4312     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4313     GenerateVarHandleTarget(invoke, target, codegen);
4314     if (slow_path != nullptr) {
4315       slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4316       __ Bind(slow_path->GetNativeByteOrderLabel());
4317     }
4318   }
4319 
4320   // This needs to be before we allocate the scratch registers, as MarkGCCard also uses them.
4321   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
4322     // Mark card for object assuming new value is stored.
4323     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
4324     codegen->MaybeMarkGCCard(
4325         target.object, new_value.AsRegister<XRegister>(), new_value_can_be_null);
4326   }
4327 
4328   // Scratch registers may be needed for `new_value` and `expected`.
4329   ScratchRegisterScope srs(assembler);
4330   DCHECK_EQ(srs.AvailableXRegisters(), 2u);
4331   size_t available_scratch_registers =
4332       (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 0u : 1u) +
4333       (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 0u : 1u);
4334 
4335   // Reuse the `offset` temporary for the pointer to the target location,
4336   // except for references that need the offset for the read barrier.
4337   DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
4338   size_t next_temp = 1u;
4339   XRegister tmp_ptr = target.offset;
4340   bool is_reference = (value_type == DataType::Type::kReference);
4341   if (is_reference && codegen->EmitReadBarrier()) {
4342     // Reserve scratch registers for `tmp_ptr` and `old_value_temp`.
4343     DCHECK_EQ(available_scratch_registers, 2u);
4344     available_scratch_registers = 0u;
4345     DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4346     next_temp = expected_index == 1u ? 2u : 1u;  // Preserve the class register for static field.
4347     tmp_ptr = srs.AllocateXRegister();
4348   }
4349   __ Add(tmp_ptr, target.object, target.offset);
4350 
4351   auto get_temp = [&]() {
4352     if (available_scratch_registers != 0u) {
4353       available_scratch_registers -= 1u;
4354       return srs.AllocateXRegister();
4355     } else {
4356       XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
4357       next_temp += 1u;
4358       return temp;
4359     }
4360   };
4361 
4362   XRegister shift = kNoXRegister;
4363   XRegister mask = kNoXRegister;
4364   XRegister masked = kNoXRegister;
4365   size_t data_size = DataType::Size(value_type);
4366   bool is_small = (data_size < 4u);
4367   if (is_small) {
4368     // When returning "success" and not the old value, we shall not need the `shift` after
4369     // the raw CAS operation, so use the output register as a temporary here.
4370     shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp();
4371     mask = get_temp();
4372     masked = get_temp();
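    // Sub-word CAS is emulated on the containing 4-byte-aligned word: `shift` is the value's bit
    // position within that word (the low address bits times 8; Sllw only uses the low 5 bits),
    // `tmp_ptr` is aligned down to 4 bytes and `mask` covers the value's bits after shifting.
    // For example, a 1-byte value at an address that is 3 modulo 4 gets an effective shift of 24
    // and mask = 0xff << 24.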
4373     // Upper bits of the shift are not used, so we do not need to clear them.
4374     __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
4375     __ Andi(tmp_ptr, tmp_ptr, -4);
4376     __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
4377     __ Sllw(mask, mask, shift);
4378   }
4379 
4380   // Move floating point values to scratch registers and apply shift, mask and byte swap if needed.
4381   // Note that float/double CAS uses a bitwise comparison rather than operator==.
4382   XRegister expected_reg =
4383       PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs);
4384   XRegister new_value_reg =
4385       PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs);
4386   bool is_fp = DataType::IsFloatingPointType(value_type);
4387   DataType::Type cas_type = is_fp
4388       ? IntTypeForFloatingPointType(value_type)
4389       : (is_small ? DataType::Type::kInt32 : value_type);
4390 
4391   // Prepare registers for old value and the result of the store conditional.
4392   XRegister old_value;
4393   XRegister store_result;
4394   if (return_success) {
4395     // Use a temp for the old value.
4396     old_value = get_temp();
4397     // For strong CAS, use the `old_value` temp also for the SC result.
4398     // For weak CAS, put the SC result directly to `out`.
4399     store_result = strong ? old_value : out.AsRegister<XRegister>();
4400   } else if (is_fp) {
4401     // We need two temporary registers.
4402     old_value = get_temp();
4403     store_result = get_temp();
4404   } else {
4405     // Use the output register for the old value and a temp for the store conditional result.
4406     old_value = out.AsRegister<XRegister>();
4407     store_result = get_temp();
4408   }
4409 
4410   Riscv64Label exit_loop_label;
4411   Riscv64Label* exit_loop = &exit_loop_label;
4412   Riscv64Label* cmp_failure = &exit_loop_label;
4413 
4414   ReadBarrierCasSlowPathRISCV64* rb_slow_path = nullptr;
4415   if (is_reference && codegen->EmitReadBarrier()) {
4416     // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
4417     // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
4418     // as we do have marking entrypoints on riscv64 even for scratch registers.
4419     XRegister old_value_temp = srs.AllocateXRegister();
4420     // For strong CAS, use the `old_value_temp` also for the SC result as the reloaded old value
4421     // is no longer needed after the comparison. For weak CAS, store the SC result in the same
4422     // result register as the main path.
4423     // Note that for a strong CAS, a SC failure in the slow path can set the register to 1, so
4424     // we cannot use that register to indicate success without resetting it to 0 at the start of
4425     // the retry loop. Instead, we return to the success indicating instruction in the main path.
4426     XRegister slow_path_store_result = strong ? old_value_temp : store_result;
4427     rb_slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
4428         invoke,
4429         order,
4430         strong,
4431         target.object,
4432         target.offset,
4433         expected_reg,
4434         new_value_reg,
4435         old_value,
4436         old_value_temp,
4437         slow_path_store_result,
4438         /*update_old_value=*/ !return_success,
4439         codegen);
4440     codegen->AddSlowPath(rb_slow_path);
4441     exit_loop = rb_slow_path->GetExitLabel();
4442     cmp_failure = rb_slow_path->GetEntryLabel();
4443   }
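  // From here on, a comparison failure in the main loop enters the read barrier slow
  // path, which marks the loaded old reference and retries the CAS there if the
  // marked value matches `expected`.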
4444 
4445   if (return_success) {
4446     // Pre-populate the output register with failure for the case when the old value
4447     // differs and we do not execute the store conditional.
4448     __ Li(out.AsRegister<XRegister>(), 0);
4449   }
4450   GenerateCompareAndSet(codegen->GetAssembler(),
4451                         cas_type,
4452                         order,
4453                         strong,
4454                         cmp_failure,
4455                         tmp_ptr,
4456                         new_value_reg,
4457                         old_value,
4458                         mask,
4459                         masked,
4460                         store_result,
4461                         expected_reg);
4462   if (return_success && strong) {
4463     if (rb_slow_path != nullptr) {
4464       // Slow path returns here on success.
4465       __ Bind(rb_slow_path->GetSuccessExitLabel());
4466     }
4467     // Load success value to the output register.
4468     // `GenerateCompareAndSet()` does not emit code to indicate success for a strong CAS.
4469     __ Li(out.AsRegister<XRegister>(), 1);
4470   } else if (rb_slow_path != nullptr) {
4471     DCHECK(!rb_slow_path->GetSuccessExitLabel()->IsLinked());
4472   }
4473   __ Bind(exit_loop);
4474 
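  // When returning the old value (compare-and-exchange), extract it below: for small
  // types in native byte order, `masked` still holds the old field bits at their
  // position within the 32-bit word, so they are shifted down and sign-extended; for
  // floating point, the old integer bit pattern is moved to the FP output register.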
4475   if (return_success) {
4476     // Nothing to do, the result register already contains 1 on success and 0 on failure.
4477   } else if (byte_swap) {
4478     DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value)
4479         << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value;
4480     GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
4481   } else if (is_fp) {
4482     codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
4483   } else if (is_small) {
4484     __ Srlw(old_value, masked, shift);
4485     if (value_type == DataType::Type::kInt8) {
4486       __ SextB(old_value, old_value);
4487     } else if (value_type == DataType::Type::kInt16) {
4488       __ SextH(old_value, old_value);
4489     }
4490   }
4491 
4492   if (slow_path != nullptr) {
4493     DCHECK(!byte_swap);
4494     __ Bind(slow_path->GetExitLabel());
4495   }
4496 
4497   // Check that we have allocated the right number of temps. We may need more registers
4498   // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
4499   bool has_byte_swap = (expected_index == 3u) && (!is_reference && data_size != 1u);
4500   if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
4501     // We allocate a temporary register for the class object for a static field `VarHandle` but
4502     // we do not update the `next_temp` if it's otherwise unused after the address calculation.
4503     CHECK_EQ(expected_index, 1u);
4504     CHECK_EQ(next_temp, 1u);
4505     CHECK_EQ(locations->GetTempCount(), 2u);
4506   }
4507 }
4508 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4509 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4510   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4511 }
4512 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4513 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4514   GenerateVarHandleCompareAndSetOrExchange(
4515       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
4516 }
4517 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4518 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4519   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4520 }
4521 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4522 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4523   GenerateVarHandleCompareAndSetOrExchange(
4524       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
4525 }
4526 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4527 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4528   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4529 }
4530 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4531 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4532   GenerateVarHandleCompareAndSetOrExchange(
4533       invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
4534 }
4535 
VisitVarHandleCompareAndSet(HInvoke * invoke)4536 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4537   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4538 }
4539 
VisitVarHandleCompareAndSet(HInvoke * invoke)4540 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4541   GenerateVarHandleCompareAndSetOrExchange(
4542       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
4543 }
4544 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4545 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4546   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4547 }
4548 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4549 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4550   GenerateVarHandleCompareAndSetOrExchange(
4551       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
4552 }
4553 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4554 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4555   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4556 }
4557 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4558 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4559   GenerateVarHandleCompareAndSetOrExchange(
4560       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
4561 }
4562 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4563 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4564   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4565 }
4566 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4567 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4568   GenerateVarHandleCompareAndSetOrExchange(
4569       invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
4570 }
4571 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4572 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4573   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4574 }
4575 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4576 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4577   GenerateVarHandleCompareAndSetOrExchange(
4578       invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
4579 }
4580 
CreateVarHandleGetAndUpdateLocations(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op)4581 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
4582                                                  CodeGeneratorRISCV64* codegen,
4583                                                  GetAndUpdateOp get_and_update_op) {
4584   VarHandleOptimizations optimizations(invoke);
4585   if (optimizations.GetDoNotIntrinsify()) {
4586     return;
4587   }
4588 
4589   // Get the type from the shorty as the invokes may not return a value.
4590   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
4591   DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4592   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
4593   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
4594     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4595     // the passed reference and reloads it from the field, thus seeing the new value
4596     // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
4597     return;
4598   }
4599 
4600   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
4601   if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4602     return;
4603   }
4604 
4605   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4606   Location arg = locations->InAt(arg_index);
4607 
4608   bool is_fp = DataType::IsFloatingPointType(value_type);
4609   if (is_fp) {
4610     if (get_and_update_op == GetAndUpdateOp::kAdd) {
4611       // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
4612       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4613     } else {
4614       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4615     }
4616   }
4617 
4618   size_t data_size = DataType::Size(value_type);
4619   bool can_byte_swap =
4620       (arg_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
4621   bool can_use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (can_byte_swap || is_fp);
4622   bool is_small = (data_size < 4u);
4623   bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
4624   bool is_bitwise =
4625       (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd);
4626 
4627   size_t temps_needed =
4628       // The offset temp is used for the `tmp_ptr`.
4629       1u +
4630       // For small values, we need temps for `shift` and maybe also `mask` and `temp`.
4631       (is_small ? (is_bitwise ? 1u : 3u) : 0u) +
4632       // Some cases need modified copies of `arg`.
4633       (is_small_and || ScratchXRegisterNeeded(arg, value_type, can_byte_swap) ? 1u : 0u) +
4634       // For FP types, we need a temp for `old_value` which cannot be loaded directly to `out`.
4635       (is_fp ? 1u : 0u);
4636   if (can_use_cas) {
4637     size_t cas_temps_needed =
4638         // The offset temp is used for the `tmp_ptr`.
4639         1u +
4640         // For small values, we need a temp for `shift`.
4641         (is_small ? 1u : 0u) +
4642         // And we always need temps for `old_value`, `new_value` and `reloaded_old_value`.
4643         3u;
4644     DCHECK_GE(cas_temps_needed, temps_needed);
4645     temps_needed = cas_temps_needed;
4646   }
4647 
4648   size_t scratch_registers_available = 2u;
4649   DCHECK_EQ(scratch_registers_available,
4650             ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
4651   size_t old_temp_count = locations->GetTempCount();
4652   DCHECK_EQ(old_temp_count, (arg_index == 1u) ? 2u : 1u);
4653   if (temps_needed > old_temp_count + scratch_registers_available) {
4654     locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
4655   }
4656 
4657   // Request another temporary register for methods that don't return a value.
4658   // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
4659   DataType::Type return_type = invoke->GetType();
4660   const bool is_void = return_type == DataType::Type::kVoid;
4661   DCHECK_IMPLIES(!is_void, return_type == value_type);
4662   if (is_void) {
4663     if (DataType::IsFloatingPointType(value_type)) {
4664       locations->AddTemp(Location::RequiresFpuRegister());
4665     } else {
4666       locations->AddTemp(Location::RequiresRegister());
4667     }
4668   }
4669 }
4670 
GenerateVarHandleGetAndUpdate(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op,std::memory_order order,bool byte_swap=false)4671 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
4672                                           CodeGeneratorRISCV64* codegen,
4673                                           GetAndUpdateOp get_and_update_op,
4674                                           std::memory_order order,
4675                                           bool byte_swap = false) {
4676   // Get the type from the shorty as the invokes may not return a value.
4677   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
4678   DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4679   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
4680 
4681   Riscv64Assembler* assembler = codegen->GetAssembler();
4682   LocationSummary* locations = invoke->GetLocations();
4683   Location arg = locations->InAt(arg_index);
4684   DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern());
4685   DataType::Type return_type = invoke->GetType();
4686   const bool is_void = return_type == DataType::Type::kVoid;
4687   DCHECK_IMPLIES(!is_void, return_type == value_type);
4688   // We use a temporary for void methods, as we don't return the value.
4689   Location out_or_temp =
4690       is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
4691 
4692   VarHandleTarget target = GetVarHandleTarget(invoke);
4693   VarHandleSlowPathRISCV64* slow_path = nullptr;
4694   if (!byte_swap) {
4695     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4696     GenerateVarHandleTarget(invoke, target, codegen);
4697     if (slow_path != nullptr) {
4698       slow_path->SetGetAndUpdateOp(get_and_update_op);
4699       __ Bind(slow_path->GetNativeByteOrderLabel());
4700     }
4701   }
4702 
4703   // Do this before allocating temporaries below, as `MaybeMarkGCCard()` also uses scratch registers.
4704   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
4705     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4706     // Mark card for object, the new value shall be stored.
4707     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
4708     codegen->MaybeMarkGCCard(target.object, arg.AsRegister<XRegister>(), new_value_can_be_null);
4709   }
4710 
4711   size_t data_size = DataType::Size(value_type);
4712   bool is_fp = DataType::IsFloatingPointType(value_type);
4713   bool use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (byte_swap || is_fp);
4714   bool is_small = (data_size < 4u);
4715   bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
4716   bool is_reference = (value_type == DataType::Type::kReference);
4717   DataType::Type op_type = is_fp
4718       ? IntTypeForFloatingPointType(value_type)
4719       : (is_small || is_reference ? DataType::Type::kInt32 : value_type);
4720 
4721   ScratchRegisterScope srs(assembler);
4722   DCHECK_EQ(srs.AvailableXRegisters(), 2u);
4723   size_t available_scratch_registers = use_cas
4724       // We use scratch registers differently for the CAS path.
4725       ? 0u
4726       // Reserve one scratch register for `PrepareXRegister()` or similar `arg_reg` allocation.
4727       : (is_small_and || ScratchXRegisterNeeded(arg, value_type, byte_swap) ? 1u : 2u);
4728 
4729   // Reuse the `target.offset` temporary for the pointer to the target location,
4730   // except for references that need the offset for the non-Baker read barrier.
4731   DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
4732   size_t next_temp = 1u;
4733   XRegister tmp_ptr = target.offset;
4734   if (is_reference && codegen->EmitNonBakerReadBarrier()) {
4735     DCHECK_EQ(available_scratch_registers, 2u);
4736     available_scratch_registers -= 1u;
4737     tmp_ptr = srs.AllocateXRegister();
4738   }
4739   __ Add(tmp_ptr, target.object, target.offset);
4740 
4741   auto get_temp = [&]() {
4742     if (available_scratch_registers != 0u) {
4743       available_scratch_registers -= 1u;
4744       return srs.AllocateXRegister();
4745     } else {
4746       DCHECK_IMPLIES(is_void, next_temp != locations->GetTempCount() - 1u)
4747           << "The last temp is special for the void case, as it represents the out register.";
4748       XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
4749       next_temp += 1u;
4750       return temp;
4751     }
4752   };
4753 
4754   XRegister shift = kNoXRegister;
4755   XRegister mask = kNoXRegister;
4756   XRegister prepare_mask = kNoXRegister;
4757   XRegister temp = kNoXRegister;
4758   XRegister arg_reg = kNoXRegister;
4759   if (is_small) {
4760     shift = get_temp();
4761     // Upper bits of the shift are not used, so we do not need to clear them.
4762     __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
4763     __ Andi(tmp_ptr, tmp_ptr, -4);
4764     switch (get_and_update_op) {
4765       case GetAndUpdateOp::kAdd:
4766         if (byte_swap) {
4767           // The mask is not needed in the CAS path.
4768           DCHECK(use_cas);
4769           break;
4770         }
4771         FALLTHROUGH_INTENDED;
4772       case GetAndUpdateOp::kSet:
4773         mask = get_temp();
4774         temp = get_temp();
4775         __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
4776         __ Sllw(mask, mask, shift);
4777         // The argument does not need to be masked for `GetAndUpdateOp::kAdd`,
4778         // the mask shall be applied after the ADD instruction.
4779         prepare_mask = (get_and_update_op == GetAndUpdateOp::kSet) ? mask : kNoXRegister;
4780         break;
4781       case GetAndUpdateOp::kAnd:
4782         // We need the argument with all other bits set to 1, so we always need a temp.
4783         arg_reg = srs.AllocateXRegister();
4784         if (data_size == 1u) {
4785           __ Ori(arg_reg, InputXRegisterOrZero(arg), ~0xff);
4786           DCHECK(!byte_swap);
4787         } else {
4788           DCHECK_EQ(data_size, 2u);
4789           __ Li(arg_reg, ~0xffff);
4790           __ Or(arg_reg, InputXRegisterOrZero(arg), arg_reg);
4791           if (byte_swap) {
4792             __ Rev8(arg_reg, arg_reg);
4793             __ Rori(arg_reg, arg_reg, 48);
4794           }
4795         }
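        // With all bits outside the field forced to 1 (byte-swapped to storage order
        // when needed) and the rotate below placing the value at the field's position,
        // an atomic AND on the containing 32-bit word updates only the target bits.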
4796         __ Rolw(arg_reg, arg_reg, shift);
4797         break;
4798       case GetAndUpdateOp::kOr:
4799       case GetAndUpdateOp::kXor:
4800         // Signed values need to be truncated but we're keeping `prepare_mask == kNoXRegister`.
4801         if (value_type == DataType::Type::kInt8 && !arg.IsConstant()) {
4802           DCHECK(!byte_swap);
4803           arg_reg = srs.AllocateXRegister();
4804           __ ZextB(arg_reg, arg.AsRegister<XRegister>());
4805           __ Sllw(arg_reg, arg_reg, shift);
4806         } else if (value_type == DataType::Type::kInt16 && !arg.IsConstant() && !byte_swap) {
4807           arg_reg = srs.AllocateXRegister();
4808           __ ZextH(arg_reg, arg.AsRegister<XRegister>());
4809           __ Sllw(arg_reg, arg_reg, shift);
4810         }  // else handled by `PrepareXRegister()` below.
4811         break;
4812     }
4813   }
4814   if (arg_reg == kNoXRegister && !use_cas) {
4815     arg_reg = PrepareXRegister(codegen, arg, value_type, shift, prepare_mask, byte_swap, &srs);
4816   }
4817   if (mask != kNoXRegister && get_and_update_op == GetAndUpdateOp::kSet) {
4818     __ Not(mask, mask);  // We need to flip the mask for `kSet`, see `GenerateGetAndUpdate()`.
4819   }
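  // With the mask inverted, the update loop can clear the target bits of the loaded
  // word with a single AND before OR-ing in the shifted new value.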
4820 
4821   if (use_cas) {
4822     // Allocate scratch registers for temps that can theoretically be clobbered on retry.
4823     // (Even though the `retry` label shall never be far enough for `TMP` to be clobbered.)
4824     DCHECK_EQ(available_scratch_registers, 0u);  // Reserved for the two uses below.
4825     XRegister old_value = srs.AllocateXRegister();
4826     XRegister new_value = srs.AllocateXRegister();
4827     // Allocate other needed temporaries.
4828     XRegister reloaded_old_value = get_temp();
4829     XRegister store_result = reloaded_old_value;  // Clobber reloaded old value by store result.
4830     FRegister ftmp = is_fp ? srs.AllocateFRegister() : kNoFRegister;
4831 
4832     Riscv64Label retry;
4833     __ Bind(&retry);
4834     codegen->GetInstructionVisitor()->Load(
4835         Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type);
4836     if (byte_swap) {
4837       GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
4838     } else {
4839       DCHECK(is_fp);
4840       codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
4841     }
4842     if (is_fp) {
4843       codegen->GetInstructionVisitor()->FAdd(
4844           ftmp, out_or_temp.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
4845       codegen->MoveLocation(
4846           Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type);
4847     } else if (arg.IsConstant()) {
4848       DCHECK(arg.GetConstant()->IsZeroBitPattern());
4849       __ Mv(new_value, out_or_temp.AsRegister<XRegister>());
4850     } else if (value_type == DataType::Type::kInt64) {
4851       __ Add(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
4852     } else {
4853       DCHECK_EQ(op_type, DataType::Type::kInt32);
4854       __ Addw(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
4855     }
4856     if (byte_swap) {
4857       DataType::Type swap_type = op_type;
4858       if (is_small) {
4859         DCHECK_EQ(data_size, 2u);
4860         // We want to update only 16 bits of the 32-bit location. The 16 bits we want to replace
4861         // are present in both `old_value` and `out` but in different bits and byte order.
4862         // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16
4863         // (extracting only the bits we want to update), shift and XOR with the old value.
4864         swap_type = DataType::Type::kUint16;
4865         __ Xor(new_value, new_value, out_or_temp.AsRegister<XRegister>());
4866       }
4867       GenerateReverseBytes(codegen, Location::RegisterLocation(new_value), new_value, swap_type);
4868       if (is_small) {
4869         __ Sllw(new_value, new_value, shift);
4870         __ Xor(new_value, new_value, old_value);
4871       }
4872     }
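    // In total: new_value = old_value ^ ((byte-swapped 16-bit difference) << shift),
    // which rewrites only the two bytes of the field and leaves the rest of the word
    // unchanged.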
4873     GenerateCompareAndSet(assembler,
4874                           op_type,
4875                           order,
4876                           /*strong=*/ true,
4877                           /*cmp_failure=*/ &retry,
4878                           tmp_ptr,
4879                           new_value,
4880                           /*old_value=*/ reloaded_old_value,
4881                           /*mask=*/ kNoXRegister,
4882                           /*masked=*/ kNoXRegister,
4883                           store_result,
4884                           /*expected=*/ old_value);
4885   } else {
4886     XRegister old_value = is_fp ? get_temp() : out_or_temp.AsRegister<XRegister>();
4887     GenerateGetAndUpdate(
4888         codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp);
4889     if (byte_swap) {
4890       DCHECK_IMPLIES(is_small, out_or_temp.AsRegister<XRegister>() == old_value)
4891           << " " << value_type << " " << out_or_temp.AsRegister<XRegister>() << "!=" << old_value;
4892       GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
4893     } else if (is_fp) {
4894       codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
4895     } else if (is_small) {
4896       __ Srlw(old_value, old_value, shift);
4897       DCHECK_NE(value_type, DataType::Type::kUint8);
4898       if (value_type == DataType::Type::kInt8) {
4899         __ SextB(old_value, old_value);
4900       } else if (value_type == DataType::Type::kBool) {
4901         __ ZextB(old_value, old_value);
4902       } else if (value_type == DataType::Type::kInt16) {
4903         __ SextH(old_value, old_value);
4904       } else {
4905         DCHECK_EQ(value_type, DataType::Type::kUint16);
4906         __ ZextH(old_value, old_value);
4907       }
4908     } else if (is_reference) {
4909       __ ZextW(old_value, old_value);
4910       if (codegen->EmitBakerReadBarrier()) {
4911         // Use RA as temp. It is clobbered in the slow path anyway.
4912         static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
4913         SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
4914             invoke, out_or_temp, kBakerReadBarrierTemp);
4915         codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_or_temp, kBakerReadBarrierTemp);
4916       } else if (codegen->EmitNonBakerReadBarrier()) {
4917         Location base_loc = Location::RegisterLocation(target.object);
4918         Location index = Location::RegisterLocation(target.offset);
4919         SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath(
4920             invoke, out_or_temp, out_or_temp, base_loc, /*offset=*/ 0u, index);
4921         __ J(rb_slow_path->GetEntryLabel());
4922         __ Bind(rb_slow_path->GetExitLabel());
4923       }
4924     }
4925   }
4926 
4927   if (slow_path != nullptr) {
4928     DCHECK(!byte_swap);
4929     __ Bind(slow_path->GetExitLabel());
4930   }
4931 
4932   // Check that we have allocated the right number of temps. We may need more registers
4933   // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
4934   // In the void case, we requested an extra register to mimic the `out` register.
4935   const size_t extra_temp_registers = is_void ? 1u : 0u;
4936   bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u);
4937   if ((!has_byte_swap || byte_swap) &&
4938       next_temp != locations->GetTempCount() - extra_temp_registers) {
4939     // We allocate a temporary register for the class object for a static field `VarHandle` but
4940     // we do not update the `next_temp` if it's otherwise unused after the address calculation.
4941     CHECK_EQ(arg_index, 1u);
4942     CHECK_EQ(next_temp, 1u);
4943     CHECK_EQ(locations->GetTempCount(), 2u + extra_temp_registers);
4944   }
4945 }
4946 
VisitVarHandleGetAndSet(HInvoke * invoke)4947 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4948   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4949 }
4950 
VisitVarHandleGetAndSet(HInvoke * invoke)4951 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4952   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
4953 }
4954 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4955 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4956   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4957 }
4958 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4959 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4960   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
4961 }
4962 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4963 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4964   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4965 }
4966 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4967 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4968   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
4969 }
4970 
VisitVarHandleGetAndAdd(HInvoke * invoke)4971 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4972   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4973 }
4974 
VisitVarHandleGetAndAdd(HInvoke * invoke)4975 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4976   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
4977 }
4978 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4979 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4980   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4981 }
4982 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4983 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4984   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
4985 }
4986 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4987 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4988   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4989 }
4990 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4991 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4992   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
4993 }
4994 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4995 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4996   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
4997 }
4998 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4999 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5000   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5001 }
5002 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)5003 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5004   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5005 }
5006 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)5007 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5008   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5009 }
5010 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5011 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5012   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5013 }
5014 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5015 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5016   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5017 }
5018 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5019 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5020   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5021 }
5022 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5023 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5024   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5025 }
5026 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5027 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5028   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5029 }
5030 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5031 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5032   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5033 }
5034 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5035 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5036   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5037 }
5038 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5039 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5040   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5041 }
5042 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5043 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5044   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5045 }
5046 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5047 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5048   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5049 }
5050 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5051 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5052   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5053 }
5054 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5055 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5056   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5057 }
5058 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5059 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5060   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5061 }
5062 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5063 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5064   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5065 }
5066 
EmitByteArrayViewCode(CodeGenerator * codegen_in)5067 void VarHandleSlowPathRISCV64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5068   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5069   CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
5070   Riscv64Assembler* assembler = codegen->GetAssembler();
5071   HInvoke* invoke = GetInvoke();
5072   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5073   DataType::Type value_type =
5074       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5075   DCHECK_NE(value_type, DataType::Type::kReference);
5076   size_t size = DataType::Size(value_type);
5077   DCHECK_GT(size, 1u);
5078   LocationSummary* locations = invoke->GetLocations();
5079   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
5080   XRegister object = locations->InAt(1).AsRegister<XRegister>();
5081   XRegister index = locations->InAt(2).AsRegister<XRegister>();
5082 
5083   MemberOffset class_offset = mirror::Object::ClassOffset();
5084   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5085   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5086   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5087 
5088   __ Bind(GetByteArrayViewCheckLabel());
5089 
5090   VarHandleTarget target = GetVarHandleTarget(invoke);
5091   {
5092     ScratchRegisterScope srs(assembler);
5093     XRegister temp = srs.AllocateXRegister();
5094     XRegister temp2 = srs.AllocateXRegister();
5095 
5096     // The main path checked that coordinateType0 is an array class matching the class
5097     // of the actual coordinate argument, but its component type does not match the value type.
5098     // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5099     __ Loadwu(temp, varhandle, class_offset.Int32Value());
5100     codegen->MaybeUnpoisonHeapReference(temp);
5101     codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5102     __ Bne(temp, temp2, GetEntryLabel());
5103 
5104     // Check for array index out of bounds.
5105     __ Loadw(temp, object, array_length_offset.Int32Value());
5106     __ Bgeu(index, temp, GetEntryLabel());
5107     __ Addi(temp2, index, size - 1u);
5108     __ Bgeu(temp2, temp, GetEntryLabel());
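    // I.e. go to the runtime unless the whole [index, index + size) range lies within
    // the array bounds.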
5109 
5110     // Construct the target.
5111     __ Addi(target.offset, index, data_offset.Int32Value());
5112 
5113     // Alignment check. For unaligned access, go to the runtime.
5114     DCHECK(IsPowerOfTwo(size));
5115     __ Andi(temp, target.offset, size - 1u);
5116     __ Bnez(temp, GetEntryLabel());
5117 
5118     // Byte order check. For native byte order return to the main path.
5119     if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5120         IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5121       // There is no reason to differentiate between native byte order and byte-swap
5122       // for setting a zero bit pattern. Just return to the main path.
5123       __ J(GetNativeByteOrderLabel());
5124       return;
5125     }
5126     __ Loadbu(temp, varhandle, native_byte_order_offset.Int32Value());
5127     __ Bnez(temp, GetNativeByteOrderLabel());
5128   }
5129 
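  // Not the native byte order: emit the byte-swapping variant of the access below.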
5130   switch (access_mode_template) {
5131     case mirror::VarHandle::AccessModeTemplate::kGet:
5132       GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
5133       break;
5134     case mirror::VarHandle::AccessModeTemplate::kSet:
5135       GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
5136       break;
5137     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5138     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5139       GenerateVarHandleCompareAndSetOrExchange(
5140           invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5141       break;
5142     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5143       GenerateVarHandleGetAndUpdate(
5144           invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5145       break;
5146   }
5147   __ J(GetExitLabel());
5148 }
5149 
VisitThreadCurrentThread(HInvoke * invoke)5150 void IntrinsicLocationsBuilderRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
5151   LocationSummary* locations =
5152       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5153   locations->SetOut(Location::RequiresRegister());
5154 }
5155 
VisitThreadCurrentThread(HInvoke * invoke)5156 void IntrinsicCodeGeneratorRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
5157   Riscv64Assembler* assembler = GetAssembler();
5158   XRegister out = invoke->GetLocations()->Out().AsRegister<XRegister>();
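  // The peer is loaded as a 32-bit heap reference from the Thread object addressed
  // via the thread register (TR); Loadwu zero-extends it into `out`.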
5159   __ Loadwu(out, TR, Thread::PeerOffset<kRiscv64PointerSize>().Int32Value());
5160 }
5161 
VisitThreadInterrupted(HInvoke * invoke)5162 void IntrinsicLocationsBuilderRISCV64::VisitThreadInterrupted(HInvoke* invoke) {
5163   LocationSummary* locations =
5164       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5165   locations->SetOut(Location::RequiresRegister());
5166 }
5167 
VisitThreadInterrupted(HInvoke * invoke)5168 void IntrinsicCodeGeneratorRISCV64::VisitThreadInterrupted(HInvoke* invoke) {
5169   LocationSummary* locations = invoke->GetLocations();
5170   Riscv64Assembler* assembler = GetAssembler();
5171   XRegister out = locations->Out().AsRegister<XRegister>();
5172   Riscv64Label done;
5173 
5174   codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5175   __ Loadw(out, TR, Thread::InterruptedOffset<kRiscv64PointerSize>().Int32Value());
5176   __ Beqz(out, &done);
5177   __ Storew(Zero, TR, Thread::InterruptedOffset<kRiscv64PointerSize>().Int32Value());
5178   codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5179   __ Bind(&done);
5180 }
5181 
VisitReachabilityFence(HInvoke * invoke)5182 void IntrinsicLocationsBuilderRISCV64::VisitReachabilityFence(HInvoke* invoke) {
5183   LocationSummary* locations =
5184       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5185   locations->SetInAt(0, Location::Any());
5186 }
5187 
VisitReachabilityFence(HInvoke * invoke)5188 void IntrinsicCodeGeneratorRISCV64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
5189 
VisitMathFmaDouble(HInvoke * invoke)5190 void IntrinsicLocationsBuilderRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
5191   CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
5192 }
5193 
VisitMathFmaDouble(HInvoke * invoke)5194 void IntrinsicCodeGeneratorRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
5195   LocationSummary* locations = invoke->GetLocations();
5196   Riscv64Assembler* assembler = GetAssembler();
5197   FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
5198   FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
5199   FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
5200   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5201 
5202   __ FMAddD(out, n, m, a);
5203 }
5204 
VisitMathFmaFloat(HInvoke * invoke)5205 void IntrinsicLocationsBuilderRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
5206   CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
5207 }
5208 
VisitMathFmaFloat(HInvoke * invoke)5209 void IntrinsicCodeGeneratorRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
5210   LocationSummary* locations = invoke->GetLocations();
5211   Riscv64Assembler* assembler = GetAssembler();
5212   FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
5213   FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
5214   FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
5215   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5216 
5217   __ FMAddS(out, n, m, a);
5218 }
5219 
5220 
VisitMathCos(HInvoke * invoke)5221 void IntrinsicLocationsBuilderRISCV64::VisitMathCos(HInvoke* invoke) {
5222   CreateFPToFPCallLocations(allocator_, invoke);
5223 }
5224 
VisitMathCos(HInvoke * invoke)5225 void IntrinsicCodeGeneratorRISCV64::VisitMathCos(HInvoke* invoke) {
5226   codegen_->InvokeRuntime(kQuickCos, invoke);
5227 }
5228 
VisitMathSin(HInvoke * invoke)5229 void IntrinsicLocationsBuilderRISCV64::VisitMathSin(HInvoke* invoke) {
5230   CreateFPToFPCallLocations(allocator_, invoke);
5231 }
5232 
VisitMathSin(HInvoke * invoke)5233 void IntrinsicCodeGeneratorRISCV64::VisitMathSin(HInvoke* invoke) {
5234   codegen_->InvokeRuntime(kQuickSin, invoke);
5235 }
5236 
VisitMathAcos(HInvoke * invoke)5237 void IntrinsicLocationsBuilderRISCV64::VisitMathAcos(HInvoke* invoke) {
5238   CreateFPToFPCallLocations(allocator_, invoke);
5239 }
5240 
VisitMathAcos(HInvoke * invoke)5241 void IntrinsicCodeGeneratorRISCV64::VisitMathAcos(HInvoke* invoke) {
5242   codegen_->InvokeRuntime(kQuickAcos, invoke);
5243 }
5244 
VisitMathAsin(HInvoke * invoke)5245 void IntrinsicLocationsBuilderRISCV64::VisitMathAsin(HInvoke* invoke) {
5246   CreateFPToFPCallLocations(allocator_, invoke);
5247 }
5248 
VisitMathAsin(HInvoke * invoke)5249 void IntrinsicCodeGeneratorRISCV64::VisitMathAsin(HInvoke* invoke) {
5250   codegen_->InvokeRuntime(kQuickAsin, invoke);
5251 }
5252 
VisitMathAtan(HInvoke * invoke)5253 void IntrinsicLocationsBuilderRISCV64::VisitMathAtan(HInvoke* invoke) {
5254   CreateFPToFPCallLocations(allocator_, invoke);
5255 }
5256 
VisitMathAtan(HInvoke * invoke)5257 void IntrinsicCodeGeneratorRISCV64::VisitMathAtan(HInvoke* invoke) {
5258   codegen_->InvokeRuntime(kQuickAtan, invoke);
5259 }
5260 
VisitMathAtan2(HInvoke * invoke)5261 void IntrinsicLocationsBuilderRISCV64::VisitMathAtan2(HInvoke* invoke) {
5262   CreateFPFPToFPCallLocations(allocator_, invoke);
5263 }
5264 
VisitMathAtan2(HInvoke * invoke)5265 void IntrinsicCodeGeneratorRISCV64::VisitMathAtan2(HInvoke* invoke) {
5266   codegen_->InvokeRuntime(kQuickAtan2, invoke);
5267 }
5268 
VisitMathPow(HInvoke * invoke)5269 void IntrinsicLocationsBuilderRISCV64::VisitMathPow(HInvoke* invoke) {
5270   CreateFPFPToFPCallLocations(allocator_, invoke);
5271 }
5272 
VisitMathPow(HInvoke * invoke)5273 void IntrinsicCodeGeneratorRISCV64::VisitMathPow(HInvoke* invoke) {
5274   codegen_->InvokeRuntime(kQuickPow, invoke);
5275 }
5276 
VisitMathCbrt(HInvoke * invoke)5277 void IntrinsicLocationsBuilderRISCV64::VisitMathCbrt(HInvoke* invoke) {
5278   CreateFPToFPCallLocations(allocator_, invoke);
5279 }
5280 
VisitMathCbrt(HInvoke * invoke)5281 void IntrinsicCodeGeneratorRISCV64::VisitMathCbrt(HInvoke* invoke) {
5282   codegen_->InvokeRuntime(kQuickCbrt, invoke);
5283 }
5284 
VisitMathCosh(HInvoke * invoke)5285 void IntrinsicLocationsBuilderRISCV64::VisitMathCosh(HInvoke* invoke) {
5286   CreateFPToFPCallLocations(allocator_, invoke);
5287 }
5288 
VisitMathCosh(HInvoke * invoke)5289 void IntrinsicCodeGeneratorRISCV64::VisitMathCosh(HInvoke* invoke) {
5290   codegen_->InvokeRuntime(kQuickCosh, invoke);
5291 }
5292 
VisitMathExp(HInvoke * invoke)5293 void IntrinsicLocationsBuilderRISCV64::VisitMathExp(HInvoke* invoke) {
5294   CreateFPToFPCallLocations(allocator_, invoke);
5295 }
5296 
VisitMathExp(HInvoke * invoke)5297 void IntrinsicCodeGeneratorRISCV64::VisitMathExp(HInvoke* invoke) {
5298   codegen_->InvokeRuntime(kQuickExp, invoke);
5299 }
5300 
VisitMathExpm1(HInvoke * invoke)5301 void IntrinsicLocationsBuilderRISCV64::VisitMathExpm1(HInvoke* invoke) {
5302   CreateFPToFPCallLocations(allocator_, invoke);
5303 }
5304 
VisitMathExpm1(HInvoke * invoke)5305 void IntrinsicCodeGeneratorRISCV64::VisitMathExpm1(HInvoke* invoke) {
5306   codegen_->InvokeRuntime(kQuickExpm1, invoke);
5307 }
5308 
VisitMathHypot(HInvoke * invoke)5309 void IntrinsicLocationsBuilderRISCV64::VisitMathHypot(HInvoke* invoke) {
5310   CreateFPFPToFPCallLocations(allocator_, invoke);
5311 }
5312 
VisitMathHypot(HInvoke * invoke)5313 void IntrinsicCodeGeneratorRISCV64::VisitMathHypot(HInvoke* invoke) {
5314   codegen_->InvokeRuntime(kQuickHypot, invoke);
5315 }
5316 
VisitMathLog(HInvoke * invoke)5317 void IntrinsicLocationsBuilderRISCV64::VisitMathLog(HInvoke* invoke) {
5318   CreateFPToFPCallLocations(allocator_, invoke);
5319 }
5320 
VisitMathLog(HInvoke * invoke)5321 void IntrinsicCodeGeneratorRISCV64::VisitMathLog(HInvoke* invoke) {
5322   codegen_->InvokeRuntime(kQuickLog, invoke);
5323 }
5324 
VisitMathLog10(HInvoke * invoke)5325 void IntrinsicLocationsBuilderRISCV64::VisitMathLog10(HInvoke* invoke) {
5326   CreateFPToFPCallLocations(allocator_, invoke);
5327 }
5328 
VisitMathLog10(HInvoke * invoke)5329 void IntrinsicCodeGeneratorRISCV64::VisitMathLog10(HInvoke* invoke) {
5330   codegen_->InvokeRuntime(kQuickLog10, invoke);
5331 }
5332 
VisitMathNextAfter(HInvoke * invoke)5333 void IntrinsicLocationsBuilderRISCV64::VisitMathNextAfter(HInvoke* invoke) {
5334   CreateFPFPToFPCallLocations(allocator_, invoke);
5335 }
5336 
VisitMathNextAfter(HInvoke * invoke)5337 void IntrinsicCodeGeneratorRISCV64::VisitMathNextAfter(HInvoke* invoke) {
5338   codegen_->InvokeRuntime(kQuickNextAfter, invoke);
5339 }
5340 
VisitMathSinh(HInvoke * invoke)5341 void IntrinsicLocationsBuilderRISCV64::VisitMathSinh(HInvoke* invoke) {
5342   CreateFPToFPCallLocations(allocator_, invoke);
5343 }
5344 
VisitMathSinh(HInvoke * invoke)5345 void IntrinsicCodeGeneratorRISCV64::VisitMathSinh(HInvoke* invoke) {
5346   codegen_->InvokeRuntime(kQuickSinh, invoke);
5347 }
5348 
VisitMathTan(HInvoke * invoke)5349 void IntrinsicLocationsBuilderRISCV64::VisitMathTan(HInvoke* invoke) {
5350   CreateFPToFPCallLocations(allocator_, invoke);
5351 }
5352 
VisitMathTan(HInvoke * invoke)5353 void IntrinsicCodeGeneratorRISCV64::VisitMathTan(HInvoke* invoke) {
5354   codegen_->InvokeRuntime(kQuickTan, invoke);
5355 }
5356 
VisitMathTanh(HInvoke * invoke)5357 void IntrinsicLocationsBuilderRISCV64::VisitMathTanh(HInvoke* invoke) {
5358   CreateFPToFPCallLocations(allocator_, invoke);
5359 }
5360 
VisitMathTanh(HInvoke * invoke)5361 void IntrinsicCodeGeneratorRISCV64::VisitMathTanh(HInvoke* invoke) {
5362   codegen_->InvokeRuntime(kQuickTanh, invoke);
5363 }
5364 
VisitMathSqrt(HInvoke * invoke)5365 void IntrinsicLocationsBuilderRISCV64::VisitMathSqrt(HInvoke* invoke) {
5366   CreateFPToFPLocations(allocator_, invoke, Location::kNoOutputOverlap);
5367 }
5368 
VisitMathSqrt(HInvoke * invoke)5369 void IntrinsicCodeGeneratorRISCV64::VisitMathSqrt(HInvoke* invoke) {
5370   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
5371   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
5372 
5373   LocationSummary* locations = invoke->GetLocations();
5374   Riscv64Assembler* assembler = GetAssembler();
5375   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5376   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5377 
5378   __ FSqrtD(out, in);
5379 }
5380 
GenDoubleRound(Riscv64Assembler * assembler,HInvoke * invoke,FPRoundingMode mode)5381 static void GenDoubleRound(Riscv64Assembler* assembler, HInvoke* invoke, FPRoundingMode mode) {
5382   LocationSummary* locations = invoke->GetLocations();
5383   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5384   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5385   ScratchRegisterScope srs(assembler);
5386   XRegister tmp = srs.AllocateXRegister();
5387   FRegister ftmp = srs.AllocateFRegister();
5388   Riscv64Label done;
5389 
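  // Doubles with a magnitude of at least 2^52 have no fractional bits, so they are
  // already integral (NaN and infinities also take this early-out path); only smaller
  // values go through the convert-to-integer-and-back sequence below.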
5390   // Load 2^52
5391   __ LoadConst64(tmp, 0x4330000000000000L);
5392   __ FMvDX(ftmp, tmp);
5393   __ FAbsD(out, in);
5394   __ FLtD(tmp, out, ftmp);
5395 
5396   // If the input is NaN or its magnitude is at least 2^52 (already an integer), keep the input as the result.
5397   __ FMvD(out, in);
5398   __ Beqz(tmp, &done);
5399 
5400   // Convert with rounding mode
5401   __ FCvtLD(tmp, in, mode);
5402   __ FCvtDL(ftmp, tmp, mode);
5403 
5404   // Copy the sign bit from the input (preserves -0.0 and the sign of values that round to zero).
5405   __ FSgnjD(out, ftmp, in);
5406   __ Bind(&done);
5407 }
5408 
VisitMathFloor(HInvoke * invoke)5409 void IntrinsicLocationsBuilderRISCV64::VisitMathFloor(HInvoke* invoke) {
5410   CreateFPToFPLocations(allocator_, invoke);
5411 }
5412 
VisitMathFloor(HInvoke * invoke)5413 void IntrinsicCodeGeneratorRISCV64::VisitMathFloor(HInvoke* invoke) {
5414   GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRDN);
5415 }
5416 
VisitMathCeil(HInvoke * invoke)5417 void IntrinsicLocationsBuilderRISCV64::VisitMathCeil(HInvoke* invoke) {
5418   CreateFPToFPLocations(allocator_, invoke);
5419 }
5420 
VisitMathCeil(HInvoke * invoke)5421 void IntrinsicCodeGeneratorRISCV64::VisitMathCeil(HInvoke* invoke) {
5422   GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRUP);
5423 }
5424 
VisitMathRint(HInvoke * invoke)5425 void IntrinsicLocationsBuilderRISCV64::VisitMathRint(HInvoke* invoke) {
5426   CreateFPToFPLocations(allocator_, invoke);
5427 }
5428 
VisitMathRint(HInvoke * invoke)5429 void IntrinsicCodeGeneratorRISCV64::VisitMathRint(HInvoke* invoke) {
5430   GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRNE);
5431 }
5432 
GenMathRound(CodeGeneratorRISCV64 * codegen,HInvoke * invoke,DataType::Type type)5433 void GenMathRound(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5434   Riscv64Assembler* assembler = codegen->GetAssembler();
5435   LocationSummary* locations = invoke->GetLocations();
5436   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5437   XRegister out = locations->Out().AsRegister<XRegister>();
5438   ScratchRegisterScope srs(assembler);
5439   FRegister ftmp = srs.AllocateFRegister();
5440   Riscv64Label done;
5441 
5442   // Check for NaN; for a NaN input `out` ends up as 0 and is returned directly (Math.round(NaN) == 0).
5443   codegen->GetInstructionVisitor()->FClass(out, in, type);
5444   __ Slti(out, out, kFClassNaNMinValue);
5445   __ Beqz(out, &done);
5446 
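  // Adding 0.5 with the rounding mode set towards negative infinity (instead of the
  // default round-to-nearest) avoids double rounding for inputs just below 0.5, e.g.
  // 0.49999999999999994 must round to 0; the subsequent conversion towards negative
  // infinity then implements the required floor(), with half-way cases rounding up.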
5447   if (type == DataType::Type::kFloat64) {
5448     // Add 0.5 (0x3fe0000000000000), rounding down (towards negative infinity).
5449     __ LoadConst64(out, 0x3fe0000000000000L);
5450     __ FMvDX(ftmp, out);
5451     __ FAddD(ftmp, ftmp, in, FPRoundingMode::kRDN);
5452 
5453     // Convert to managed `long`, rounding down (towards negative infinity).
5454     __ FCvtLD(out, ftmp, FPRoundingMode::kRDN);
5455   } else {
5456     // Add 0.5 (0x3f000000), rounding down (towards negative infinity).
5457     __ LoadConst32(out, 0x3f000000);
5458     __ FMvWX(ftmp, out);
5459     __ FAddS(ftmp, ftmp, in, FPRoundingMode::kRDN);
5460 
5461     // Convert to managed `int`, rounding down (towards negative infinity).
5462     __ FCvtWS(out, ftmp, FPRoundingMode::kRDN);
5463   }
5464 
5465   __ Bind(&done);
5466 }
5467 
VisitMathRoundDouble(HInvoke * invoke)5468 void IntrinsicLocationsBuilderRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
5469   CreateFPToIntLocations(allocator_, invoke);
5470 }
5471 
VisitMathRoundDouble(HInvoke * invoke)5472 void IntrinsicCodeGeneratorRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
5473   GenMathRound(codegen_, invoke, DataType::Type::kFloat64);
5474 }
5475 
VisitMathRoundFloat(HInvoke * invoke)5476 void IntrinsicLocationsBuilderRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
5477   CreateFPToIntLocations(allocator_, invoke);
5478 }
5479 
VisitMathRoundFloat(HInvoke * invoke)5480 void IntrinsicCodeGeneratorRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
5481   GenMathRound(codegen_, invoke, DataType::Type::kFloat32);
5482 }
5483 
VisitMathMultiplyHigh(HInvoke * invoke)5484 void IntrinsicLocationsBuilderRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
5485   LocationSummary* locations =
5486       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5487   locations->SetInAt(0, Location::RequiresRegister());
5488   locations->SetInAt(1, Location::RequiresRegister());
5489   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5490 }
5491 
VisitMathMultiplyHigh(HInvoke * invoke)5492 void IntrinsicCodeGeneratorRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
5493   LocationSummary* locations = invoke->GetLocations();
5494   Riscv64Assembler* assembler = GetAssembler();
5495   DCHECK(invoke->GetType() == DataType::Type::kInt64);
5496 
5497   XRegister x = locations->InAt(0).AsRegister<XRegister>();
5498   XRegister y = locations->InAt(1).AsRegister<XRegister>();
5499   XRegister out = locations->Out().AsRegister<XRegister>();
5500 
5501   // Get the high 64 bits of the 128-bit signed product.
5502   __ Mulh(out, x, y);
5503 }
5504 
VisitStringGetCharsNoCheck(HInvoke * invoke)5505 void IntrinsicLocationsBuilderRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
5506   LocationSummary* locations =
5507       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5508 
5509   locations->SetInAt(0, Location::RequiresRegister());
5510   locations->SetInAt(1, Location::RequiresRegister());
5511   locations->SetInAt(2, Location::RequiresRegister());
5512   locations->SetInAt(3, Location::RequiresRegister());
5513   locations->SetInAt(4, Location::RequiresRegister());
5514 
5515   locations->AddRegisterTemps(3);
5516 }
5517 
5518 void IntrinsicCodeGeneratorRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
5519   Riscv64Assembler* assembler = GetAssembler();
5520   LocationSummary* locations = invoke->GetLocations();
5521 
5522   // In Java sizeof(Char) is 2.
5523   constexpr size_t char_size = DataType::Size(DataType::Type::kUint16);
5524   static_assert(char_size == 2u);
5525 
5526   // Location of data in the destination char array buffer.
5527   const uint32_t array_data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
5528 
5529   // Location of char array data in the source string.
5530   const uint32_t string_value_offset = mirror::String::ValueOffset().Uint32Value();
5531 
5532   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
5533 
5534   // The source string.
5535   XRegister source_string_object = locations->InAt(0).AsRegister<XRegister>();
5536   // Index of the first character.
5537   XRegister source_begin_index = locations->InAt(1).AsRegister<XRegister>();
5538   // Index that immediately follows the last character.
5539   XRegister source_end_index = locations->InAt(2).AsRegister<XRegister>();
5540   // The destination array.
5541   XRegister destination_array_object = locations->InAt(3).AsRegister<XRegister>();
5542   // The start offset in the destination array.
5543   XRegister destination_begin_offset = locations->InAt(4).AsRegister<XRegister>();
5544 
5545   XRegister source_ptr = locations->GetTemp(0).AsRegister<XRegister>();
5546   XRegister destination_ptr = locations->GetTemp(1).AsRegister<XRegister>();
5547   XRegister number_of_chars = locations->GetTemp(2).AsRegister<XRegister>();
5548 
5549   ScratchRegisterScope temps(assembler);
5550   XRegister tmp = temps.AllocateXRegister();
5551 
5552   Riscv64Label done;
5553 
5554   // Calculate the number of characters (number_of_chars) to copy.
5555   __ Subw(number_of_chars, source_end_index, source_begin_index);
5556 
5557   // If there are no characters to copy, exit.
5558   __ Beqz(number_of_chars, &done);
5559 
5560   // Prepare a register with the destination address
5561   // to start copying to:
5562   // 1. start at the address where the data of the
5563   //    destination array begins (destination_array_object + array_data_offset);
5564   __ Addi(destination_ptr, destination_array_object, array_data_offset);
5565   // 2. add the start offset relative to the beginning of the data
5566   //    in the destination array; since sizeof(Char) is 2, the offset
5567   //    must first be scaled by 2
5568   //    (destination_begin_offset * 2, i.e. destination_begin_offset << 1);
5569   __ Sh1Add(destination_ptr, destination_begin_offset, destination_ptr);
5570 
5571   // Prepare a register with the source address
5572   // to start copying from:
5573   // 1. start at the address where the character data of the
5574   //    source string begins (source_string_object + string_value_offset).
5575   // The remaining adjustment is performed later,
5576   // since it depends on whether the string is compressed or not.
5577   __ Addi(source_ptr, source_string_object, string_value_offset);
5578 
5579   // The string can be compressed, a more compact representation in which
5580   // every character occupies one byte (instead of two).
5581   Riscv64Label compressed_string_preloop;
5582 
5583   // Whether the string is compressed is recorded in the field that stores
5584   // the string's length: if STRING_COMPRESSION_ENABLED, the least
5585   // significant bit of the count field is used as the compression flag
5586   // and the actual length occupies the remaining bits.
5587   if (mirror::kUseStringCompression) {
5588     // Location of count in string.
5589     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5590     // String's length.
5591     __ Loadwu(tmp, source_string_object, count_offset);
5592 
5593     // Check the compression flag (bit 0 of the count field): it is 0 for a
5594     // compressed string, in which case move to the "compressed_string_preloop".
5595     __ Andi(tmp, tmp, 0x1);
5596     __ Beqz(tmp, &compressed_string_preloop);
5597   }
5598 
5599   // Continue preparing the source register:
5600   // proceed similarly to what was done for the destination register.
5601   __ Sh1Add(source_ptr, source_begin_index, source_ptr);
5602 
5603   // If the string is not compressed, then perform ordinary copying.
5604   // Copying proceeds 4 characters (8 bytes) at a time; once fewer than 4
5605   // characters are left, move to the "remainder_loop" and copy the remaining
5606   // characters one at a time (2 bytes each).
5607   // Note: Unaligned addresses are acceptable here and no additional code is
5608   // required to correct them.
5609   Riscv64Label main_loop;
5610   Riscv64Label remainder_loop;
5611 
5612   // If initially there are fewer than 4 characters,
5613   // then go directly to the remainder loop.
5614   __ Addi(tmp, number_of_chars, -4);
5615   __ Bltz(tmp, &remainder_loop);
5616 
5617   // Otherwise, save the value to the counter and continue.
5618   __ Mv(number_of_chars, tmp);
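  // number_of_chars now holds (characters left - 4); the main loop keeps running while
  // this biased counter stays non-negative, i.e. while at least 4 characters remain.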
5619 
5620   // Main loop. Loads and stores 4 16-bit Java characters at a time.
5621   __ Bind(&main_loop);
5622 
5623   __ Loadd(tmp, source_ptr, 0);
5624   __ Addi(source_ptr, source_ptr, char_size * 4);
5625   __ Stored(tmp, destination_ptr, 0);
5626   __ Addi(destination_ptr, destination_ptr, char_size * 4);
5627 
5628   __ Addi(number_of_chars, number_of_chars, -4);
5629 
5630   __ Bgez(number_of_chars, &main_loop);
5631 
5632   // Undo the -4 bias to get the number of remaining characters.
5633   __ Addi(number_of_chars, number_of_chars, 4);
5634   __ Beqz(number_of_chars, &done);
5635 
5636   // Remainder loop for < 4 characters case and remainder handling.
5637   // Loads and stores one 16-bit Java character at a time.
5638   __ Bind(&remainder_loop);
5639 
5640   __ Loadhu(tmp, source_ptr, 0);
5641   __ Addi(source_ptr, source_ptr, char_size);
5642 
5643   __ Storeh(tmp, destination_ptr, 0);
5644   __ Addi(destination_ptr, destination_ptr, char_size);
5645 
5646   __ Addi(number_of_chars, number_of_chars, -1);
5647   __ Bgtz(number_of_chars, &remainder_loop);
5648 
5649   Riscv64Label compressed_string_loop;
5650   if (mirror::kUseStringCompression) {
5651     __ J(&done);
5652 
5653     // Below is the copy loop for the compressed-string case mentioned above, where
5654     // every character in the source string occupies only one byte (instead of two).
5655     constexpr size_t compressed_char_size = DataType::Size(DataType::Type::kInt8);
5656     static_assert(compressed_char_size == 1u);
5657 
5658     __ Bind(&compressed_string_preloop);
5659 
5660     // Continue preparing the source register:
5661     // proceed as was done for the destination register, except that each
5662     // source character occupies only one byte and is widened to two bytes
5663     // when it is stored to the destination.
5664     // Consequently, source_begin_index does not need to be scaled.
5665     __ Add(source_ptr, source_ptr, source_begin_index);
5666 
5667     // Copy loop for compressed strings. Copies one character at a time, widening 8-bit to 16-bit.
5668     __ Bind(&compressed_string_loop);
5669 
5670     __ Loadbu(tmp, source_ptr, 0);
5671     __ Addi(source_ptr, source_ptr, compressed_char_size);
5672     __ Storeh(tmp, destination_ptr, 0);
5673     __ Addi(destination_ptr, destination_ptr, char_size);
5674 
5675     __ Addi(number_of_chars, number_of_chars, -1);
5676     __ Bgtz(number_of_chars, &compressed_string_loop);
5677   }
5678 
5679   __ Bind(&done);
5680 }
5681 
5682 void GenMathSignum(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5683   LocationSummary* locations = invoke->GetLocations();
5684   DCHECK(locations->InAt(0).Equals(locations->Out()));
5685   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5686   Riscv64Assembler* assembler = codegen->GetAssembler();
5687   ScratchRegisterScope srs(assembler);
5688   XRegister tmp = srs.AllocateXRegister();
5689   FRegister ftmp = srs.AllocateFRegister();
5690   Riscv64Label done;
5691 
5692   if (type == DataType::Type::kFloat64) {
5693     // 0x3FF0000000000000L = 1.0
5694     __ Li(tmp, 0x3FF0000000000000L);
5695     __ FMvDX(ftmp, tmp);
5696     __ FClassD(tmp, in);
5697   } else {
5698     // 0x3f800000 = 1.0f
5699     __ Li(tmp, 0x3F800000);
5700     __ FMvWX(ftmp, tmp);
5701     __ FClassS(tmp, in);
5702   }
5703 
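  // Math.signum returns the argument itself for +/-0.0 and for NaN, so if the
  // classification shows one of those cases, skip the sign transfer below.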
5704   __ Andi(tmp, tmp, kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN);
5705   __ Bnez(tmp, &done);
5706 
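  // FSGNJ copies the first source operand (here the constant 1.0) with its sign bit
  // replaced by the sign of the second, producing +/-1.0 according to the input's sign.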
5707   if (type == DataType::Type::kFloat64) {
5708     __ FSgnjD(in, ftmp, in);
5709   } else {
5710     __ FSgnjS(in, ftmp, in);
5711   }
5712 
5713   __ Bind(&done);
5714 }
5715 
5716 void IntrinsicLocationsBuilderRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
5717   LocationSummary* locations =
5718       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5719   locations->SetInAt(0, Location::RequiresFpuRegister());
5720   locations->SetOut(Location::SameAsFirstInput());
5721 }
5722 
5723 void IntrinsicCodeGeneratorRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
5724   GenMathSignum(codegen_, invoke, DataType::Type::kFloat64);
5725 }
5726 
5727 void IntrinsicLocationsBuilderRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
5728   LocationSummary* locations =
5729       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5730   locations->SetInAt(0, Location::RequiresFpuRegister());
5731   locations->SetOut(Location::SameAsFirstInput());
5732 }
5733 
5734 void IntrinsicCodeGeneratorRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
5735   GenMathSignum(codegen_, invoke, DataType::Type::kFloat32);
5736 }
5737 
5738 void GenMathCopySign(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5739   Riscv64Assembler* assembler = codegen->GetAssembler();
5740   LocationSummary* locations = invoke->GetLocations();
5741   FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>();
5742   FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>();
5743   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5744 
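  // FSGNJ takes all bits of the first source operand except the sign bit, which comes
  // from the second operand; this is exactly Math.copySign(magnitude, sign).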
5745   if (type == DataType::Type::kFloat64) {
5746     __ FSgnjD(out, in0, in1);
5747   } else {
5748     __ FSgnjS(out, in0, in1);
5749   }
5750 }
5751 
5752 void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
5753   CreateFPFPToFPCallLocations(allocator_, invoke);
5754 }
5755 
5756 void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
5757   GenMathCopySign(codegen_, invoke, DataType::Type::kFloat64);
5758 }
5759 
5760 void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
5761   CreateFPFPToFPCallLocations(allocator_, invoke);
5762 }
5763 
5764 void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
5765   GenMathCopySign(codegen_, invoke, DataType::Type::kFloat32);
5766 }
5767 
5768 void IntrinsicLocationsBuilderRISCV64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
5769   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
5770   LocationSummary* locations = new (allocator)
5771       LocationSummary(invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
5772 
5773   InvokeDexCallingConventionVisitorRISCV64 calling_convention;
5774   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
5775   locations->SetInAt(0, Location::RequiresRegister());
5776 
5777   // Accommodating the LocationSummary for the underlying invoke-* call.
5778   uint32_t number_of_args = invoke->GetNumberOfArguments();
5779   for (uint32_t i = 1; i < number_of_args; ++i) {
5780     locations->SetInAt(i, calling_convention.GetNextLocation(invoke->InputAt(i)->GetType()));
5781   }
5782 
5783   // The last input is the MethodType object corresponding to the call site.
5784   locations->SetInAt(number_of_args, Location::RequiresRegister());
5785 
5786   locations->AddTemp(calling_convention.GetMethodLocation());
5787   locations->AddRegisterTemps(2);
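  // The first temp is the method register required by the calling convention; the two
  // extra temps are scratch registers used by the dispatch checks in the code generator.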
5788 }
5789 
5790 void IntrinsicCodeGeneratorRISCV64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
5791   LocationSummary* locations = invoke->GetLocations();
5792   XRegister method_handle = locations->InAt(0).AsRegister<XRegister>();
5793   SlowPathCodeRISCV64* slow_path =
5794       new (codegen_->GetScopedAllocator()) InvokePolymorphicSlowPathRISCV64(invoke, method_handle);
5795 
5796   codegen_->AddSlowPath(slow_path);
5797   Riscv64Assembler* assembler = GetAssembler();
5798   XRegister call_site_type =
5799       locations->InAt(invoke->GetNumberOfArguments()).AsRegister<XRegister>();
5800 
5801   // The call site type should match the MethodHandle's type.
5802   XRegister temp = locations->GetTemp(1).AsRegister<XRegister>();
5803   __ Loadwu(temp, method_handle, mirror::MethodHandle::MethodTypeOffset().Int32Value());
5804   codegen_->MaybeUnpoisonHeapReference(temp);
5805   __ Bne(call_site_type, temp, slow_path->GetEntryLabel());
5806 
5807   XRegister method = locations->GetTemp(0).AsRegister<XRegister>();
5808   __ Loadd(method, method_handle, mirror::MethodHandle::ArtFieldOrMethodOffset().Int32Value());
5809 
5810   Riscv64Label execute_target_method;
5811 
5812   XRegister method_handle_kind = locations->GetTemp(2).AsRegister<XRegister>();
5813   __ Loadd(method_handle_kind,
5814            method_handle, mirror::MethodHandle::HandleKindOffset().Int32Value());
5815   __ Li(temp, mirror::MethodHandle::Kind::kInvokeStatic);
5816   __ Beq(method_handle_kind, temp, &execute_target_method);
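  // For invoke-static the ArtMethod loaded from the method handle is already the target,
  // so no receiver check or further dispatch is required.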
5817 
5818   if (invoke->AsInvokePolymorphic()->CanTargetInstanceMethod()) {
5819     XRegister receiver = locations->InAt(1).AsRegister<XRegister>();
5820 
5821     // The receiver must not be null for any of the following cases.
5822     __ Beqz(receiver, slow_path->GetEntryLabel());
5823 
5824     __ Li(temp, mirror::MethodHandle::Kind::kInvokeDirect);
5825     // No dispatch is needed for invoke-direct.
5826     __ Beq(method_handle_kind, temp, &execute_target_method);
5827 
5828     Riscv64Label non_virtual_dispatch;
5829     __ Li(temp, mirror::MethodHandle::Kind::kInvokeVirtual);
5830     __ Bne(method_handle_kind, temp, &non_virtual_dispatch);
5831 
5832     // Skip virtual dispatch if `method` is private.
5833     __ Loadd(temp, method, ArtMethod::AccessFlagsOffset().Int32Value());
5834     __ Andi(temp, temp, kAccPrivate);
5835     __ Bnez(temp, &execute_target_method);
5836 
5837     XRegister receiver_class = locations->GetTemp(2).AsRegister<XRegister>();
5838     // If method is defined in the receiver's class, execute it as it is.
5839     __ Loadd(temp, method, ArtMethod::DeclaringClassOffset().Int32Value());
5840     __ Loadd(receiver_class, receiver, mirror::Object::ClassOffset().Int32Value());
5841     codegen_->MaybeUnpoisonHeapReference(receiver_class);
5842 
5843     // We're not emitting the read barrier for the receiver_class, so false negatives just go
5844     // through the virtual dispatch below.
5845     __ Beq(temp, receiver_class, &execute_target_method);
5846 
5847     // MethodIndex is uint16_t.
5848     __ Loadhu(temp, method, ArtMethod::MethodIndexOffset().Int32Value());
5849 
5850     constexpr uint32_t vtable_offset =
5851         mirror::Class::EmbeddedVTableOffset(art::PointerSize::k64).Int32Value();
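    // Each embedded vtable entry is an 8-byte method pointer on riscv64, hence the scale
    // by 8 (Sh3Add) before loading the concrete implementation for the receiver's class.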
5852     __ Sh3Add(temp, temp, receiver_class);
5853     __ Loadd(method, temp, vtable_offset);
5854     __ J(&execute_target_method);
5855     __ Bind(&non_virtual_dispatch);
5856   }
5857 
5858   // The checks above jump to `execute_target_method` if they succeed. If none match,
5859   // let the slow path handle the call.
5860   __ J(slow_path->GetEntryLabel());
5861 
5862   __ Bind(&execute_target_method);
5863   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize);
5864   __ Loadd(RA, method, entry_point.SizeValue());
5865   __ Jalr(RA);
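  // The call goes through the target method's quick-compiled entry point; RecordPcInfo
  // below records the stack map for this call site.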
5866   codegen_->RecordPcInfo(invoke, slow_path);
5867   __ Bind(slow_path->GetExitLabel());
5868 }
5869 
5870 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
5871 UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
5872 #undef MARK_UNIMPLEMENTED
5873 
5874 UNREACHABLE_INTRINSICS(RISCV64)
5875 
5876 }  // namespace riscv64
5877 }  // namespace art
5878