1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/compiler/code-generator.h"
6
7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h"
9 #include "src/boxed-float.h"
10 #include "src/compiler/code-generator-impl.h"
11 #include "src/compiler/gap-resolver.h"
12 #include "src/compiler/node-matchers.h"
13 #include "src/compiler/osr.h"
14 #include "src/double.h"
15 #include "src/heap/heap-inl.h"
16 #include "src/optimized-compilation-info.h"
17 #include "src/wasm/wasm-objects.h"
18
19 namespace v8 {
20 namespace internal {
21 namespace compiler {
22
23 #define __ tasm()->
24
25 // Adds Arm-specific methods to convert InstructionOperands.
26 class ArmOperandConverter final : public InstructionOperandConverter {
27 public:
28 ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
29 : InstructionOperandConverter(gen, instr) {}
30
31 SBit OutputSBit() const {
32 switch (instr_->flags_mode()) {
33 case kFlags_branch:
34 case kFlags_branch_and_poison:
35 case kFlags_deoptimize:
36 case kFlags_deoptimize_and_poison:
37 case kFlags_set:
38 case kFlags_trap:
39 return SetCC;
40 case kFlags_none:
41 return LeaveCC;
42 }
43 UNREACHABLE();
44 }
45
46 Operand InputImmediate(size_t index) {
47 return ToImmediate(instr_->InputAt(index));
48 }
49
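// Decodes the instruction's addressing mode into an ARM "flexible second
// operand" (Operand2): an immediate, a plain register, or a register shifted
// by an immediate or by another register, e.g. kMode_Operand2_R_LSL_I yields
// Operand(reg, LSL, shift_imm).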
50 Operand InputOperand2(size_t first_index) {
51 const size_t index = first_index;
52 switch (AddressingModeField::decode(instr_->opcode())) {
53 case kMode_None:
54 case kMode_Offset_RI:
55 case kMode_Offset_RR:
56 break;
57 case kMode_Operand2_I:
58 return InputImmediate(index + 0);
59 case kMode_Operand2_R:
60 return Operand(InputRegister(index + 0));
61 case kMode_Operand2_R_ASR_I:
62 return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
63 case kMode_Operand2_R_ASR_R:
64 return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
65 case kMode_Operand2_R_LSL_I:
66 return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
67 case kMode_Operand2_R_LSL_R:
68 return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
69 case kMode_Operand2_R_LSR_I:
70 return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
71 case kMode_Operand2_R_LSR_R:
72 return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
73 case kMode_Operand2_R_ROR_I:
74 return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
75 case kMode_Operand2_R_ROR_R:
76 return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
77 }
78 UNREACHABLE();
79 }
80
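// Builds a MemOperand for a load/store from the instruction's addressing
// mode, starting at *first_index: base plus immediate offset, base plus index
// register, or base plus shifted index. *first_index is advanced past the
// inputs that were consumed.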
81 MemOperand InputOffset(size_t* first_index) {
82 const size_t index = *first_index;
83 switch (AddressingModeField::decode(instr_->opcode())) {
84 case kMode_None:
85 case kMode_Operand2_I:
86 case kMode_Operand2_R:
87 case kMode_Operand2_R_ASR_I:
88 case kMode_Operand2_R_ASR_R:
89 case kMode_Operand2_R_LSL_R:
90 case kMode_Operand2_R_LSR_I:
91 case kMode_Operand2_R_LSR_R:
92 case kMode_Operand2_R_ROR_I:
93 case kMode_Operand2_R_ROR_R:
94 break;
95 case kMode_Operand2_R_LSL_I:
96 *first_index += 3;
97 return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
98 LSL, InputInt32(index + 2));
99 case kMode_Offset_RI:
100 *first_index += 2;
101 return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
102 case kMode_Offset_RR:
103 *first_index += 2;
104 return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
105 }
106 UNREACHABLE();
107 }
108
109 MemOperand InputOffset(size_t first_index = 0) {
110 return InputOffset(&first_index);
111 }
112
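// Converts a constant instruction operand into an assembler Operand.
// Float constants become embedded numbers; 64-bit, heap-object and RPO
// constants are not handled here and fall through to UNREACHABLE().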
113 Operand ToImmediate(InstructionOperand* operand) {
114 Constant constant = ToConstant(operand);
115 switch (constant.type()) {
116 case Constant::kInt32:
117 if (RelocInfo::IsWasmReference(constant.rmode())) {
118 return Operand(constant.ToInt32(), constant.rmode());
119 } else {
120 return Operand(constant.ToInt32());
121 }
122 case Constant::kFloat32:
123 return Operand::EmbeddedNumber(constant.ToFloat32());
124 case Constant::kFloat64:
125 return Operand::EmbeddedNumber(constant.ToFloat64().value());
126 case Constant::kExternalReference:
127 return Operand(constant.ToExternalReference());
128 case Constant::kInt64:
129 case Constant::kHeapObject:
130 // TODO(dcarney): loading RPO constants on arm.
131 case Constant::kRpoNumber:
132 break;
133 }
134 UNREACHABLE();
135 }
136
137 MemOperand ToMemOperand(InstructionOperand* op) const {
138 DCHECK_NOT_NULL(op);
139 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
140 return SlotToMemOperand(AllocatedOperand::cast(op)->index());
141 }
142
143 MemOperand SlotToMemOperand(int slot) const {
144 FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
145 return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
146 }
147
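// Builds a NeonMemOperand (base register, optionally plus an index register)
// for NEON vld1/vst1 accesses.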
148 NeonMemOperand NeonInputOperand(size_t first_index) {
149 const size_t index = first_index;
150 switch (AddressingModeField::decode(instr_->opcode())) {
151 case kMode_Offset_RR:
152 return NeonMemOperand(InputRegister(index + 0),
153 InputRegister(index + 1));
154 case kMode_Operand2_R:
155 return NeonMemOperand(InputRegister(index + 0));
156 default:
157 break;
158 }
159 UNREACHABLE();
160 }
161 };
162
163 namespace {
164
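// Out-of-line part of the write barrier emitted for kArchStoreWithWriteBarrier:
// skips Smi values where the mode allows it, checks the page flags of the
// stored value, computes the slot address into scratch1, and calls the
// RecordWrite stub, saving and restoring lr when no frame has been set up.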
165 class OutOfLineRecordWrite final : public OutOfLineCode {
166 public:
167 OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index,
168 Register value, Register scratch0, Register scratch1,
169 RecordWriteMode mode,
170 UnwindingInfoWriter* unwinding_info_writer)
171 : OutOfLineCode(gen),
172 object_(object),
173 index_(index),
174 index_immediate_(0),
175 value_(value),
176 scratch0_(scratch0),
177 scratch1_(scratch1),
178 mode_(mode),
179 must_save_lr_(!gen->frame_access_state()->has_frame()),
180 unwinding_info_writer_(unwinding_info_writer),
181 zone_(gen->zone()) {}
182
183 OutOfLineRecordWrite(CodeGenerator* gen, Register object, int32_t index,
184 Register value, Register scratch0, Register scratch1,
185 RecordWriteMode mode,
186 UnwindingInfoWriter* unwinding_info_writer)
187 : OutOfLineCode(gen),
188 object_(object),
189 index_(no_reg),
190 index_immediate_(index),
191 value_(value),
192 scratch0_(scratch0),
193 scratch1_(scratch1),
194 mode_(mode),
195 must_save_lr_(!gen->frame_access_state()->has_frame()),
196 unwinding_info_writer_(unwinding_info_writer),
197 zone_(gen->zone()) {}
198
199 void Generate() final {
200 if (mode_ > RecordWriteMode::kValueIsPointer) {
201 __ JumpIfSmi(value_, exit());
202 }
203 __ CheckPageFlag(value_, scratch0_,
204 MemoryChunk::kPointersToHereAreInterestingMask, eq,
205 exit());
206 if (index_ == no_reg) {
207 __ add(scratch1_, object_, Operand(index_immediate_));
208 } else {
209 DCHECK_EQ(0, index_immediate_);
210 __ add(scratch1_, object_, Operand(index_));
211 }
212 RememberedSetAction const remembered_set_action =
213 mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
214 : OMIT_REMEMBERED_SET;
215 SaveFPRegsMode const save_fp_mode =
216 frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
217 if (must_save_lr_) {
218 // We need to save and restore lr if the frame was elided.
219 __ Push(lr);
220 unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
221 }
222 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
223 save_fp_mode);
224 if (must_save_lr_) {
225 __ Pop(lr);
226 unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
227 }
228 }
229
230 private:
231 Register const object_;
232 Register const index_;
233 int32_t const index_immediate_; // Valid if index_==no_reg.
234 Register const value_;
235 Register const scratch0_;
236 Register const scratch1_;
237 RecordWriteMode const mode_;
238 bool must_save_lr_;
239 UnwindingInfoWriter* const unwinding_info_writer_;
240 Zone* zone_;
241 };
242
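// Out-of-line continuations for the float min/max sequences further down
// (kArmFloat32/64Min and kArmFloat32/64Max): the inline FloatMin/FloatMax
// fast paths jump here for the operand combinations that need the full
// out-of-line handling.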
243 template <typename T>
244 class OutOfLineFloatMin final : public OutOfLineCode {
245 public:
246 OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
247 : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
248
249 void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }
250
251 private:
252 T const result_;
253 T const left_;
254 T const right_;
255 };
256 typedef OutOfLineFloatMin<SwVfpRegister> OutOfLineFloat32Min;
257 typedef OutOfLineFloatMin<DwVfpRegister> OutOfLineFloat64Min;
258
259 template <typename T>
260 class OutOfLineFloatMax final : public OutOfLineCode {
261 public:
262 OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
263 : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
264
265 void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }
266
267 private:
268 T const result_;
269 T const left_;
270 T const right_;
271 };
272 typedef OutOfLineFloatMax<SwVfpRegister> OutOfLineFloat32Max;
273 typedef OutOfLineFloatMax<DwVfpRegister> OutOfLineFloat64Max;
274
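// Maps a machine-independent FlagsCondition to an ARM condition code. For the
// floating-point conditions the chosen codes also give the required behaviour
// when the comparison was unordered (NaN), e.g. kFloatLessThanOrUnordered
// maps to lt while kFloatLessThan maps to lo.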
275 Condition FlagsConditionToCondition(FlagsCondition condition) {
276 switch (condition) {
277 case kEqual:
278 return eq;
279 case kNotEqual:
280 return ne;
281 case kSignedLessThan:
282 return lt;
283 case kSignedGreaterThanOrEqual:
284 return ge;
285 case kSignedLessThanOrEqual:
286 return le;
287 case kSignedGreaterThan:
288 return gt;
289 case kUnsignedLessThan:
290 return lo;
291 case kUnsignedGreaterThanOrEqual:
292 return hs;
293 case kUnsignedLessThanOrEqual:
294 return ls;
295 case kUnsignedGreaterThan:
296 return hi;
297 case kFloatLessThanOrUnordered:
298 return lt;
299 case kFloatGreaterThanOrEqual:
300 return ge;
301 case kFloatLessThanOrEqual:
302 return ls;
303 case kFloatGreaterThanOrUnordered:
304 return hi;
305 case kFloatLessThan:
306 return lo;
307 case kFloatGreaterThanOrEqualOrUnordered:
308 return hs;
309 case kFloatLessThanOrEqualOrUnordered:
310 return le;
311 case kFloatGreaterThan:
312 return gt;
313 case kOverflow:
314 return vs;
315 case kNotOverflow:
316 return vc;
317 case kPositiveOrZero:
318 return pl;
319 case kNegative:
320 return mi;
321 default:
322 break;
323 }
324 UNREACHABLE();
325 }
326
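// For loads marked as kMemoryAccessPoisoned, masks the loaded value with the
// speculation poison register so a mis-speculated load cannot leak data.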
327 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
328 InstructionCode opcode,
329 ArmOperandConverter& i) {
330 const MemoryAccessMode access_mode =
331 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
332 if (access_mode == kMemoryAccessPoisoned) {
333 Register value = i.OutputRegister();
334 codegen->tasm()->and_(value, value, Operand(kSpeculationPoisonRegister));
335 }
336 }
337
338 void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
339 InstructionCode opcode,
340 ArmOperandConverter& i, Register address) {
341 DCHECK_EQ(kMemoryAccessPoisoned,
342 static_cast<MemoryAccessMode>(MiscField::decode(opcode)));
343 switch (AddressingModeField::decode(opcode)) {
344 case kMode_Offset_RI:
345 codegen->tasm()->mov(address, i.InputImmediate(1));
346 codegen->tasm()->add(address, address, i.InputRegister(0));
347 break;
348 case kMode_Offset_RR:
349 codegen->tasm()->add(address, i.InputRegister(0), i.InputRegister(1));
350 break;
351 default:
352 UNREACHABLE();
353 }
354 codegen->tasm()->and_(address, address, Operand(kSpeculationPoisonRegister));
355 }
356
357 } // namespace
358
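// The atomic macros below implement atomic loads/stores with dmb(ISH)
// barriers around plain accesses, and exchange/compare-exchange/binop
// operations with ldrex/strex (load-exclusive/store-exclusive) retry loops
// bracketed by barriers.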
359 #define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \
360 do { \
361 __ asm_instr(i.OutputRegister(), \
362 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
363 __ dmb(ISH); \
364 } while (0)
365
366 #define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr) \
367 do { \
368 __ dmb(ISH); \
369 __ asm_instr(i.InputRegister(2), \
370 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
371 __ dmb(ISH); \
372 } while (0)
373
374 #define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr) \
375 do { \
376 Label exchange; \
377 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
378 __ dmb(ISH); \
379 __ bind(&exchange); \
380 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
381 __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
382 __ teq(i.TempRegister(0), Operand(0)); \
383 __ b(ne, &exchange); \
384 __ dmb(ISH); \
385 } while (0)
386
387 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, \
388 cmp_reg) \
389 do { \
390 Label compareExchange; \
391 Label exit; \
392 __ dmb(ISH); \
393 __ bind(&compareExchange); \
394 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
395 __ teq(cmp_reg, Operand(i.OutputRegister(0))); \
396 __ b(ne, &exit); \
397 __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
398 __ teq(i.TempRegister(0), Operand(0)); \
399 __ b(ne, &compareExchange); \
400 __ bind(&exit); \
401 __ dmb(ISH); \
402 } while (0)
403
404 #define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr) \
405 do { \
406 Label binop; \
407 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
408 __ dmb(ISH); \
409 __ bind(&binop); \
410 __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
411 __ bin_instr(i.TempRegister(0), i.OutputRegister(0), \
412 Operand(i.InputRegister(2))); \
413 __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
414 __ teq(i.TempRegister(2), Operand(0)); \
415 __ b(ne, &binop); \
416 __ dmb(ISH); \
417 } while (0)
418
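// 64-bit atomics operate on a register pair via ldrexd/strexd; instr1 is
// applied to the low words with SBit::SetCC so that a carry-consuming instr2
// (e.g. adc/sbc) can be applied to the high words, and the strexd status in
// TempRegister(3) drives the retry loop.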
419 #define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2) \
420 do { \
421 Label binop; \
422 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
423 __ dmb(ISH); \
424 __ bind(&binop); \
425 __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0)); \
426 __ instr1(i.TempRegister(1), i.OutputRegister(0), i.InputRegister(0), \
427 SBit::SetCC); \
428 __ instr2(i.TempRegister(2), i.OutputRegister(1), \
429 Operand(i.InputRegister(1))); \
430 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
431 __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
432 i.TempRegister(0)); \
433 __ teq(i.TempRegister(3), Operand(0)); \
434 __ b(ne, &binop); \
435 __ dmb(ISH); \
436 } while (0)
437
438 #define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr) \
439 do { \
440 Label binop; \
441 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
442 __ dmb(ISH); \
443 __ bind(&binop); \
444 __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0)); \
445 __ instr(i.TempRegister(1), i.OutputRegister(0), \
446 Operand(i.InputRegister(0))); \
447 __ instr(i.TempRegister(2), i.OutputRegister(1), \
448 Operand(i.InputRegister(1))); \
449 __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
450 i.TempRegister(0)); \
451 __ teq(i.TempRegister(3), Operand(0)); \
452 __ b(ne, &binop); \
453 __ dmb(ISH); \
454 } while (0)
455
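// The narrow (8/16/32-bit) variants of the 64-bit atomic operations produce
// only a low word, so the high output word is cleared to zero-extend the
// result.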
456 #define ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op) \
457 if (arch_opcode == kArmWord64AtomicNarrow##op) { \
458 __ mov(i.OutputRegister(1), Operand(0)); \
459 }
460
461 #define ASSEMBLE_IEEE754_BINOP(name) \
462 do { \
463 /* TODO(bmeurer): We should really get rid of this special instruction, */ \
464 /* and generate a CallAddress instruction instead. */ \
465 FrameScope scope(tasm(), StackFrame::MANUAL); \
466 __ PrepareCallCFunction(0, 2); \
467 __ MovToFloatParameters(i.InputDoubleRegister(0), \
468 i.InputDoubleRegister(1)); \
469 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
470 /* Move the result into the double result register. */ \
471 __ MovFromFloatResult(i.OutputDoubleRegister()); \
472 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
473 } while (0)
474
475 #define ASSEMBLE_IEEE754_UNOP(name) \
476 do { \
477 /* TODO(bmeurer): We should really get rid of this special instruction, */ \
478 /* and generate a CallAddress instruction instead. */ \
479 FrameScope scope(tasm(), StackFrame::MANUAL); \
480 __ PrepareCallCFunction(0, 1); \
481 __ MovToFloatParameter(i.InputDoubleRegister(0)); \
482 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
483 /* Move the result into the double result register. */ \
484 __ MovFromFloatResult(i.OutputDoubleRegister()); \
485 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
486 } while (0)
487
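// Saturating-narrows two 128-bit inputs into the low and high halves of the
// result with vqmovn; the operands are processed in an order that is safe
// when dst aliases one of the sources.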
488 #define ASSEMBLE_NEON_NARROWING_OP(dt) \
489 do { \
490 Simd128Register dst = i.OutputSimd128Register(), \
491 src0 = i.InputSimd128Register(0), \
492 src1 = i.InputSimd128Register(1); \
493 if (dst == src0 && dst == src1) { \
494 __ vqmovn(dt, dst.low(), src0); \
495 __ vmov(dst.high(), dst.low()); \
496 } else if (dst == src0) { \
497 __ vqmovn(dt, dst.low(), src0); \
498 __ vqmovn(dt, dst.high(), src1); \
499 } else { \
500 __ vqmovn(dt, dst.high(), src1); \
501 __ vqmovn(dt, dst.low(), src0); \
502 } \
503 } while (0)
504
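// Pairwise op over two 128-bit inputs into one 128-bit result; as above, the
// order of operations is chosen so that dst may alias a source.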
505 #define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \
506 do { \
507 Simd128Register dst = i.OutputSimd128Register(), \
508 src0 = i.InputSimd128Register(0), \
509 src1 = i.InputSimd128Register(1); \
510 if (dst == src0) { \
511 __ op(size, dst.low(), src0.low(), src0.high()); \
512 if (dst == src1) { \
513 __ vmov(dst.high(), dst.low()); \
514 } else { \
515 __ op(size, dst.high(), src1.low(), src1.high()); \
516 } \
517 } else { \
518 __ op(size, dst.high(), src1.low(), src1.high()); \
519 __ op(size, dst.low(), src0.low(), src0.high()); \
520 } \
521 } while (0)
522
523 void CodeGenerator::AssembleDeconstructFrame() {
524 __ LeaveFrame(StackFrame::MANUAL);
525 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
526 }
527
528 void CodeGenerator::AssemblePrepareTailCall() {
529 if (frame_access_state()->has_frame()) {
530 __ ldr(lr, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
531 __ ldr(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
532 }
533 frame_access_state()->SetFrameAccessToSP();
534 }
535
536 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
537 Register scratch1,
538 Register scratch2,
539 Register scratch3) {
540 DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
541 Label done;
542
543 // Check if current frame is an arguments adaptor frame.
544 __ ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
545 __ cmp(scratch1,
546 Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
547 __ b(ne, &done);
548
549 // Load arguments count from current arguments adaptor frame (note, it
550 // does not include receiver).
551 Register caller_args_count_reg = scratch1;
552 __ ldr(caller_args_count_reg,
553 MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
554 __ SmiUntag(caller_args_count_reg);
555
556 ParameterCount callee_args_count(args_reg);
557 __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
558 scratch3);
559 __ bind(&done);
560 }
561
562 namespace {
563
564 void FlushPendingPushRegisters(TurboAssembler* tasm,
565 FrameAccessState* frame_access_state,
566 ZoneVector<Register>* pending_pushes) {
567 switch (pending_pushes->size()) {
568 case 0:
569 break;
570 case 1:
571 tasm->push((*pending_pushes)[0]);
572 break;
573 case 2:
574 tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
575 break;
576 case 3:
577 tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
578 (*pending_pushes)[2]);
579 break;
580 default:
581 UNREACHABLE();
582 break;
583 }
584 frame_access_state->IncreaseSPDelta(pending_pushes->size());
585 pending_pushes->clear();
586 }
587
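// Adjusts sp by the difference between new_slot_above_sp and the current
// distance (in slots) from sp to the fixed frame, flushing any pending
// register pushes before sp moves; allow_shrinkage = false means the stack is
// only ever grown, never shrunk.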
588 void AdjustStackPointerForTailCall(
589 TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
590 ZoneVector<Register>* pending_pushes = nullptr,
591 bool allow_shrinkage = true) {
592 int current_sp_offset = state->GetSPToFPSlotCount() +
593 StandardFrameConstants::kFixedSlotCountAboveFp;
594 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
595 if (stack_slot_delta > 0) {
596 if (pending_pushes != nullptr) {
597 FlushPendingPushRegisters(tasm, state, pending_pushes);
598 }
599 tasm->sub(sp, sp, Operand(stack_slot_delta * kPointerSize));
600 state->IncreaseSPDelta(stack_slot_delta);
601 } else if (allow_shrinkage && stack_slot_delta < 0) {
602 if (pending_pushes != nullptr) {
603 FlushPendingPushRegisters(tasm, state, pending_pushes);
604 }
605 tasm->add(sp, sp, Operand(-stack_slot_delta * kPointerSize));
606 state->IncreaseSPDelta(stack_slot_delta);
607 }
608 }
609
610 } // namespace
611
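// Turns push-compatible gap moves into actual push instructions when they end
// exactly at first_unused_stack_slot (batched up to three registers at a time
// via FlushPendingPushRegisters), then adjusts sp for first_unused_stack_slot
// without shrinking the stack.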
612 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
613 int first_unused_stack_slot) {
614 ZoneVector<MoveOperands*> pushes(zone());
615 GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
616
617 if (!pushes.empty() &&
618 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
619 first_unused_stack_slot)) {
620 ArmOperandConverter g(this, instr);
621 ZoneVector<Register> pending_pushes(zone());
622 for (auto move : pushes) {
623 LocationOperand destination_location(
624 LocationOperand::cast(move->destination()));
625 InstructionOperand source(move->source());
626 AdjustStackPointerForTailCall(
627 tasm(), frame_access_state(),
628 destination_location.index() - pending_pushes.size(),
629 &pending_pushes);
630 // Pushes of non-register data types are not supported.
631 DCHECK(source.IsRegister());
632 LocationOperand source_location(LocationOperand::cast(source));
633 pending_pushes.push_back(source_location.GetRegister());
634 // TODO(arm): We can push more than 3 registers at once. Add support in
635 // the macro-assembler for pushing a list of registers.
636 if (pending_pushes.size() == 3) {
637 FlushPendingPushRegisters(tasm(), frame_access_state(),
638 &pending_pushes);
639 }
640 move->Eliminate();
641 }
642 FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
643 }
644 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
645 first_unused_stack_slot, nullptr, false);
646 }
647
648 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
649 int first_unused_stack_slot) {
650 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
651 first_unused_stack_slot);
652 }
653
654 // Check that {kJavaScriptCallCodeStartRegister} is correct.
655 void CodeGenerator::AssembleCodeStartRegisterCheck() {
656 UseScratchRegisterScope temps(tasm());
657 Register scratch = temps.Acquire();
658 __ ComputeCodeStartAddress(scratch);
659 __ cmp(scratch, kJavaScriptCallCodeStartRegister);
660 __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
661 }
662
663 // Check if the code object is marked for deoptimization. If it is, then it
664 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
665 // to:
666 // 1. read from memory the word that contains that bit, which can be found in
667 // the flags in the referenced {CodeDataContainer} object;
668 // 2. test kMarkedForDeoptimizationBit in those flags; and
669 // 3. if it is not zero then it jumps to the builtin.
670 void CodeGenerator::BailoutIfDeoptimized() {
671 UseScratchRegisterScope temps(tasm());
672 Register scratch = temps.Acquire();
673 int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
674 __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
675 __ ldr(scratch,
676 FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
677 __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
678 // Ensure we're not serializing (otherwise we'd need to use an indirection to
679 // access the builtin below).
680 DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
681 Handle<Code> code = isolate()->builtins()->builtin_handle(
682 Builtins::kCompileLazyDeoptimizedCode);
683 __ Jump(code, RelocInfo::CODE_TARGET, ne);
684 }
685
686 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
687 UseScratchRegisterScope temps(tasm());
688 Register scratch = temps.Acquire();
689
690 // Set a mask which has all bits set in the normal case, but has all
691 // bits cleared if we are speculatively executing the wrong PC.
692 __ ComputeCodeStartAddress(scratch);
693 __ cmp(kJavaScriptCallCodeStartRegister, scratch);
694 __ mov(kSpeculationPoisonRegister, Operand(-1), SBit::LeaveCC, eq);
695 __ mov(kSpeculationPoisonRegister, Operand(0), SBit::LeaveCC, ne);
696 __ csdb();
697 }
698
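// Masks the JS function, context and stack pointer with the speculation
// poison register: under mis-speculation the poison is zero, which
// neutralizes these registers.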
699 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
700 __ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
701 __ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
702 __ and_(sp, sp, kSpeculationPoisonRegister);
703 }
704
705 // Assembles an instruction after register allocation, producing machine code.
706 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
707 Instruction* instr) {
708 ArmOperandConverter i(this, instr);
709
710 __ MaybeCheckConstPool();
711 InstructionCode opcode = instr->opcode();
712 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
713 switch (arch_opcode) {
714 case kArchCallCodeObject: {
715 if (instr->InputAt(0)->IsImmediate()) {
716 __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
717 } else {
718 Register reg = i.InputRegister(0);
719 DCHECK_IMPLIES(
720 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
721 reg == kJavaScriptCallCodeStartRegister);
722 __ add(reg, reg, Operand(Code::kHeaderSize - kHeapObjectTag));
723 __ Call(reg);
724 }
725 RecordCallPosition(instr);
726 DCHECK_EQ(LeaveCC, i.OutputSBit());
727 frame_access_state()->ClearSPDelta();
728 break;
729 }
730 case kArchCallWasmFunction: {
731 if (instr->InputAt(0)->IsImmediate()) {
732 Constant constant = i.ToConstant(instr->InputAt(0));
733 Address wasm_code = static_cast<Address>(constant.ToInt32());
734 __ Call(wasm_code, constant.rmode());
735 } else {
736 __ Call(i.InputRegister(0));
737 }
738 RecordCallPosition(instr);
739 DCHECK_EQ(LeaveCC, i.OutputSBit());
740 frame_access_state()->ClearSPDelta();
741 break;
742 }
743 case kArchTailCallCodeObjectFromJSFunction:
744 case kArchTailCallCodeObject: {
745 if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
746 AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
747 i.TempRegister(0), i.TempRegister(1),
748 i.TempRegister(2));
749 }
750 if (instr->InputAt(0)->IsImmediate()) {
751 __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
752 } else {
753 Register reg = i.InputRegister(0);
754 DCHECK_IMPLIES(
755 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
756 reg == kJavaScriptCallCodeStartRegister);
757 __ add(reg, reg, Operand(Code::kHeaderSize - kHeapObjectTag));
758 __ Jump(reg);
759 }
760 DCHECK_EQ(LeaveCC, i.OutputSBit());
761 unwinding_info_writer_.MarkBlockWillExit();
762 frame_access_state()->ClearSPDelta();
763 frame_access_state()->SetFrameAccessToDefault();
764 break;
765 }
766 case kArchTailCallWasm: {
767 if (instr->InputAt(0)->IsImmediate()) {
768 Constant constant = i.ToConstant(instr->InputAt(0));
769 Address wasm_code = static_cast<Address>(constant.ToInt32());
770 __ Jump(wasm_code, constant.rmode());
771 } else {
772 __ Jump(i.InputRegister(0));
773 }
774 DCHECK_EQ(LeaveCC, i.OutputSBit());
775 unwinding_info_writer_.MarkBlockWillExit();
776 frame_access_state()->ClearSPDelta();
777 frame_access_state()->SetFrameAccessToDefault();
778 break;
779 }
780 case kArchTailCallAddress: {
781 CHECK(!instr->InputAt(0)->IsImmediate());
782 Register reg = i.InputRegister(0);
783 DCHECK_IMPLIES(
784 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
785 reg == kJavaScriptCallCodeStartRegister);
786 __ Jump(reg);
787 unwinding_info_writer_.MarkBlockWillExit();
788 frame_access_state()->ClearSPDelta();
789 frame_access_state()->SetFrameAccessToDefault();
790 break;
791 }
792 case kArchCallJSFunction: {
793 Register func = i.InputRegister(0);
794 if (FLAG_debug_code) {
795 UseScratchRegisterScope temps(tasm());
796 Register scratch = temps.Acquire();
797 // Check the function's context matches the context argument.
798 __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
799 __ cmp(cp, scratch);
800 __ Assert(eq, AbortReason::kWrongFunctionContext);
801 }
802 static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch");
803 __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset));
804 __ add(r2, r2, Operand(Code::kHeaderSize - kHeapObjectTag));
805 __ Call(r2);
806 RecordCallPosition(instr);
807 DCHECK_EQ(LeaveCC, i.OutputSBit());
808 frame_access_state()->ClearSPDelta();
809 break;
810 }
811 case kArchPrepareCallCFunction: {
812 int const num_parameters = MiscField::decode(instr->opcode());
813 __ PrepareCallCFunction(num_parameters);
814 // Frame alignment requires using FP-relative frame addressing.
815 frame_access_state()->SetFrameAccessToFP();
816 break;
817 }
818 case kArchSaveCallerRegisters: {
819 fp_mode_ =
820 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
821 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
822 // kReturnRegister0 should have been saved before entering the stub.
823 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
824 DCHECK_EQ(0, bytes % kPointerSize);
825 DCHECK_EQ(0, frame_access_state()->sp_delta());
826 frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
827 DCHECK(!caller_registers_saved_);
828 caller_registers_saved_ = true;
829 break;
830 }
831 case kArchRestoreCallerRegisters: {
832 DCHECK(fp_mode_ ==
833 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
834 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
835 // Don't overwrite the returned value.
836 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
837 frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
838 DCHECK_EQ(0, frame_access_state()->sp_delta());
839 DCHECK(caller_registers_saved_);
840 caller_registers_saved_ = false;
841 break;
842 }
843 case kArchPrepareTailCall:
844 AssemblePrepareTailCall();
845 break;
846 case kArchCallCFunction: {
847 int const num_parameters = MiscField::decode(instr->opcode());
848 if (instr->InputAt(0)->IsImmediate()) {
849 ExternalReference ref = i.InputExternalReference(0);
850 __ CallCFunction(ref, num_parameters);
851 } else {
852 Register func = i.InputRegister(0);
853 __ CallCFunction(func, num_parameters);
854 }
855 frame_access_state()->SetFrameAccessToDefault();
856 // Ideally, we should decrement SP delta to match the change of stack
857 // pointer in CallCFunction. However, for certain architectures (e.g.
858 // ARM), there may be more strict alignment requirement, causing old SP
859 // to be saved on the stack. In those cases, we can not calculate the SP
860 // delta statically.
861 frame_access_state()->ClearSPDelta();
862 if (caller_registers_saved_) {
863 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
864 // Here, we assume the sequence to be:
865 // kArchSaveCallerRegisters;
866 // kArchCallCFunction;
867 // kArchRestoreCallerRegisters;
868 int bytes =
869 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
870 frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
871 }
872 break;
873 }
874 case kArchJmp:
875 AssembleArchJump(i.InputRpo(0));
876 DCHECK_EQ(LeaveCC, i.OutputSBit());
877 break;
878 case kArchBinarySearchSwitch:
879 AssembleArchBinarySearchSwitch(instr);
880 break;
881 case kArchLookupSwitch:
882 AssembleArchLookupSwitch(instr);
883 DCHECK_EQ(LeaveCC, i.OutputSBit());
884 break;
885 case kArchTableSwitch:
886 AssembleArchTableSwitch(instr);
887 DCHECK_EQ(LeaveCC, i.OutputSBit());
888 break;
889 case kArchDebugAbort:
890 DCHECK(i.InputRegister(0) == r1);
891 if (!frame_access_state()->has_frame()) {
892 // We don't actually want to generate a pile of code for this, so just
893 // claim there is a stack frame, without generating one.
894 FrameScope scope(tasm(), StackFrame::NONE);
895 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
896 RelocInfo::CODE_TARGET);
897 } else {
898 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
899 RelocInfo::CODE_TARGET);
900 }
901 __ stop("kArchDebugAbort");
902 unwinding_info_writer_.MarkBlockWillExit();
903 break;
904 case kArchDebugBreak:
905 __ stop("kArchDebugBreak");
906 break;
907 case kArchComment:
908 __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
909 break;
910 case kArchThrowTerminator:
911 DCHECK_EQ(LeaveCC, i.OutputSBit());
912 unwinding_info_writer_.MarkBlockWillExit();
913 break;
914 case kArchNop:
915 // don't emit code for nops.
916 DCHECK_EQ(LeaveCC, i.OutputSBit());
917 break;
918 case kArchDeoptimize: {
919 int deopt_state_id =
920 BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
921 CodeGenResult result =
922 AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
923 if (result != kSuccess) return result;
924 unwinding_info_writer_.MarkBlockWillExit();
925 break;
926 }
927 case kArchRet:
928 AssembleReturn(instr->InputAt(0));
929 DCHECK_EQ(LeaveCC, i.OutputSBit());
930 break;
931 case kArchStackPointer:
932 __ mov(i.OutputRegister(), sp);
933 DCHECK_EQ(LeaveCC, i.OutputSBit());
934 break;
935 case kArchFramePointer:
936 __ mov(i.OutputRegister(), fp);
937 DCHECK_EQ(LeaveCC, i.OutputSBit());
938 break;
939 case kArchParentFramePointer:
940 if (frame_access_state()->has_frame()) {
941 __ ldr(i.OutputRegister(), MemOperand(fp, 0));
942 } else {
943 __ mov(i.OutputRegister(), fp);
944 }
945 break;
946 case kArchTruncateDoubleToI:
947 __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
948 i.InputDoubleRegister(0), DetermineStubCallMode());
949 DCHECK_EQ(LeaveCC, i.OutputSBit());
950 break;
951 case kArchStoreWithWriteBarrier: {
952 RecordWriteMode mode =
953 static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
954 Register object = i.InputRegister(0);
955 Register value = i.InputRegister(2);
956 Register scratch0 = i.TempRegister(0);
957 Register scratch1 = i.TempRegister(1);
958 OutOfLineRecordWrite* ool;
959
960 AddressingMode addressing_mode =
961 AddressingModeField::decode(instr->opcode());
962 if (addressing_mode == kMode_Offset_RI) {
963 int32_t index = i.InputInt32(1);
964 ool = new (zone())
965 OutOfLineRecordWrite(this, object, index, value, scratch0, scratch1,
966 mode, &unwinding_info_writer_);
967 __ str(value, MemOperand(object, index));
968 } else {
969 DCHECK_EQ(kMode_Offset_RR, addressing_mode);
970 Register index(i.InputRegister(1));
971 ool = new (zone())
972 OutOfLineRecordWrite(this, object, index, value, scratch0, scratch1,
973 mode, &unwinding_info_writer_);
974 __ str(value, MemOperand(object, index));
975 }
976 __ CheckPageFlag(object, scratch0,
977 MemoryChunk::kPointersFromHereAreInterestingMask, ne,
978 ool->entry());
979 __ bind(ool->exit());
980 break;
981 }
982 case kArchStackSlot: {
983 FrameOffset offset =
984 frame_access_state()->GetFrameOffset(i.InputInt32(0));
985 Register base = offset.from_stack_pointer() ? sp : fp;
986 __ add(i.OutputRegister(0), base, Operand(offset.offset()));
987 break;
988 }
989 case kIeee754Float64Acos:
990 ASSEMBLE_IEEE754_UNOP(acos);
991 break;
992 case kIeee754Float64Acosh:
993 ASSEMBLE_IEEE754_UNOP(acosh);
994 break;
995 case kIeee754Float64Asin:
996 ASSEMBLE_IEEE754_UNOP(asin);
997 break;
998 case kIeee754Float64Asinh:
999 ASSEMBLE_IEEE754_UNOP(asinh);
1000 break;
1001 case kIeee754Float64Atan:
1002 ASSEMBLE_IEEE754_UNOP(atan);
1003 break;
1004 case kIeee754Float64Atanh:
1005 ASSEMBLE_IEEE754_UNOP(atanh);
1006 break;
1007 case kIeee754Float64Atan2:
1008 ASSEMBLE_IEEE754_BINOP(atan2);
1009 break;
1010 case kIeee754Float64Cbrt:
1011 ASSEMBLE_IEEE754_UNOP(cbrt);
1012 break;
1013 case kIeee754Float64Cos:
1014 ASSEMBLE_IEEE754_UNOP(cos);
1015 break;
1016 case kIeee754Float64Cosh:
1017 ASSEMBLE_IEEE754_UNOP(cosh);
1018 break;
1019 case kIeee754Float64Exp:
1020 ASSEMBLE_IEEE754_UNOP(exp);
1021 break;
1022 case kIeee754Float64Expm1:
1023 ASSEMBLE_IEEE754_UNOP(expm1);
1024 break;
1025 case kIeee754Float64Log:
1026 ASSEMBLE_IEEE754_UNOP(log);
1027 break;
1028 case kIeee754Float64Log1p:
1029 ASSEMBLE_IEEE754_UNOP(log1p);
1030 break;
1031 case kIeee754Float64Log2:
1032 ASSEMBLE_IEEE754_UNOP(log2);
1033 break;
1034 case kIeee754Float64Log10:
1035 ASSEMBLE_IEEE754_UNOP(log10);
1036 break;
1037 case kIeee754Float64Pow: {
1038 __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
1039 __ vmov(d0, d2);
1040 break;
1041 }
1042 case kIeee754Float64Sin:
1043 ASSEMBLE_IEEE754_UNOP(sin);
1044 break;
1045 case kIeee754Float64Sinh:
1046 ASSEMBLE_IEEE754_UNOP(sinh);
1047 break;
1048 case kIeee754Float64Tan:
1049 ASSEMBLE_IEEE754_UNOP(tan);
1050 break;
1051 case kIeee754Float64Tanh:
1052 ASSEMBLE_IEEE754_UNOP(tanh);
1053 break;
1054 case kArmAdd:
1055 __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1056 i.OutputSBit());
1057 break;
1058 case kArmAnd:
1059 __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1060 i.OutputSBit());
1061 break;
1062 case kArmBic:
1063 __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1064 i.OutputSBit());
1065 break;
1066 case kArmMul:
1067 __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1068 i.OutputSBit());
1069 break;
1070 case kArmMla:
1071 __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1072 i.InputRegister(2), i.OutputSBit());
1073 break;
1074 case kArmMls: {
1075 CpuFeatureScope scope(tasm(), ARMv7);
1076 __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1077 i.InputRegister(2));
1078 DCHECK_EQ(LeaveCC, i.OutputSBit());
1079 break;
1080 }
1081 case kArmSmull:
1082 __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1083 i.InputRegister(1));
1084 break;
1085 case kArmSmmul:
1086 __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1087 DCHECK_EQ(LeaveCC, i.OutputSBit());
1088 break;
1089 case kArmSmmla:
1090 __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1091 i.InputRegister(2));
1092 DCHECK_EQ(LeaveCC, i.OutputSBit());
1093 break;
1094 case kArmUmull:
1095 __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1096 i.InputRegister(1), i.OutputSBit());
1097 break;
1098 case kArmSdiv: {
1099 CpuFeatureScope scope(tasm(), SUDIV);
1100 __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1101 DCHECK_EQ(LeaveCC, i.OutputSBit());
1102 break;
1103 }
1104 case kArmUdiv: {
1105 CpuFeatureScope scope(tasm(), SUDIV);
1106 __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1107 DCHECK_EQ(LeaveCC, i.OutputSBit());
1108 break;
1109 }
1110 case kArmMov:
1111 __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1112 break;
1113 case kArmMvn:
1114 __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1115 break;
1116 case kArmOrr:
1117 __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1118 i.OutputSBit());
1119 break;
1120 case kArmEor:
1121 __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1122 i.OutputSBit());
1123 break;
1124 case kArmSub:
1125 __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1126 i.OutputSBit());
1127 break;
1128 case kArmRsb:
1129 __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1130 i.OutputSBit());
1131 break;
1132 case kArmBfc: {
1133 CpuFeatureScope scope(tasm(), ARMv7);
1134 __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
1135 DCHECK_EQ(LeaveCC, i.OutputSBit());
1136 break;
1137 }
1138 case kArmUbfx: {
1139 CpuFeatureScope scope(tasm(), ARMv7);
1140 __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1141 i.InputInt8(2));
1142 DCHECK_EQ(LeaveCC, i.OutputSBit());
1143 break;
1144 }
1145 case kArmSbfx: {
1146 CpuFeatureScope scope(tasm(), ARMv7);
1147 __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1148 i.InputInt8(2));
1149 DCHECK_EQ(LeaveCC, i.OutputSBit());
1150 break;
1151 }
1152 case kArmSxtb:
1153 __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1154 DCHECK_EQ(LeaveCC, i.OutputSBit());
1155 break;
1156 case kArmSxth:
1157 __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1158 DCHECK_EQ(LeaveCC, i.OutputSBit());
1159 break;
1160 case kArmSxtab:
1161 __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1162 i.InputInt32(2));
1163 DCHECK_EQ(LeaveCC, i.OutputSBit());
1164 break;
1165 case kArmSxtah:
1166 __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1167 i.InputInt32(2));
1168 DCHECK_EQ(LeaveCC, i.OutputSBit());
1169 break;
1170 case kArmUxtb:
1171 __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1172 DCHECK_EQ(LeaveCC, i.OutputSBit());
1173 break;
1174 case kArmUxth:
1175 __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1176 DCHECK_EQ(LeaveCC, i.OutputSBit());
1177 break;
1178 case kArmUxtab:
1179 __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1180 i.InputInt32(2));
1181 DCHECK_EQ(LeaveCC, i.OutputSBit());
1182 break;
1183 case kArmUxtah:
1184 __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1185 i.InputInt32(2));
1186 DCHECK_EQ(LeaveCC, i.OutputSBit());
1187 break;
1188 case kArmRbit: {
1189 CpuFeatureScope scope(tasm(), ARMv7);
1190 __ rbit(i.OutputRegister(), i.InputRegister(0));
1191 DCHECK_EQ(LeaveCC, i.OutputSBit());
1192 break;
1193 }
1194 case kArmRev:
1195 __ rev(i.OutputRegister(), i.InputRegister(0));
1196 DCHECK_EQ(LeaveCC, i.OutputSBit());
1197 break;
1198 case kArmClz:
1199 __ clz(i.OutputRegister(), i.InputRegister(0));
1200 DCHECK_EQ(LeaveCC, i.OutputSBit());
1201 break;
1202 case kArmCmp:
1203 __ cmp(i.InputRegister(0), i.InputOperand2(1));
1204 DCHECK_EQ(SetCC, i.OutputSBit());
1205 break;
1206 case kArmCmn:
1207 __ cmn(i.InputRegister(0), i.InputOperand2(1));
1208 DCHECK_EQ(SetCC, i.OutputSBit());
1209 break;
1210 case kArmTst:
1211 __ tst(i.InputRegister(0), i.InputOperand2(1));
1212 DCHECK_EQ(SetCC, i.OutputSBit());
1213 break;
1214 case kArmTeq:
1215 __ teq(i.InputRegister(0), i.InputOperand2(1));
1216 DCHECK_EQ(SetCC, i.OutputSBit());
1217 break;
1218 case kArmAddPair:
1219 // i.InputRegister(0) ... left low word.
1220 // i.InputRegister(1) ... left high word.
1221 // i.InputRegister(2) ... right low word.
1222 // i.InputRegister(3) ... right high word.
1223 __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1224 SBit::SetCC);
1225 __ adc(i.OutputRegister(1), i.InputRegister(1),
1226 Operand(i.InputRegister(3)));
1227 DCHECK_EQ(LeaveCC, i.OutputSBit());
1228 break;
1229 case kArmSubPair:
1230 // i.InputRegister(0) ... left low word.
1231 // i.InputRegister(1) ... left high word.
1232 // i.InputRegister(2) ... right low word.
1233 // i.InputRegister(3) ... right high word.
1234 __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1235 SBit::SetCC);
1236 __ sbc(i.OutputRegister(1), i.InputRegister(1),
1237 Operand(i.InputRegister(3)));
1238 DCHECK_EQ(LeaveCC, i.OutputSBit());
1239 break;
1240 case kArmMulPair:
1241 // i.InputRegister(0) ... left low word.
1242 // i.InputRegister(1) ... left high word.
1243 // i.InputRegister(2) ... right low word.
1244 // i.InputRegister(3) ... right high word.
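// 64x64->64 multiply: umull produces the full low x low product, then the
// two cross products are accumulated into the high word with mla (the
// high x high term only affects bits >= 64 and is dropped).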
1245 __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1246 i.InputRegister(2));
1247 __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
1248 i.OutputRegister(1));
1249 __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
1250 i.OutputRegister(1));
1251 break;
1252 case kArmLslPair: {
1253 Register second_output =
1254 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1255 if (instr->InputAt(2)->IsImmediate()) {
1256 __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1257 i.InputRegister(1), i.InputInt32(2));
1258 } else {
1259 __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1260 i.InputRegister(1), i.InputRegister(2));
1261 }
1262 break;
1263 }
1264 case kArmLsrPair: {
1265 Register second_output =
1266 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1267 if (instr->InputAt(2)->IsImmediate()) {
1268 __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1269 i.InputRegister(1), i.InputInt32(2));
1270 } else {
1271 __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1272 i.InputRegister(1), i.InputRegister(2));
1273 }
1274 break;
1275 }
1276 case kArmAsrPair: {
1277 Register second_output =
1278 instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1279 if (instr->InputAt(2)->IsImmediate()) {
1280 __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1281 i.InputRegister(1), i.InputInt32(2));
1282 } else {
1283 __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1284 i.InputRegister(1), i.InputRegister(2));
1285 }
1286 break;
1287 }
1288 case kArmVcmpF32:
1289 if (instr->InputAt(1)->IsFPRegister()) {
1290 __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
1291 i.InputFloatRegister(1));
1292 } else {
1293 DCHECK(instr->InputAt(1)->IsImmediate());
1294 // 0.0 is the only immediate supported by vcmp instructions.
1295 DCHECK_EQ(0.0f, i.InputFloat32(1));
1296 __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
1297 }
1298 DCHECK_EQ(SetCC, i.OutputSBit());
1299 break;
1300 case kArmVaddF32:
1301 __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
1302 i.InputFloatRegister(1));
1303 DCHECK_EQ(LeaveCC, i.OutputSBit());
1304 break;
1305 case kArmVsubF32:
1306 __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
1307 i.InputFloatRegister(1));
1308 DCHECK_EQ(LeaveCC, i.OutputSBit());
1309 break;
1310 case kArmVmulF32:
1311 __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
1312 i.InputFloatRegister(1));
1313 DCHECK_EQ(LeaveCC, i.OutputSBit());
1314 break;
1315 case kArmVmlaF32:
1316 __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
1317 i.InputFloatRegister(2));
1318 DCHECK_EQ(LeaveCC, i.OutputSBit());
1319 break;
1320 case kArmVmlsF32:
1321 __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
1322 i.InputFloatRegister(2));
1323 DCHECK_EQ(LeaveCC, i.OutputSBit());
1324 break;
1325 case kArmVdivF32:
1326 __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
1327 i.InputFloatRegister(1));
1328 DCHECK_EQ(LeaveCC, i.OutputSBit());
1329 break;
1330 case kArmVsqrtF32:
1331 __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
1332 break;
1333 case kArmVabsF32:
1334 __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
1335 break;
1336 case kArmVnegF32:
1337 __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
1338 break;
1339 case kArmVcmpF64:
1340 if (instr->InputAt(1)->IsFPRegister()) {
1341 __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
1342 i.InputDoubleRegister(1));
1343 } else {
1344 DCHECK(instr->InputAt(1)->IsImmediate());
1345 // 0.0 is the only immediate supported by vcmp instructions.
1346 DCHECK_EQ(0.0, i.InputDouble(1));
1347 __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
1348 }
1349 DCHECK_EQ(SetCC, i.OutputSBit());
1350 break;
1351 case kArmVaddF64:
1352 __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1353 i.InputDoubleRegister(1));
1354 DCHECK_EQ(LeaveCC, i.OutputSBit());
1355 break;
1356 case kArmVsubF64:
1357 __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1358 i.InputDoubleRegister(1));
1359 DCHECK_EQ(LeaveCC, i.OutputSBit());
1360 break;
1361 case kArmVmulF64:
1362 __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1363 i.InputDoubleRegister(1));
1364 DCHECK_EQ(LeaveCC, i.OutputSBit());
1365 break;
1366 case kArmVmlaF64:
1367 __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1368 i.InputDoubleRegister(2));
1369 DCHECK_EQ(LeaveCC, i.OutputSBit());
1370 break;
1371 case kArmVmlsF64:
1372 __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1373 i.InputDoubleRegister(2));
1374 DCHECK_EQ(LeaveCC, i.OutputSBit());
1375 break;
1376 case kArmVdivF64:
1377 __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1378 i.InputDoubleRegister(1));
1379 DCHECK_EQ(LeaveCC, i.OutputSBit());
1380 break;
1381 case kArmVmodF64: {
1382 // TODO(bmeurer): We should really get rid of this special instruction,
1383 // and generate a CallAddress instruction instead.
1384 FrameScope scope(tasm(), StackFrame::MANUAL);
1385 __ PrepareCallCFunction(0, 2);
1386 __ MovToFloatParameters(i.InputDoubleRegister(0),
1387 i.InputDoubleRegister(1));
1388 __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1389 // Move the result into the double result register.
1390 __ MovFromFloatResult(i.OutputDoubleRegister());
1391 DCHECK_EQ(LeaveCC, i.OutputSBit());
1392 break;
1393 }
1394 case kArmVsqrtF64:
1395 __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1396 break;
1397 case kArmVabsF64:
1398 __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1399 break;
1400 case kArmVnegF64:
1401 __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1402 break;
1403 case kArmVrintmF32: {
1404 CpuFeatureScope scope(tasm(), ARMv8);
1405 __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
1406 break;
1407 }
1408 case kArmVrintmF64: {
1409 CpuFeatureScope scope(tasm(), ARMv8);
1410 __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1411 break;
1412 }
1413 case kArmVrintpF32: {
1414 CpuFeatureScope scope(tasm(), ARMv8);
1415 __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
1416 break;
1417 }
1418 case kArmVrintpF64: {
1419 CpuFeatureScope scope(tasm(), ARMv8);
1420 __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1421 break;
1422 }
1423 case kArmVrintzF32: {
1424 CpuFeatureScope scope(tasm(), ARMv8);
1425 __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
1426 break;
1427 }
1428 case kArmVrintzF64: {
1429 CpuFeatureScope scope(tasm(), ARMv8);
1430 __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1431 break;
1432 }
1433 case kArmVrintaF64: {
1434 CpuFeatureScope scope(tasm(), ARMv8);
1435 __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1436 break;
1437 }
1438 case kArmVrintnF32: {
1439 CpuFeatureScope scope(tasm(), ARMv8);
1440 __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
1441 break;
1442 }
1443 case kArmVrintnF64: {
1444 CpuFeatureScope scope(tasm(), ARMv8);
1445 __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1446 break;
1447 }
1448 case kArmVcvtF32F64: {
1449 __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
1450 DCHECK_EQ(LeaveCC, i.OutputSBit());
1451 break;
1452 }
1453 case kArmVcvtF64F32: {
1454 __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
1455 DCHECK_EQ(LeaveCC, i.OutputSBit());
1456 break;
1457 }
1458 case kArmVcvtF32S32: {
1459 UseScratchRegisterScope temps(tasm());
1460 SwVfpRegister scratch = temps.AcquireS();
1461 __ vmov(scratch, i.InputRegister(0));
1462 __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
1463 DCHECK_EQ(LeaveCC, i.OutputSBit());
1464 break;
1465 }
1466 case kArmVcvtF32U32: {
1467 UseScratchRegisterScope temps(tasm());
1468 SwVfpRegister scratch = temps.AcquireS();
1469 __ vmov(scratch, i.InputRegister(0));
1470 __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
1471 DCHECK_EQ(LeaveCC, i.OutputSBit());
1472 break;
1473 }
1474 case kArmVcvtF64S32: {
1475 UseScratchRegisterScope temps(tasm());
1476 SwVfpRegister scratch = temps.AcquireS();
1477 __ vmov(scratch, i.InputRegister(0));
1478 __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
1479 DCHECK_EQ(LeaveCC, i.OutputSBit());
1480 break;
1481 }
1482 case kArmVcvtF64U32: {
1483 UseScratchRegisterScope temps(tasm());
1484 SwVfpRegister scratch = temps.AcquireS();
1485 __ vmov(scratch, i.InputRegister(0));
1486 __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
1487 DCHECK_EQ(LeaveCC, i.OutputSBit());
1488 break;
1489 }
1490 case kArmVcvtS32F32: {
1491 UseScratchRegisterScope temps(tasm());
1492 SwVfpRegister scratch = temps.AcquireS();
1493 __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
1494 __ vmov(i.OutputRegister(), scratch);
1495 // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
1496 // because INT32_MIN allows easier out-of-bounds detection.
1497 __ cmn(i.OutputRegister(), Operand(1));
1498 __ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs);
1499 DCHECK_EQ(LeaveCC, i.OutputSBit());
1500 break;
1501 }
1502 case kArmVcvtU32F32: {
1503 UseScratchRegisterScope temps(tasm());
1504 SwVfpRegister scratch = temps.AcquireS();
1505 __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
1506 __ vmov(i.OutputRegister(), scratch);
1507 // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
1508 // because 0 allows easier out-of-bounds detection.
1509 __ cmn(i.OutputRegister(), Operand(1));
1510 __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
1511 DCHECK_EQ(LeaveCC, i.OutputSBit());
1512 break;
1513 }
1514 case kArmVcvtS32F64: {
1515 UseScratchRegisterScope temps(tasm());
1516 SwVfpRegister scratch = temps.AcquireS();
1517 __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
1518 __ vmov(i.OutputRegister(), scratch);
1519 DCHECK_EQ(LeaveCC, i.OutputSBit());
1520 break;
1521 }
1522 case kArmVcvtU32F64: {
1523 UseScratchRegisterScope temps(tasm());
1524 SwVfpRegister scratch = temps.AcquireS();
1525 __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
1526 __ vmov(i.OutputRegister(), scratch);
1527 DCHECK_EQ(LeaveCC, i.OutputSBit());
1528 break;
1529 }
1530 case kArmVmovU32F32:
1531 __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
1532 DCHECK_EQ(LeaveCC, i.OutputSBit());
1533 break;
1534 case kArmVmovF32U32:
1535 __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
1536 DCHECK_EQ(LeaveCC, i.OutputSBit());
1537 break;
1538 case kArmVmovLowU32F64:
1539 __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
1540 DCHECK_EQ(LeaveCC, i.OutputSBit());
1541 break;
1542 case kArmVmovLowF64U32:
1543 __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
1544 DCHECK_EQ(LeaveCC, i.OutputSBit());
1545 break;
1546 case kArmVmovHighU32F64:
1547 __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
1548 DCHECK_EQ(LeaveCC, i.OutputSBit());
1549 break;
1550 case kArmVmovHighF64U32:
1551 __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
1552 DCHECK_EQ(LeaveCC, i.OutputSBit());
1553 break;
1554 case kArmVmovF64U32U32:
1555 __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
1556 DCHECK_EQ(LeaveCC, i.OutputSBit());
1557 break;
1558 case kArmVmovU32U32F64:
1559 __ vmov(i.OutputRegister(0), i.OutputRegister(1),
1560 i.InputDoubleRegister(0));
1561 DCHECK_EQ(LeaveCC, i.OutputSBit());
1562 break;
1563 case kArmLdrb:
1564 __ ldrb(i.OutputRegister(), i.InputOffset());
1565 DCHECK_EQ(LeaveCC, i.OutputSBit());
1566 EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1567 break;
1568 case kArmLdrsb:
1569 __ ldrsb(i.OutputRegister(), i.InputOffset());
1570 DCHECK_EQ(LeaveCC, i.OutputSBit());
1571 EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1572 break;
1573 case kArmStrb:
1574 __ strb(i.InputRegister(0), i.InputOffset(1));
1575 DCHECK_EQ(LeaveCC, i.OutputSBit());
1576 break;
1577 case kArmLdrh:
1578 __ ldrh(i.OutputRegister(), i.InputOffset());
1579 EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1580 break;
1581 case kArmLdrsh:
1582 __ ldrsh(i.OutputRegister(), i.InputOffset());
1583 EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1584 break;
1585 case kArmStrh:
1586 __ strh(i.InputRegister(0), i.InputOffset(1));
1587 DCHECK_EQ(LeaveCC, i.OutputSBit());
1588 break;
1589 case kArmLdr:
1590 __ ldr(i.OutputRegister(), i.InputOffset());
1591 EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1592 break;
1593 case kArmStr:
1594 __ str(i.InputRegister(0), i.InputOffset(1));
1595 DCHECK_EQ(LeaveCC, i.OutputSBit());
1596 break;
1597 case kArmVldrF32: {
1598 const MemoryAccessMode access_mode =
1599 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1600 if (access_mode == kMemoryAccessPoisoned) {
1601 UseScratchRegisterScope temps(tasm());
1602 Register address = temps.Acquire();
1603 ComputePoisonedAddressForLoad(this, opcode, i, address);
1604 __ vldr(i.OutputFloatRegister(), address, 0);
1605 } else {
1606 __ vldr(i.OutputFloatRegister(), i.InputOffset());
1607 }
1608 DCHECK_EQ(LeaveCC, i.OutputSBit());
1609 break;
1610 }
1611 case kArmVstrF32:
1612 __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
1613 DCHECK_EQ(LeaveCC, i.OutputSBit());
1614 break;
1615 case kArmVld1F64: {
1616 __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
1617 i.NeonInputOperand(0));
1618 break;
1619 }
1620 case kArmVst1F64: {
1621 __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
1622 i.NeonInputOperand(1));
1623 break;
1624 }
1625 case kArmVld1S128: {
1626 __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
1627 i.NeonInputOperand(0));
1628 break;
1629 }
1630 case kArmVst1S128: {
1631 __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
1632 i.NeonInputOperand(1));
1633 break;
1634 }
1635 case kArmVldrF64: {
1636 const MemoryAccessMode access_mode =
1637 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1638 if (access_mode == kMemoryAccessPoisoned) {
1639 UseScratchRegisterScope temps(tasm());
1640 Register address = temps.Acquire();
1641 ComputePoisonedAddressForLoad(this, opcode, i, address);
1642 __ vldr(i.OutputDoubleRegister(), address, 0);
1643 } else {
1644 __ vldr(i.OutputDoubleRegister(), i.InputOffset());
1645 }
1646 DCHECK_EQ(LeaveCC, i.OutputSBit());
1647 break;
1648 }
1649 case kArmVstrF64:
1650 __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
1651 DCHECK_EQ(LeaveCC, i.OutputSBit());
1652 break;
1653 case kArmFloat32Max: {
1654 SwVfpRegister result = i.OutputFloatRegister();
1655 SwVfpRegister left = i.InputFloatRegister(0);
1656 SwVfpRegister right = i.InputFloatRegister(1);
1657 if (left == right) {
1658 __ Move(result, left);
1659 } else {
1660 auto ool = new (zone()) OutOfLineFloat32Max(this, result, left, right);
1661 __ FloatMax(result, left, right, ool->entry());
1662 __ bind(ool->exit());
1663 }
1664 DCHECK_EQ(LeaveCC, i.OutputSBit());
1665 break;
1666 }
1667 case kArmFloat64Max: {
1668 DwVfpRegister result = i.OutputDoubleRegister();
1669 DwVfpRegister left = i.InputDoubleRegister(0);
1670 DwVfpRegister right = i.InputDoubleRegister(1);
1671 if (left == right) {
1672 __ Move(result, left);
1673 } else {
1674 auto ool = new (zone()) OutOfLineFloat64Max(this, result, left, right);
1675 __ FloatMax(result, left, right, ool->entry());
1676 __ bind(ool->exit());
1677 }
1678 DCHECK_EQ(LeaveCC, i.OutputSBit());
1679 break;
1680 }
1681 case kArmFloat32Min: {
1682 SwVfpRegister result = i.OutputFloatRegister();
1683 SwVfpRegister left = i.InputFloatRegister(0);
1684 SwVfpRegister right = i.InputFloatRegister(1);
1685 if (left == right) {
1686 __ Move(result, left);
1687 } else {
1688 auto ool = new (zone()) OutOfLineFloat32Min(this, result, left, right);
1689 __ FloatMin(result, left, right, ool->entry());
1690 __ bind(ool->exit());
1691 }
1692 DCHECK_EQ(LeaveCC, i.OutputSBit());
1693 break;
1694 }
1695 case kArmFloat64Min: {
1696 DwVfpRegister result = i.OutputDoubleRegister();
1697 DwVfpRegister left = i.InputDoubleRegister(0);
1698 DwVfpRegister right = i.InputDoubleRegister(1);
1699 if (left == right) {
1700 __ Move(result, left);
1701 } else {
1702 auto ool = new (zone()) OutOfLineFloat64Min(this, result, left, right);
1703 __ FloatMin(result, left, right, ool->entry());
1704 __ bind(ool->exit());
1705 }
1706 DCHECK_EQ(LeaveCC, i.OutputSBit());
1707 break;
1708 }
1709 case kArmFloat64SilenceNaN: {
1710 DwVfpRegister value = i.InputDoubleRegister(0);
1711 DwVfpRegister result = i.OutputDoubleRegister();
1712 __ VFPCanonicalizeNaN(result, value);
1713 break;
1714 }
1715 case kArmPush:
1716 if (instr->InputAt(0)->IsFPRegister()) {
1717 LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
1718 switch (op->representation()) {
1719 case MachineRepresentation::kFloat32:
1720 __ vpush(i.InputFloatRegister(0));
1721 frame_access_state()->IncreaseSPDelta(1);
1722 break;
1723 case MachineRepresentation::kFloat64:
1724 __ vpush(i.InputDoubleRegister(0));
1725 frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1726 break;
1727 case MachineRepresentation::kSimd128: {
1728 __ vpush(i.InputSimd128Register(0));
1729 frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
1730 break;
1731 }
1732 default:
1733 UNREACHABLE();
1734 break;
1735 }
1736 } else {
1737 __ push(i.InputRegister(0));
1738 frame_access_state()->IncreaseSPDelta(1);
1739 }
1740 DCHECK_EQ(LeaveCC, i.OutputSBit());
1741 break;
1742 case kArmPoke: {
1743 int const slot = MiscField::decode(instr->opcode());
1744 __ str(i.InputRegister(0), MemOperand(sp, slot * kPointerSize));
1745 DCHECK_EQ(LeaveCC, i.OutputSBit());
1746 break;
1747 }
1748 case kArmPeek: {
1749 // The incoming value is 0-based, but we need a 1-based value.
1750 int reverse_slot = i.InputInt32(0) + 1;
1751 int offset =
1752 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1753 if (instr->OutputAt(0)->IsFPRegister()) {
1754 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1755 if (op->representation() == MachineRepresentation::kFloat64) {
1756 __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
1757 } else {
1758 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
1759 __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
1760 }
1761 } else {
1762 __ ldr(i.OutputRegister(), MemOperand(fp, offset));
1763 }
1764 break;
1765 }
1766 case kArmDsbIsb: {
1767 __ dsb(SY);
1768 __ isb(SY);
1769 break;
1770 }
1771 case kArchWordPoisonOnSpeculation:
1772 __ and_(i.OutputRegister(0), i.InputRegister(0),
1773 Operand(kSpeculationPoisonRegister));
1774 break;
1775 case kArmF32x4Splat: {
1776 int src_code = i.InputFloatRegister(0).code();
1777 __ vdup(Neon32, i.OutputSimd128Register(),
1778 DwVfpRegister::from_code(src_code / 2), src_code % 2);
1779 break;
1780 }
1781 case kArmF32x4ExtractLane: {
1782 __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
1783 i.InputInt8(1));
1784 break;
1785 }
1786 case kArmF32x4ReplaceLane: {
1787 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
1788 i.InputFloatRegister(2), i.InputInt8(1));
1789 break;
1790 }
1791 case kArmF32x4SConvertI32x4: {
1792 __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1793 break;
1794 }
1795 case kArmF32x4UConvertI32x4: {
1796 __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1797 break;
1798 }
1799 case kArmF32x4Abs: {
1800 __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
1801 break;
1802 }
1803 case kArmF32x4Neg: {
1804 __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
1805 break;
1806 }
1807 case kArmF32x4RecipApprox: {
1808 __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
1809 break;
1810 }
1811 case kArmF32x4RecipSqrtApprox: {
1812 __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
1813 break;
1814 }
1815 case kArmF32x4Add: {
1816 __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1817 i.InputSimd128Register(1));
1818 break;
1819 }
1820 case kArmF32x4AddHoriz: {
1821 Simd128Register dst = i.OutputSimd128Register(),
1822 src0 = i.InputSimd128Register(0),
1823 src1 = i.InputSimd128Register(1);
1824 // Make sure we don't overwrite source data before it's used.
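// vpadd reads both of its source D-registers before writing its result, so
// the writes below are ordered such that each source half is consumed before
// any destination half that aliases it gets overwritten.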
1825 if (dst == src0) {
1826 __ vpadd(dst.low(), src0.low(), src0.high());
1827 if (dst == src1) {
1828 __ vmov(dst.high(), dst.low());
1829 } else {
1830 __ vpadd(dst.high(), src1.low(), src1.high());
1831 }
1832 } else {
1833 __ vpadd(dst.high(), src1.low(), src1.high());
1834 __ vpadd(dst.low(), src0.low(), src0.high());
1835 }
1836 break;
1837 }
1838 case kArmF32x4Sub: {
1839 __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
1840 i.InputSimd128Register(1));
1841 break;
1842 }
1843 case kArmF32x4Mul: {
1844 __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1845 i.InputSimd128Register(1));
1846 break;
1847 }
1848 case kArmF32x4Min: {
1849 __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
1850 i.InputSimd128Register(1));
1851 break;
1852 }
1853 case kArmF32x4Max: {
1854 __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
1855 i.InputSimd128Register(1));
1856 break;
1857 }
1858 case kArmF32x4Eq: {
1859 __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
1860 i.InputSimd128Register(1));
1861 break;
1862 }
1863 case kArmF32x4Ne: {
1864 Simd128Register dst = i.OutputSimd128Register();
1865 __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
1866 __ vmvn(dst, dst);
1867 break;
1868 }
1869 case kArmF32x4Lt: {
1870 __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
1871 i.InputSimd128Register(0));
1872 break;
1873 }
1874 case kArmF32x4Le: {
1875 __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
1876 i.InputSimd128Register(0));
1877 break;
1878 }
1879 case kArmI32x4Splat: {
1880 __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
1881 break;
1882 }
1883 case kArmI32x4ExtractLane: {
1884 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
1885 i.InputInt8(1));
1886 break;
1887 }
1888 case kArmI32x4ReplaceLane: {
1889 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
1890 i.InputRegister(2), NeonS32, i.InputInt8(1));
1891 break;
1892 }
1893 case kArmI32x4SConvertF32x4: {
1894 __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1895 break;
1896 }
1897 case kArmI32x4SConvertI16x8Low: {
1898 __ vmovl(NeonS16, i.OutputSimd128Register(),
1899 i.InputSimd128Register(0).low());
1900 break;
1901 }
1902 case kArmI32x4SConvertI16x8High: {
1903 __ vmovl(NeonS16, i.OutputSimd128Register(),
1904 i.InputSimd128Register(0).high());
1905 break;
1906 }
1907 case kArmI32x4Neg: {
1908 __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
1909 break;
1910 }
1911 case kArmI32x4Shl: {
1912 __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1913 i.InputInt5(1));
1914 break;
1915 }
1916 case kArmI32x4ShrS: {
1917 __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1918 i.InputInt5(1));
1919 break;
1920 }
1921 case kArmI32x4Add: {
1922 __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1923 i.InputSimd128Register(1));
1924 break;
1925 }
1926 case kArmI32x4AddHoriz:
1927 ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
1928 break;
1929 case kArmI32x4Sub: {
1930 __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1931 i.InputSimd128Register(1));
1932 break;
1933 }
1934 case kArmI32x4Mul: {
1935 __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1936 i.InputSimd128Register(1));
1937 break;
1938 }
1939 case kArmI32x4MinS: {
1940 __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1941 i.InputSimd128Register(1));
1942 break;
1943 }
1944 case kArmI32x4MaxS: {
1945 __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1946 i.InputSimd128Register(1));
1947 break;
1948 }
1949 case kArmI32x4Eq: {
1950 __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1951 i.InputSimd128Register(1));
1952 break;
1953 }
1954 case kArmI32x4Ne: {
1955 Simd128Register dst = i.OutputSimd128Register();
1956 __ vceq(Neon32, dst, i.InputSimd128Register(0),
1957 i.InputSimd128Register(1));
1958 __ vmvn(dst, dst);
1959 break;
1960 }
1961 case kArmI32x4GtS: {
1962 __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1963 i.InputSimd128Register(1));
1964 break;
1965 }
1966 case kArmI32x4GeS: {
1967 __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1968 i.InputSimd128Register(1));
1969 break;
1970 }
1971 case kArmI32x4UConvertF32x4: {
1972 __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1973 break;
1974 }
1975 case kArmI32x4UConvertI16x8Low: {
1976 __ vmovl(NeonU16, i.OutputSimd128Register(),
1977 i.InputSimd128Register(0).low());
1978 break;
1979 }
1980 case kArmI32x4UConvertI16x8High: {
1981 __ vmovl(NeonU16, i.OutputSimd128Register(),
1982 i.InputSimd128Register(0).high());
1983 break;
1984 }
1985 case kArmI32x4ShrU: {
1986 __ vshr(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1987 i.InputInt5(1));
1988 break;
1989 }
1990 case kArmI32x4MinU: {
1991 __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1992 i.InputSimd128Register(1));
1993 break;
1994 }
1995 case kArmI32x4MaxU: {
1996 __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1997 i.InputSimd128Register(1));
1998 break;
1999 }
2000 case kArmI32x4GtU: {
2001 __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2002 i.InputSimd128Register(1));
2003 break;
2004 }
2005 case kArmI32x4GeU: {
2006 __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2007 i.InputSimd128Register(1));
2008 break;
2009 }
2010 case kArmI16x8Splat: {
2011 __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
2012 break;
2013 }
2014 case kArmI16x8ExtractLane: {
2015 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
2016 i.InputInt8(1));
2017 break;
2018 }
2019 case kArmI16x8ReplaceLane: {
2020 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2021 i.InputRegister(2), NeonS16, i.InputInt8(1));
2022 break;
2023 }
2024 case kArmI16x8SConvertI8x16Low: {
2025 __ vmovl(NeonS8, i.OutputSimd128Register(),
2026 i.InputSimd128Register(0).low());
2027 break;
2028 }
2029 case kArmI16x8SConvertI8x16High: {
2030 __ vmovl(NeonS8, i.OutputSimd128Register(),
2031 i.InputSimd128Register(0).high());
2032 break;
2033 }
2034 case kArmI16x8Neg: {
2035 __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2036 break;
2037 }
2038 case kArmI16x8Shl: {
2039 __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2040 i.InputInt4(1));
2041 break;
2042 }
2043 case kArmI16x8ShrS: {
2044 __ vshr(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2045 i.InputInt4(1));
2046 break;
2047 }
2048 case kArmI16x8SConvertI32x4:
2049 ASSEMBLE_NEON_NARROWING_OP(NeonS16);
2050 break;
2051 case kArmI16x8Add: {
2052 __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2053 i.InputSimd128Register(1));
2054 break;
2055 }
2056 case kArmI16x8AddSaturateS: {
2057 __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2058 i.InputSimd128Register(1));
2059 break;
2060 }
2061 case kArmI16x8AddHoriz:
2062 ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
2063 break;
2064 case kArmI16x8Sub: {
2065 __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2066 i.InputSimd128Register(1));
2067 break;
2068 }
2069 case kArmI16x8SubSaturateS: {
2070 __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2071 i.InputSimd128Register(1));
2072 break;
2073 }
2074 case kArmI16x8Mul: {
2075 __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2076 i.InputSimd128Register(1));
2077 break;
2078 }
2079 case kArmI16x8MinS: {
2080 __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2081 i.InputSimd128Register(1));
2082 break;
2083 }
2084 case kArmI16x8MaxS: {
2085 __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2086 i.InputSimd128Register(1));
2087 break;
2088 }
2089 case kArmI16x8Eq: {
2090 __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2091 i.InputSimd128Register(1));
2092 break;
2093 }
2094 case kArmI16x8Ne: {
2095 Simd128Register dst = i.OutputSimd128Register();
2096 __ vceq(Neon16, dst, i.InputSimd128Register(0),
2097 i.InputSimd128Register(1));
2098 __ vmvn(dst, dst);
2099 break;
2100 }
2101 case kArmI16x8GtS: {
2102 __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2103 i.InputSimd128Register(1));
2104 break;
2105 }
2106 case kArmI16x8GeS: {
2107 __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2108 i.InputSimd128Register(1));
2109 break;
2110 }
2111 case kArmI16x8UConvertI8x16Low: {
2112 __ vmovl(NeonU8, i.OutputSimd128Register(),
2113 i.InputSimd128Register(0).low());
2114 break;
2115 }
2116 case kArmI16x8UConvertI8x16High: {
2117 __ vmovl(NeonU8, i.OutputSimd128Register(),
2118 i.InputSimd128Register(0).high());
2119 break;
2120 }
2121 case kArmI16x8ShrU: {
2122 __ vshr(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2123 i.InputInt4(1));
2124 break;
2125 }
2126 case kArmI16x8UConvertI32x4:
2127 ASSEMBLE_NEON_NARROWING_OP(NeonU16);
2128 break;
2129 case kArmI16x8AddSaturateU: {
2130 __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2131 i.InputSimd128Register(1));
2132 break;
2133 }
2134 case kArmI16x8SubSaturateU: {
2135 __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2136 i.InputSimd128Register(1));
2137 break;
2138 }
2139 case kArmI16x8MinU: {
2140 __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2141 i.InputSimd128Register(1));
2142 break;
2143 }
2144 case kArmI16x8MaxU: {
2145 __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2146 i.InputSimd128Register(1));
2147 break;
2148 }
2149 case kArmI16x8GtU: {
2150 __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2151 i.InputSimd128Register(1));
2152 break;
2153 }
2154 case kArmI16x8GeU: {
2155 __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2156 i.InputSimd128Register(1));
2157 break;
2158 }
2159 case kArmI8x16Splat: {
2160 __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
2161 break;
2162 }
2163 case kArmI8x16ExtractLane: {
2164 __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
2165 i.InputInt8(1));
2166 break;
2167 }
2168 case kArmI8x16ReplaceLane: {
2169 __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2170 i.InputRegister(2), NeonS8, i.InputInt8(1));
2171 break;
2172 }
2173 case kArmI8x16Neg: {
2174 __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2175 break;
2176 }
2177 case kArmI8x16Shl: {
2178 __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2179 i.InputInt3(1));
2180 break;
2181 }
2182 case kArmI8x16ShrS: {
2183 __ vshr(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2184 i.InputInt3(1));
2185 break;
2186 }
2187 case kArmI8x16SConvertI16x8:
2188 ASSEMBLE_NEON_NARROWING_OP(NeonS8);
2189 break;
2190 case kArmI8x16Add: {
2191 __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2192 i.InputSimd128Register(1));
2193 break;
2194 }
2195 case kArmI8x16AddSaturateS: {
2196 __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2197 i.InputSimd128Register(1));
2198 break;
2199 }
2200 case kArmI8x16Sub: {
2201 __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2202 i.InputSimd128Register(1));
2203 break;
2204 }
2205 case kArmI8x16SubSaturateS: {
2206 __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2207 i.InputSimd128Register(1));
2208 break;
2209 }
2210 case kArmI8x16Mul: {
2211 __ vmul(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2212 i.InputSimd128Register(1));
2213 break;
2214 }
2215 case kArmI8x16MinS: {
2216 __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2217 i.InputSimd128Register(1));
2218 break;
2219 }
2220 case kArmI8x16MaxS: {
2221 __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2222 i.InputSimd128Register(1));
2223 break;
2224 }
2225 case kArmI8x16Eq: {
2226 __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2227 i.InputSimd128Register(1));
2228 break;
2229 }
2230 case kArmI8x16Ne: {
2231 Simd128Register dst = i.OutputSimd128Register();
2232 __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2233 __ vmvn(dst, dst);
2234 break;
2235 }
2236 case kArmI8x16GtS: {
2237 __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2238 i.InputSimd128Register(1));
2239 break;
2240 }
2241 case kArmI8x16GeS: {
2242 __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2243 i.InputSimd128Register(1));
2244 break;
2245 }
2246 case kArmI8x16ShrU: {
2247 __ vshr(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2248 i.InputInt3(1));
2249 break;
2250 }
2251 case kArmI8x16UConvertI16x8:
2252 ASSEMBLE_NEON_NARROWING_OP(NeonU8);
2253 break;
2254 case kArmI8x16AddSaturateU: {
2255 __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2256 i.InputSimd128Register(1));
2257 break;
2258 }
2259 case kArmI8x16SubSaturateU: {
2260 __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2261 i.InputSimd128Register(1));
2262 break;
2263 }
2264 case kArmI8x16MinU: {
2265 __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2266 i.InputSimd128Register(1));
2267 break;
2268 }
2269 case kArmI8x16MaxU: {
2270 __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2271 i.InputSimd128Register(1));
2272 break;
2273 }
2274 case kArmI8x16GtU: {
2275 __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2276 i.InputSimd128Register(1));
2277 break;
2278 }
2279 case kArmI8x16GeU: {
2280 __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2281 i.InputSimd128Register(1));
2282 break;
2283 }
2284 case kArmS128Zero: {
2285 __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2286 i.OutputSimd128Register());
2287 break;
2288 }
2289 case kArmS128Dup: {
2290 NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
2291 int lanes = kSimd128Size >> size;
2292 int index = i.InputInt32(2);
2293 DCHECK(index < lanes);
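// A Q-register is a pair of D-registers, so translate the Q-register lane
// index into the D-register holding it plus the lane index within that
// D-register before duplicating.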
2294 int d_lanes = lanes / 2;
2295 int src_d_index = index & (d_lanes - 1);
2296 int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
2297 __ vdup(size, i.OutputSimd128Register(),
2298 DwVfpRegister::from_code(src_d_code), src_d_index);
2299 break;
2300 }
2301 case kArmS128And: {
2302 __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
2303 i.InputSimd128Register(1));
2304 break;
2305 }
2306 case kArmS128Or: {
2307 __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
2308 i.InputSimd128Register(1));
2309 break;
2310 }
2311 case kArmS128Xor: {
2312 __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
2313 i.InputSimd128Register(1));
2314 break;
2315 }
2316 case kArmS128Not: {
2317 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
2318 break;
2319 }
2320 case kArmS128Select: {
2321 Simd128Register dst = i.OutputSimd128Register();
2322 DCHECK(dst == i.InputSimd128Register(0));
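// vbsl computes (mask & input1) | (~mask & input2), with the mask read from
// and the result written to dst; that is why the select mask (input 0) must
// be allocated in the destination register.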
2323 __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
2324 break;
2325 }
2326 case kArmS32x4ZipLeft: {
2327 Simd128Register dst = i.OutputSimd128Register(),
2328 src1 = i.InputSimd128Register(1);
2329 DCHECK(dst == i.InputSimd128Register(0));
2330 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2331 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
2332 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
2333 break;
2334 }
2335 case kArmS32x4ZipRight: {
2336 Simd128Register dst = i.OutputSimd128Register(),
2337 src1 = i.InputSimd128Register(1);
2338 DCHECK(dst == i.InputSimd128Register(0));
2339 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
2340 __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
2341 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
2342 break;
2343 }
2344 case kArmS32x4UnzipLeft: {
2345 Simd128Register dst = i.OutputSimd128Register(),
2346 src1 = i.InputSimd128Register(1);
2347 DCHECK(dst == i.InputSimd128Register(0));
2348 UseScratchRegisterScope temps(tasm());
2349 Simd128Register scratch = temps.AcquireQ();
2350 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2351 __ vmov(scratch, src1);
2352 __ vuzp(Neon32, dst, scratch); // dst = [0, 2, 4, 6]
2353 break;
2354 }
2355 case kArmS32x4UnzipRight: {
2356 Simd128Register dst = i.OutputSimd128Register(),
2357 src1 = i.InputSimd128Register(1);
2358 DCHECK(dst == i.InputSimd128Register(0));
2359 UseScratchRegisterScope temps(tasm());
2360 Simd128Register scratch = temps.AcquireQ();
2361 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
2362 __ vmov(scratch, src1);
2363 __ vuzp(Neon32, scratch, dst); // dst = [1, 3, 5, 7]
2364 break;
2365 }
2366 case kArmS32x4TransposeLeft: {
2367 Simd128Register dst = i.OutputSimd128Register(),
2368 src1 = i.InputSimd128Register(1);
2369 DCHECK(dst == i.InputSimd128Register(0));
2370 UseScratchRegisterScope temps(tasm());
2371 Simd128Register scratch = temps.AcquireQ();
2372 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2373 __ vmov(scratch, src1);
2374 __ vtrn(Neon32, dst, scratch); // dst = [0, 4, 2, 6]
2375 break;
2376 }
2377 case kArmS32x4Shuffle: {
2378 Simd128Register dst = i.OutputSimd128Register(),
2379 src0 = i.InputSimd128Register(0),
2380 src1 = i.InputSimd128Register(1);
2381 DCHECK_NE(dst, src0);
2382 DCHECK_NE(dst, src1);
2383 // Perform shuffle as a vmov per lane.
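// Each byte of the 32-bit shuffle immediate selects one of the eight input
// lanes: indices 0-3 come from src0 and indices 4-7 from src1.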
2384 int dst_code = dst.code() * 4;
2385 int src0_code = src0.code() * 4;
2386 int src1_code = src1.code() * 4;
2387 int32_t shuffle = i.InputInt32(2);
2388 for (int i = 0; i < 4; i++) {
2389 int lane = shuffle & 0x7;
2390 int src_code = src0_code;
2391 if (lane >= 4) {
2392 src_code = src1_code;
2393 lane &= 0x3;
2394 }
2395 __ VmovExtended(dst_code + i, src_code + lane);
2396 shuffle >>= 8;
2397 }
2398 break;
2399 }
2400 case kArmS32x4TransposeRight: {
2401 Simd128Register dst = i.OutputSimd128Register(),
2402 src1 = i.InputSimd128Register(1);
2403 UseScratchRegisterScope temps(tasm());
2404 Simd128Register scratch = temps.AcquireQ();
2405 DCHECK(dst == i.InputSimd128Register(0));
2406 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
2407 __ vmov(scratch, src1);
2408 __ vtrn(Neon32, scratch, dst); // dst = [1, 5, 3, 7]
2409 break;
2410 }
2411 case kArmS16x8ZipLeft: {
2412 Simd128Register dst = i.OutputSimd128Register(),
2413 src1 = i.InputSimd128Register(1);
2414 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2415 DCHECK(dst == i.InputSimd128Register(0));
2416 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
2417 __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
2418 break;
2419 }
2420 case kArmS16x8ZipRight: {
2421 Simd128Register dst = i.OutputSimd128Register(),
2422 src1 = i.InputSimd128Register(1);
2423 DCHECK(dst == i.InputSimd128Register(0));
2424 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2425 __ vmov(dst.low(), src1.high());
2426 __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
2427 break;
2428 }
2429 case kArmS16x8UnzipLeft: {
2430 Simd128Register dst = i.OutputSimd128Register(),
2431 src1 = i.InputSimd128Register(1);
2432 UseScratchRegisterScope temps(tasm());
2433 Simd128Register scratch = temps.AcquireQ();
2434 DCHECK(dst == i.InputSimd128Register(0));
2435 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2436 __ vmov(scratch, src1);
2437 __ vuzp(Neon16, dst, scratch); // dst = [0, 2, 4, 6, ... 14]
2438 break;
2439 }
2440 case kArmS16x8UnzipRight: {
2441 Simd128Register dst = i.OutputSimd128Register(),
2442 src1 = i.InputSimd128Register(1);
2443 UseScratchRegisterScope temps(tasm());
2444 Simd128Register scratch = temps.AcquireQ();
2445 DCHECK(dst == i.InputSimd128Register(0));
2446 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2447 __ vmov(scratch, src1);
2448 __ vuzp(Neon16, scratch, dst); // dst = [1, 3, 5, 7, ... 15]
2449 break;
2450 }
2451 case kArmS16x8TransposeLeft: {
2452 Simd128Register dst = i.OutputSimd128Register(),
2453 src1 = i.InputSimd128Register(1);
2454 UseScratchRegisterScope temps(tasm());
2455 Simd128Register scratch = temps.AcquireQ();
2456 DCHECK(dst == i.InputSimd128Register(0));
2457 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2458 __ vmov(scratch, src1);
2459 __ vtrn(Neon16, dst, scratch); // dst = [0, 8, 2, 10, ... 14]
2460 break;
2461 }
2462 case kArmS16x8TransposeRight: {
2463 Simd128Register dst = i.OutputSimd128Register(),
2464 src1 = i.InputSimd128Register(1);
2465 UseScratchRegisterScope temps(tasm());
2466 Simd128Register scratch = temps.AcquireQ();
2467 DCHECK(dst == i.InputSimd128Register(0));
2468 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2469 __ vmov(scratch, src1);
2470 __ vtrn(Neon16, scratch, dst); // dst = [1, 9, 3, 11, ... 15]
2471 break;
2472 }
2473 case kArmS8x16ZipLeft: {
2474 Simd128Register dst = i.OutputSimd128Register(),
2475 src1 = i.InputSimd128Register(1);
2476 DCHECK(dst == i.InputSimd128Register(0));
2477 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2478 __ vmov(dst.high(), src1.low());
2479 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
2480 break;
2481 }
2482 case kArmS8x16ZipRight: {
2483 Simd128Register dst = i.OutputSimd128Register(),
2484 src1 = i.InputSimd128Register(1);
2485 DCHECK(dst == i.InputSimd128Register(0));
2486 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2487 __ vmov(dst.low(), src1.high());
2488 __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
2489 break;
2490 }
2491 case kArmS8x16UnzipLeft: {
2492 Simd128Register dst = i.OutputSimd128Register(),
2493 src1 = i.InputSimd128Register(1);
2494 UseScratchRegisterScope temps(tasm());
2495 Simd128Register scratch = temps.AcquireQ();
2496 DCHECK(dst == i.InputSimd128Register(0));
2497 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2498 __ vmov(scratch, src1);
2499 __ vuzp(Neon8, dst, scratch); // dst = [0, 2, 4, 6, ... 30]
2500 break;
2501 }
2502 case kArmS8x16UnzipRight: {
2503 Simd128Register dst = i.OutputSimd128Register(),
2504 src1 = i.InputSimd128Register(1);
2505 UseScratchRegisterScope temps(tasm());
2506 Simd128Register scratch = temps.AcquireQ();
2507 DCHECK(dst == i.InputSimd128Register(0));
2508 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2509 __ vmov(scratch, src1);
2510 __ vuzp(Neon8, scratch, dst); // dst = [1, 3, 5, 7, ... 31]
2511 break;
2512 }
2513 case kArmS8x16TransposeLeft: {
2514 Simd128Register dst = i.OutputSimd128Register(),
2515 src1 = i.InputSimd128Register(1);
2516 UseScratchRegisterScope temps(tasm());
2517 Simd128Register scratch = temps.AcquireQ();
2518 DCHECK(dst == i.InputSimd128Register(0));
2519 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2520 __ vmov(scratch, src1);
2521 __ vtrn(Neon8, dst, scratch); // dst = [0, 16, 2, 18, ... 30]
2522 break;
2523 }
2524 case kArmS8x16TransposeRight: {
2525 Simd128Register dst = i.OutputSimd128Register(),
2526 src1 = i.InputSimd128Register(1);
2527 UseScratchRegisterScope temps(tasm());
2528 Simd128Register scratch = temps.AcquireQ();
2529 DCHECK(dst == i.InputSimd128Register(0));
2530 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2531 __ vmov(scratch, src1);
2532 __ vtrn(Neon8, scratch, dst); // dst = [1, 17, 3, 19, ... 31]
2533 break;
2534 }
2535 case kArmS8x16Concat: {
2536 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
2537 i.InputSimd128Register(1), i.InputInt4(2));
2538 break;
2539 }
2540 case kArmS8x16Shuffle: {
2541 Simd128Register dst = i.OutputSimd128Register(),
2542 src0 = i.InputSimd128Register(0),
2543 src1 = i.InputSimd128Register(1);
2544 DwVfpRegister table_base = src0.low();
2545 UseScratchRegisterScope temps(tasm());
2546 Simd128Register scratch = temps.AcquireQ();
2547 // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
2548 // src1. They must be consecutive.
2549 int table_size = src0 == src1 ? 2 : 4;
2550 DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
2551 // The shuffle lane mask is a byte mask; materialize it in scratch.
2552 int scratch_s_base = scratch.code() * 4;
2553 for (int j = 0; j < 4; j++) {
2554 uint32_t four_lanes = i.InputUint32(2 + j);
2555 // Ensure byte indices are in [0, 31] so masks are never NaNs.
2556 four_lanes &= 0x1F1F1F1F;
2557 __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
2558 Float32::FromBits(four_lanes));
2559 }
2560 NeonListOperand table(table_base, table_size);
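// vtbl looks up every destination byte in the table using the byte indices
// materialized in scratch; when dst aliases a table register, the lookup is
// done into scratch first so the table is not clobbered mid-shuffle.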
2561 if (dst != src0 && dst != src1) {
2562 __ vtbl(dst.low(), table, scratch.low());
2563 __ vtbl(dst.high(), table, scratch.high());
2564 } else {
2565 __ vtbl(scratch.low(), table, scratch.low());
2566 __ vtbl(scratch.high(), table, scratch.high());
2567 __ vmov(dst, scratch);
2568 }
2569 break;
2570 }
2571 case kArmS32x2Reverse: {
2572 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2573 break;
2574 }
2575 case kArmS16x4Reverse: {
2576 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2577 break;
2578 }
2579 case kArmS16x2Reverse: {
2580 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2581 break;
2582 }
2583 case kArmS8x8Reverse: {
2584 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2585 break;
2586 }
2587 case kArmS8x4Reverse: {
2588 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2589 break;
2590 }
2591 case kArmS8x2Reverse: {
2592 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2593 break;
2594 }
2595 case kArmS1x4AnyTrue: {
2596 const QwNeonRegister& src = i.InputSimd128Register(0);
2597 UseScratchRegisterScope temps(tasm());
2598 DwVfpRegister scratch = temps.AcquireD();
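// Two pairwise unsigned maxima reduce the four 32-bit lanes to a single
// value in lane 0, which is non-zero iff any input lane was non-zero.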
2599 __ vpmax(NeonU32, scratch, src.low(), src.high());
2600 __ vpmax(NeonU32, scratch, scratch, scratch);
2601 __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
2602 break;
2603 }
2604 case kArmS1x4AllTrue: {
2605 const QwNeonRegister& src = i.InputSimd128Register(0);
2606 UseScratchRegisterScope temps(tasm());
2607 DwVfpRegister scratch = temps.AcquireD();
2608 __ vpmin(NeonU32, scratch, src.low(), src.high());
2609 __ vpmin(NeonU32, scratch, scratch, scratch);
2610 __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
2611 break;
2612 }
2613 case kArmS1x8AnyTrue: {
2614 const QwNeonRegister& src = i.InputSimd128Register(0);
2615 UseScratchRegisterScope temps(tasm());
2616 DwVfpRegister scratch = temps.AcquireD();
2617 __ vpmax(NeonU16, scratch, src.low(), src.high());
2618 __ vpmax(NeonU16, scratch, scratch, scratch);
2619 __ vpmax(NeonU16, scratch, scratch, scratch);
2620 __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
2621 break;
2622 }
2623 case kArmS1x8AllTrue: {
2624 const QwNeonRegister& src = i.InputSimd128Register(0);
2625 UseScratchRegisterScope temps(tasm());
2626 DwVfpRegister scratch = temps.AcquireD();
2627 __ vpmin(NeonU16, scratch, src.low(), src.high());
2628 __ vpmin(NeonU16, scratch, scratch, scratch);
2629 __ vpmin(NeonU16, scratch, scratch, scratch);
2630 __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
2631 break;
2632 }
2633 case kArmS1x16AnyTrue: {
2634 const QwNeonRegister& src = i.InputSimd128Register(0);
2635 UseScratchRegisterScope temps(tasm());
2636 QwNeonRegister q_scratch = temps.AcquireQ();
2637 DwVfpRegister d_scratch = q_scratch.low();
2638 __ vpmax(NeonU8, d_scratch, src.low(), src.high());
2639 __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch);
2640 // vtst to detect any bits in the bottom 32 bits of d_scratch.

2641 // This saves an instruction vs. the naive sequence of two more vpmax.
2642 // kDoubleRegZero is not changed, since it is 0.
2643 __ vtst(Neon32, q_scratch, q_scratch, q_scratch);
2644 __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0);
2645 break;
2646 }
2647 case kArmS1x16AllTrue: {
2648 const QwNeonRegister& src = i.InputSimd128Register(0);
2649 UseScratchRegisterScope temps(tasm());
2650 DwVfpRegister scratch = temps.AcquireD();
2651 __ vpmin(NeonU8, scratch, src.low(), src.high());
2652 __ vpmin(NeonU8, scratch, scratch, scratch);
2653 __ vpmin(NeonU8, scratch, scratch, scratch);
2654 __ vpmin(NeonU8, scratch, scratch, scratch);
2655 __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
2656 break;
2657 }
2658 case kWord32AtomicLoadInt8:
2659 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
2660 break;
2661 case kWord32AtomicLoadUint8:
2662 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
2663 break;
2664 case kWord32AtomicLoadInt16:
2665 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
2666 break;
2667 case kWord32AtomicLoadUint16:
2668 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
2669 break;
2670 case kWord32AtomicLoadWord32:
2671 ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
2672 break;
2673 case kWord32AtomicStoreWord8:
2674 ASSEMBLE_ATOMIC_STORE_INTEGER(strb);
2675 break;
2676 case kWord32AtomicStoreWord16:
2677 ASSEMBLE_ATOMIC_STORE_INTEGER(strh);
2678 break;
2679 case kWord32AtomicStoreWord32:
2680 ASSEMBLE_ATOMIC_STORE_INTEGER(str);
2681 break;
2682 case kWord32AtomicExchangeInt8:
2683 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
2684 __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
2685 break;
2686 case kWord32AtomicExchangeUint8:
2687 case kArmWord64AtomicNarrowExchangeUint8:
2688 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
2689 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(ExchangeUint8);
2690 break;
2691 case kWord32AtomicExchangeInt16:
2692 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
2693 __ sxth(i.OutputRegister(0), i.OutputRegister(0));
2694 break;
2695 case kWord32AtomicExchangeUint16:
2696 case kArmWord64AtomicNarrowExchangeUint16:
2697 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
2698 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(ExchangeUint16);
2699 break;
2700 case kWord32AtomicExchangeWord32:
2701 case kArmWord64AtomicNarrowExchangeUint32:
2702 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
2703 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(ExchangeUint32);
2704 break;
2705 case kWord32AtomicCompareExchangeInt8:
2706 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2707 __ uxtb(i.TempRegister(2), i.InputRegister(2));
2708 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
2709 i.TempRegister(2));
2710 __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
2711 break;
2712 case kWord32AtomicCompareExchangeUint8:
2713 case kArmWord64AtomicNarrowCompareExchangeUint8:
2714 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2715 __ uxtb(i.TempRegister(2), i.InputRegister(2));
2716 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
2717 i.TempRegister(2));
2718 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(CompareExchangeUint8);
2719 break;
2720 case kWord32AtomicCompareExchangeInt16:
2721 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2722 __ uxth(i.TempRegister(2), i.InputRegister(2));
2723 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
2724 i.TempRegister(2));
2725 __ sxth(i.OutputRegister(0), i.OutputRegister(0));
2726 break;
2727 case kWord32AtomicCompareExchangeUint16:
2728 case kArmWord64AtomicNarrowCompareExchangeUint16:
2729 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2730 __ uxth(i.TempRegister(2), i.InputRegister(2));
2731 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
2732 i.TempRegister(2));
2733 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(CompareExchangeUint16);
2734 break;
2735 case kWord32AtomicCompareExchangeWord32:
2736 case kArmWord64AtomicNarrowCompareExchangeUint32:
2737 __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2738 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
2739 i.InputRegister(2));
2740 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(CompareExchangeUint32);
2741 break;
2742 #define ATOMIC_BINOP_CASE(op, inst) \
2743 case kWord32Atomic##op##Int8: \
2744 ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
2745 __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
2746 break; \
2747 case kWord32Atomic##op##Uint8: \
2748 case kArmWord64AtomicNarrow##op##Uint8: \
2749 ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
2750 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op##Uint8); \
2751 break; \
2752 case kWord32Atomic##op##Int16: \
2753 ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
2754 __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
2755 break; \
2756 case kWord32Atomic##op##Uint16: \
2757 case kArmWord64AtomicNarrow##op##Uint16: \
2758 ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
2759 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op##Uint16); \
2760 break; \
2761 case kWord32Atomic##op##Word32: \
2762 case kArmWord64AtomicNarrow##op##Uint32: \
2763 ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst); \
2764 ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op##Uint32); \
2765 break;
2766 ATOMIC_BINOP_CASE(Add, add)
2767 ATOMIC_BINOP_CASE(Sub, sub)
2768 ATOMIC_BINOP_CASE(And, and_)
2769 ATOMIC_BINOP_CASE(Or, orr)
2770 ATOMIC_BINOP_CASE(Xor, eor)
2771 #undef ATOMIC_BINOP_CASE
2772 case kArmWord32AtomicPairLoad:
2773 __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
2774 __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0));
2775 __ dmb(ISH);
2776 break;
2777 case kArmWord32AtomicPairStore: {
2778 Label store;
2779 __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
2780 __ dmb(ISH);
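// Retry loop: ldrexd marks the exclusive monitor, and strexd writes 0 to
// TempRegister(1) on success or 1 on failure, so the teq/b(ne) pair keeps
// retrying until the 64-bit store lands atomically.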
2781 __ bind(&store);
2782 __ ldrexd(i.TempRegister(1), i.TempRegister(2), i.TempRegister(0));
2783 __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
2784 i.TempRegister(0));
2785 __ teq(i.TempRegister(1), Operand(0));
2786 __ b(ne, &store);
2787 __ dmb(ISH);
2788 break;
2789 }
2790 #define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2) \
2791 case kArmWord32AtomicPair##op: { \
2792 ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2); \
2793 break; \
2794 }
2795 ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
2796 ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
2797 #undef ATOMIC_ARITH_BINOP_CASE
2798 #define ATOMIC_LOGIC_BINOP_CASE(op, instr) \
2799 case kArmWord32AtomicPair##op: { \
2800 ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr); \
2801 break; \
2802 }
2803 ATOMIC_LOGIC_BINOP_CASE(And, and_)
2804 ATOMIC_LOGIC_BINOP_CASE(Or, orr)
2805 ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
2806 case kArmWord32AtomicPairExchange: {
2807 Label exchange;
2808 __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
2809 __ dmb(ISH);
2810 __ bind(&exchange);
2811 __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0));
2812 __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
2813 i.TempRegister(0));
2814 __ teq(i.TempRegister(1), Operand(0));
2815 __ b(ne, &exchange);
2816 __ dmb(ISH);
2817 break;
2818 }
2819 case kArmWord32AtomicPairCompareExchange: {
2820 __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
2821 Label compareExchange;
2822 Label exit;
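// Compare-exchange loop: load the current pair exclusively, leave early if
// either half differs from the expected value (inputs 0 and 1), otherwise
// try to store the new pair (inputs 2 and 3) and retry while the exclusive
// store reports failure.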
2823 __ dmb(ISH);
2824 __ bind(&compareExchange);
2825 __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0));
2826 __ teq(i.InputRegister(0), Operand(i.OutputRegister(0)));
2827 __ b(ne, &exit);
2828 __ teq(i.InputRegister(1), Operand(i.OutputRegister(1)));
2829 __ b(ne, &exit);
2830 __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
2831 i.TempRegister(0));
2832 __ teq(i.TempRegister(1), Operand(0));
2833 __ b(ne, &compareExchange);
2834 __ bind(&exit);
2835 __ dmb(ISH);
2836 break;
2837 }
2838 #undef ATOMIC_LOGIC_BINOP_CASE
2839 #undef ATOMIC_NARROW_OP_CLEAR_HIGH_WORD
2840 #undef ASSEMBLE_ATOMIC_LOAD_INTEGER
2841 #undef ASSEMBLE_ATOMIC_STORE_INTEGER
2842 #undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
2843 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
2844 #undef ASSEMBLE_ATOMIC_BINOP
2845 #undef ASSEMBLE_ATOMIC64_ARITH_BINOP
2846 #undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
2847 #undef ASSEMBLE_IEEE754_BINOP
2848 #undef ASSEMBLE_IEEE754_UNOP
2849 #undef ASSEMBLE_NEON_NARROWING_OP
2850 #undef ASSEMBLE_NEON_PAIRWISE_OP
2851 }
2852 return kSuccess;
2853 } // NOLINT(readability/fn_size)
2854
2855
2856 // Assembles branches after an instruction.
2857 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
2858 ArmOperandConverter i(this, instr);
2859 Label* tlabel = branch->true_label;
2860 Label* flabel = branch->false_label;
2861 Condition cc = FlagsConditionToCondition(branch->condition);
2862 __ b(cc, tlabel);
2863 if (!branch->fallthru) __ b(flabel); // no fallthru to flabel.
2864 }
2865
2866 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
2867 Instruction* instr) {
2868 // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
2869 if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
2870 return;
2871 }
2872
2873 condition = NegateFlagsCondition(condition);
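// eor'ing a register with itself yields zero, so this conditionally clears
// the poison register exactly when the flags contradict the branch outcome
// that leads to this code, i.e. when execution got here by misspeculation.
// Values masked with the cleared register (kArchWordPoisonOnSpeculation)
// then read as zero, and csdb acts as the speculative data barrier.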
2874 __ eor(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
2875 Operand(kSpeculationPoisonRegister), SBit::LeaveCC,
2876 FlagsConditionToCondition(condition));
2877 __ csdb();
2878 }
2879
2880 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
2881 BranchInfo* branch) {
2882 AssembleArchBranch(instr, branch);
2883 }
2884
2885 void CodeGenerator::AssembleArchJump(RpoNumber target) {
2886 if (!IsNextInAssemblyOrder(target)) __ b(GetLabel(target));
2887 }
2888
2889 void CodeGenerator::AssembleArchTrap(Instruction* instr,
2890 FlagsCondition condition) {
2891 class OutOfLineTrap final : public OutOfLineCode {
2892 public:
2893 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
2894 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
2895
2896 void Generate() final {
2897 ArmOperandConverter i(gen_, instr_);
2898 TrapId trap_id =
2899 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
2900 GenerateCallToTrap(trap_id);
2901 }
2902
2903 private:
2904 void GenerateCallToTrap(TrapId trap_id) {
2905 if (trap_id == TrapId::kInvalid) {
2906 // We cannot test calls to the runtime in cctest/test-run-wasm.
2907 // Therefore we emit a call to C here instead of a call to the runtime.
2908 // We use the context register as the scratch register, because we do
2909 // not have a context here.
2910 __ PrepareCallCFunction(0, 0);
2911 __ CallCFunction(
2912 ExternalReference::wasm_call_trap_callback_for_testing(), 0);
2913 __ LeaveFrame(StackFrame::WASM_COMPILED);
2914 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
2915 int pop_count =
2916 static_cast<int>(call_descriptor->StackParameterCount());
2917 __ Drop(pop_count);
2918 __ Ret();
2919 } else {
2920 gen_->AssembleSourcePosition(instr_);
2921 // A direct call to a wasm runtime stub defined in this module.
2922 // Just encode the stub index. This will be patched at relocation.
2923 __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
2924 ReferenceMap* reference_map =
2925 new (gen_->zone()) ReferenceMap(gen_->zone());
2926 gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
2927 Safepoint::kNoLazyDeopt);
2928 if (FLAG_debug_code) {
2929 __ stop(GetAbortReason(AbortReason::kUnexpectedReturnFromWasmTrap));
2930 }
2931 }
2932 }
2933
2934 Instruction* instr_;
2935 CodeGenerator* gen_;
2936 };
2937 auto ool = new (zone()) OutOfLineTrap(this, instr);
2938 Label* tlabel = ool->entry();
2939 Condition cc = FlagsConditionToCondition(condition);
2940 __ b(cc, tlabel);
2941 }
2942
2943 // Assembles boolean materializations after an instruction.
2944 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
2945 FlagsCondition condition) {
2946 ArmOperandConverter i(this, instr);
2947
2948 // Materialize a full 32-bit 1 or 0 value. The result register is always the
2949 // last output of the instruction.
2950 DCHECK_NE(0u, instr->OutputCount());
2951 Register reg = i.OutputRegister(instr->OutputCount() - 1);
2952 Condition cc = FlagsConditionToCondition(condition);
2953 __ mov(reg, Operand(0));
2954 __ mov(reg, Operand(1), LeaveCC, cc);
2955 }
2956
2957 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
2958 ArmOperandConverter i(this, instr);
2959 Register input = i.InputRegister(0);
2960 std::vector<std::pair<int32_t, Label*>> cases;
2961 for (size_t index = 2; index < instr->InputCount(); index += 2) {
2962 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
2963 }
2964 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
2965 cases.data() + cases.size());
2966 }
2967
2968 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
2969 ArmOperandConverter i(this, instr);
2970 Register input = i.InputRegister(0);
2971 for (size_t index = 2; index < instr->InputCount(); index += 2) {
2972 __ cmp(input, Operand(i.InputInt32(index + 0)));
2973 __ b(eq, GetLabel(i.InputRpo(index + 1)));
2974 }
2975 AssembleArchJump(i.InputRpo(1));
2976 }
2977
2978
2979 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
2980 ArmOperandConverter i(this, instr);
2981 Register input = i.InputRegister(0);
2982 size_t const case_count = instr->InputCount() - 2;
2983 // Make sure the constant pool is emitted first if necessary.
2984 __ CheckConstPool(true, true);
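// Dispatch trick: reading pc on Arm yields the address of the current
// instruction plus 8, so "add pc, pc, input, LSL #2" (executed only when
// input < case_count, i.e. condition lo) lands directly on the table of
// branches emitted below; otherwise execution falls through to the default
// branch. BlockConstPoolFor keeps the assembler from splitting the jump
// table with a constant pool.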
2985 __ cmp(input, Operand(case_count));
2986 __ BlockConstPoolFor(case_count + 2);
2987 __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
2988 __ b(GetLabel(i.InputRpo(1)));
2989 for (size_t index = 0; index < case_count; ++index) {
2990 __ b(GetLabel(i.InputRpo(index + 2)));
2991 }
2992 }
2993
2994 void CodeGenerator::FinishFrame(Frame* frame) {
2995 auto call_descriptor = linkage()->GetIncomingDescriptor();
2996
2997 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
2998 if (saves_fp != 0) {
2999 frame->AlignSavedCalleeRegisterSlots();
3000 }
3001
3002 if (saves_fp != 0) {
3003 // Save callee-saved FP registers.
3004 STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3005 uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
3006 uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
3007 DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
3008 frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
3009 (kDoubleSize / kPointerSize));
3010 }
3011 const RegList saves = call_descriptor->CalleeSavedRegisters();
3012 if (saves != 0) {
3013 // Save callee-saved registers.
3014 frame->AllocateSavedCalleeRegisterSlots(base::bits::CountPopulation(saves));
3015 }
3016 }
3017
3018 void CodeGenerator::AssembleConstructFrame() {
3019 auto call_descriptor = linkage()->GetIncomingDescriptor();
3020 if (frame_access_state()->has_frame()) {
3021 if (call_descriptor->IsCFunctionCall()) {
3022 __ Push(lr, fp);
3023 __ mov(fp, sp);
3024 } else if (call_descriptor->IsJSFunctionCall()) {
3025 __ Prologue();
3026 if (call_descriptor->PushArgumentCount()) {
3027 __ Push(kJavaScriptCallArgCountRegister);
3028 }
3029 } else {
3030 __ StubPrologue(info()->GetOutputStackFrameType());
3031 if (call_descriptor->IsWasmFunctionCall()) {
3032 __ Push(kWasmInstanceRegister);
3033 }
3034 }
3035
3036 unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
3037 }
3038
3039 int shrink_slots = frame()->GetTotalFrameSlotCount() -
3040 call_descriptor->CalculateFixedFrameSize();
3041
3042 if (info()->is_osr()) {
3043 // TurboFan OSR-compiled functions cannot be entered directly.
3044 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3045
3046 // Unoptimized code jumps directly to this entrypoint while the unoptimized
3047 // frame is still on the stack. Optimized code uses OSR values directly from
3048 // the unoptimized frame. Thus, all that needs to be done is to allocate the
3049 // remaining stack slots.
3050 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3051 osr_pc_offset_ = __ pc_offset();
3052 shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
3053 ResetSpeculationPoison();
3054 }
3055
3056 const RegList saves = call_descriptor->CalleeSavedRegisters();
3057 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3058
3059 if (shrink_slots > 0) {
3060 DCHECK(frame_access_state()->has_frame());
3061 if (info()->IsWasm() && shrink_slots > 128) {
3062 // For WebAssembly functions with big frames we have to do the stack
3063 // overflow check before we construct the frame. Otherwise we may not
3064 // have enough space on the stack to call the runtime for the stack
3065 // overflow.
3066 Label done;
3067
3068 // If the frame is bigger than the stack, we throw the stack overflow
3069 // exception unconditionally. Thereby we can avoid the integer overflow
3070 // check in the condition code.
3071 if ((shrink_slots * kPointerSize) < (FLAG_stack_size * 1024)) {
3072 UseScratchRegisterScope temps(tasm());
3073 Register scratch = temps.Acquire();
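// The frame fits if sp is still at or above the real stack limit plus the
// frame size; the unsigned >= (cs) branch below then skips the runtime call
// that throws the stack-overflow exception.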
3074 __ ldr(scratch, FieldMemOperand(
3075 kWasmInstanceRegister,
3076 WasmInstanceObject::kRealStackLimitAddressOffset));
3077 __ ldr(scratch, MemOperand(scratch));
3078 __ add(scratch, scratch, Operand(shrink_slots * kPointerSize));
3079 __ cmp(sp, scratch);
3080 __ b(cs, &done);
3081 }
3082
3083 __ ldr(r2, FieldMemOperand(kWasmInstanceRegister,
3084 WasmInstanceObject::kCEntryStubOffset));
3085 __ Move(cp, Smi::kZero);
3086 __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, r2);
3087 // We come from WebAssembly, there are no references for the GC.
3088 ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3089 RecordSafepoint(reference_map, Safepoint::kSimple, 0,
3090 Safepoint::kNoLazyDeopt);
3091 if (FLAG_debug_code) {
3092 __ stop(GetAbortReason(AbortReason::kUnexpectedReturnFromThrow));
3093 }
3094
3095 __ bind(&done);
3096 }
3097
3098 // Skip callee-saved and return slots, which are pushed below.
3099 shrink_slots -= base::bits::CountPopulation(saves);
3100 shrink_slots -= frame()->GetReturnSlotCount();
3101 shrink_slots -= 2 * base::bits::CountPopulation(saves_fp);
3102 if (shrink_slots > 0) {
3103 __ sub(sp, sp, Operand(shrink_slots * kPointerSize));
3104 }
3105 }
3106
3107 if (saves_fp != 0) {
3108 // Save callee-saved FP registers.
3109 STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3110 uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
3111 uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
3112 DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
3113 __ vstm(db_w, sp, DwVfpRegister::from_code(first),
3114 DwVfpRegister::from_code(last));
3115 }
3116
3117 if (saves != 0) {
3118 // Save callee-saved registers.
3119 __ stm(db_w, sp, saves);
3120 }
3121
3122 const int returns = frame()->GetReturnSlotCount();
3123 if (returns != 0) {
3124 // Create space for returns.
3125 __ sub(sp, sp, Operand(returns * kPointerSize));
3126 }
3127 }
3128
3129 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3130 auto call_descriptor = linkage()->GetIncomingDescriptor();
3131 int pop_count = static_cast<int>(call_descriptor->StackParameterCount());
3132
3133 const int returns = frame()->GetReturnSlotCount();
3134 if (returns != 0) {
3135 // Free space of returns.
3136 __ add(sp, sp, Operand(returns * kPointerSize));
3137 }
3138
3139 // Restore registers.
3140 const RegList saves = call_descriptor->CalleeSavedRegisters();
3141 if (saves != 0) {
3142 __ ldm(ia_w, sp, saves);
3143 }
3144
3145 // Restore FP registers.
3146 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3147 if (saves_fp != 0) {
3148 STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3149 uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
3150 uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
3151 __ vldm(ia_w, sp, DwVfpRegister::from_code(first),
3152 DwVfpRegister::from_code(last));
3153 }
3154
3155 unwinding_info_writer_.MarkBlockWillExit();
3156
3157 ArmOperandConverter g(this, nullptr);
3158 if (call_descriptor->IsCFunctionCall()) {
3159 AssembleDeconstructFrame();
3160 } else if (frame_access_state()->has_frame()) {
3161 // Canonicalize JSFunction return sites for now unless they have a variable
3162 // number of stack slot pops.
3163 if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3164 if (return_label_.is_bound()) {
3165 __ b(&return_label_);
3166 return;
3167 } else {
3168 __ bind(&return_label_);
3169 AssembleDeconstructFrame();
3170 }
3171 } else {
3172 AssembleDeconstructFrame();
3173 }
3174 }
3175
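  // In addition to the declared stack parameters, drop any extra slots
  // requested by the instruction, either as an immediate count or as a count
  // held in a register.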
3176 if (pop->IsImmediate()) {
3177 DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
3178 pop_count += g.ToConstant(pop).ToInt32();
3179 } else {
3180 __ Drop(g.ToRegister(pop));
3181 }
3182 __ Drop(pop_count);
3183 __ Ret();
3184 }
3185
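// Flush any pending constant pool entries before the code is finalized.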
3186 void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }
3187
3188 void CodeGenerator::AssembleMove(InstructionOperand* source,
3189 InstructionOperand* destination) {
3190 ArmOperandConverter g(this, nullptr);
3191 // Helper function to write the given constant to the dst register.
3192 auto MoveConstantToRegister = [&](Register dst, Constant src) {
3193 if (src.type() == Constant::kHeapObject) {
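      // Heap constants that correspond to roots can be loaded from the root
      // list instead of embedding the handle in the code.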
3194 Handle<HeapObject> src_object = src.ToHeapObject();
3195 Heap::RootListIndex index;
3196 if (IsMaterializableFromRoot(src_object, &index)) {
3197 __ LoadRoot(dst, index);
3198 } else {
3199 __ Move(dst, src_object);
3200 }
3201 } else if (src.type() == Constant::kExternalReference) {
3202 __ Move(dst, src.ToExternalReference());
3203 } else {
3204 __ mov(dst, g.ToImmediate(source));
3205 }
3206 };
3207 switch (MoveType::InferMove(source, destination)) {
3208 case MoveType::kRegisterToRegister:
3209 if (source->IsRegister()) {
3210 __ mov(g.ToRegister(destination), g.ToRegister(source));
3211 } else if (source->IsFloatRegister()) {
3212 DCHECK(destination->IsFloatRegister());
3213 // GapResolver may give us reg codes that don't map to actual
3214 // s-registers. Generate code to work around those cases.
3215 int src_code = LocationOperand::cast(source)->register_code();
3216 int dst_code = LocationOperand::cast(destination)->register_code();
3217 __ VmovExtended(dst_code, src_code);
3218 } else if (source->IsDoubleRegister()) {
3219 __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
3220 } else {
3221 __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
3222 }
3223 return;
3224 case MoveType::kRegisterToStack: {
3225 MemOperand dst = g.ToMemOperand(destination);
3226 if (source->IsRegister()) {
3227 __ str(g.ToRegister(source), dst);
3228 } else if (source->IsFloatRegister()) {
3229 // GapResolver may give us reg codes that don't map to actual
3230 // s-registers. Generate code to work around those cases.
3231 int src_code = LocationOperand::cast(source)->register_code();
3232 __ VmovExtended(dst, src_code);
3233 } else if (source->IsDoubleRegister()) {
3234 __ vstr(g.ToDoubleRegister(source), dst);
3235 } else {
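        // 128-bit stores go through vst1, which addresses memory via a
        // register, so materialize the slot address in a scratch register.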
3236 UseScratchRegisterScope temps(tasm());
3237 Register temp = temps.Acquire();
3238 QwNeonRegister src = g.ToSimd128Register(source);
3239 __ add(temp, dst.rn(), Operand(dst.offset()));
3240 __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
3241 }
3242 return;
3243 }
3244 case MoveType::kStackToRegister: {
3245 MemOperand src = g.ToMemOperand(source);
3246 if (source->IsStackSlot()) {
3247 __ ldr(g.ToRegister(destination), src);
3248 } else if (source->IsFloatStackSlot()) {
3249 DCHECK(destination->IsFloatRegister());
3250 // GapResolver may give us reg codes that don't map to actual
3251 // s-registers. Generate code to work around those cases.
3252 int dst_code = LocationOperand::cast(destination)->register_code();
3253 __ VmovExtended(dst_code, src);
3254 } else if (source->IsDoubleStackSlot()) {
3255 __ vldr(g.ToDoubleRegister(destination), src);
3256 } else {
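        // Symmetric to the store case above: compute the slot address and use
        // vld1 to load the full 128-bit value.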
3257 UseScratchRegisterScope temps(tasm());
3258 Register temp = temps.Acquire();
3259 QwNeonRegister dst = g.ToSimd128Register(destination);
3260 __ add(temp, src.rn(), Operand(src.offset()));
3261 __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
3262 }
3263 return;
3264 }
3265 case MoveType::kStackToStack: {
3266 MemOperand src = g.ToMemOperand(source);
3267 MemOperand dst = g.ToMemOperand(destination);
3268 UseScratchRegisterScope temps(tasm());
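      // Copy through VFP scratch registers: an S register for 32-bit slots, a
      // D register for 64-bit slots, and NEON loads/stores via a Q register
      // for 128-bit slots.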
3269 if (source->IsStackSlot() || source->IsFloatStackSlot()) {
3270 SwVfpRegister temp = temps.AcquireS();
3271 __ vldr(temp, src);
3272 __ vstr(temp, dst);
3273 } else if (source->IsDoubleStackSlot()) {
3274 DwVfpRegister temp = temps.AcquireD();
3275 __ vldr(temp, src);
3276 __ vstr(temp, dst);
3277 } else {
3278 DCHECK(source->IsSimd128StackSlot());
3279 Register temp = temps.Acquire();
3280 QwNeonRegister temp_q = temps.AcquireQ();
3281 __ add(temp, src.rn(), Operand(src.offset()));
3282 __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
3283 __ add(temp, dst.rn(), Operand(dst.offset()));
3284 __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
3285 }
3286 return;
3287 }
3288 case MoveType::kConstantToRegister: {
3289 Constant src = g.ToConstant(source);
3290 if (destination->IsRegister()) {
3291 MoveConstantToRegister(g.ToRegister(destination), src);
3292 } else if (destination->IsFloatRegister()) {
3293 __ vmov(g.ToFloatRegister(destination),
3294 Float32::FromBits(src.ToFloat32AsInt()));
3295 } else {
3296 // TODO(arm): Look into optimizing this further if possible. Supporting
3297 // the NEON version of VMOV may help.
3298 __ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
3299 }
3300 return;
3301 }
3302 case MoveType::kConstantToStack: {
3303 Constant src = g.ToConstant(source);
3304 MemOperand dst = g.ToMemOperand(destination);
3305 if (destination->IsStackSlot()) {
3306 UseScratchRegisterScope temps(tasm());
3307         // Acquire an S register instead of a general-purpose register in case
3308         // `vstr` needs one to compute the address of `dst`.
3309 SwVfpRegister s_temp = temps.AcquireS();
3310 {
3311 // TODO(arm): This sequence could be optimized further if necessary by
3312 // writing the constant directly into `s_temp`.
3313 UseScratchRegisterScope temps(tasm());
3314 Register temp = temps.Acquire();
3315 MoveConstantToRegister(temp, src);
3316 __ vmov(s_temp, temp);
3317 }
3318 __ vstr(s_temp, dst);
3319 } else if (destination->IsFloatStackSlot()) {
3320 UseScratchRegisterScope temps(tasm());
3321 SwVfpRegister temp = temps.AcquireS();
3322 __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
3323 __ vstr(temp, dst);
3324 } else {
3325 DCHECK(destination->IsDoubleStackSlot());
3326 UseScratchRegisterScope temps(tasm());
3327 DwVfpRegister temp = temps.AcquireD();
3328 // TODO(arm): Look into optimizing this further if possible. Supporting
3329 // the NEON version of VMOV may help.
3330 __ vmov(temp, src.ToFloat64());
3331 __ vstr(temp, g.ToMemOperand(destination));
3332 }
3333 return;
3334 }
3335 }
3336 UNREACHABLE();
3337 }
3338
3339 void CodeGenerator::AssembleSwap(InstructionOperand* source,
3340 InstructionOperand* destination) {
3341 ArmOperandConverter g(this, nullptr);
3342 switch (MoveType::InferSwap(source, destination)) {
3343 case MoveType::kRegisterToRegister:
3344 if (source->IsRegister()) {
3345 __ Swap(g.ToRegister(source), g.ToRegister(destination));
3346 } else if (source->IsFloatRegister()) {
3347 DCHECK(destination->IsFloatRegister());
3348 // GapResolver may give us reg codes that don't map to actual
3349 // s-registers. Generate code to work around those cases.
3350 UseScratchRegisterScope temps(tasm());
3351 LowDwVfpRegister temp = temps.AcquireLowD();
3352 int src_code = LocationOperand::cast(source)->register_code();
3353 int dst_code = LocationOperand::cast(destination)->register_code();
3354 __ VmovExtended(temp.low().code(), src_code);
3355 __ VmovExtended(src_code, dst_code);
3356 __ VmovExtended(dst_code, temp.low().code());
3357 } else if (source->IsDoubleRegister()) {
3358 __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
3359 } else {
3360 __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
3361 }
3362 return;
3363 case MoveType::kRegisterToStack: {
3364 MemOperand dst = g.ToMemOperand(destination);
3365 if (source->IsRegister()) {
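        // Swap a general-purpose register with a stack slot: stash the
        // register value in an S scratch register, load the slot into the
        // register, then store the stashed value back to the slot.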
3366 Register src = g.ToRegister(source);
3367 UseScratchRegisterScope temps(tasm());
3368 SwVfpRegister temp = temps.AcquireS();
3369 __ vmov(temp, src);
3370 __ ldr(src, dst);
3371 __ vstr(temp, dst);
3372 } else if (source->IsFloatRegister()) {
3373 int src_code = LocationOperand::cast(source)->register_code();
3374 UseScratchRegisterScope temps(tasm());
3375 LowDwVfpRegister temp = temps.AcquireLowD();
3376 __ VmovExtended(temp.low().code(), src_code);
3377 __ VmovExtended(src_code, dst);
3378 __ vstr(temp.low(), dst);
3379 } else if (source->IsDoubleRegister()) {
3380 UseScratchRegisterScope temps(tasm());
3381 DwVfpRegister temp = temps.AcquireD();
3382 DwVfpRegister src = g.ToDoubleRegister(source);
3383 __ Move(temp, src);
3384 __ vldr(src, dst);
3385 __ vstr(temp, dst);
3386 } else {
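        // Swap a Q register with a 128-bit stack slot: copy the register to a
        // scratch Q register, load the slot into the source register, then
        // store the scratch back to the slot.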
3387 QwNeonRegister src = g.ToSimd128Register(source);
3388 UseScratchRegisterScope temps(tasm());
3389 Register temp = temps.Acquire();
3390 QwNeonRegister temp_q = temps.AcquireQ();
3391 __ Move(temp_q, src);
3392 __ add(temp, dst.rn(), Operand(dst.offset()));
3393 __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
3394 __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
3395 }
3396 return;
3397 }
3398 case MoveType::kStackToStack: {
3399 MemOperand src = g.ToMemOperand(source);
3400 MemOperand dst = g.ToMemOperand(destination);
3401 if (source->IsStackSlot() || source->IsFloatStackSlot()) {
3402 UseScratchRegisterScope temps(tasm());
3403 SwVfpRegister temp_0 = temps.AcquireS();
3404 SwVfpRegister temp_1 = temps.AcquireS();
3405 __ vldr(temp_0, dst);
3406 __ vldr(temp_1, src);
3407 __ vstr(temp_0, src);
3408 __ vstr(temp_1, dst);
3409 } else if (source->IsDoubleStackSlot()) {
3410 UseScratchRegisterScope temps(tasm());
3411 LowDwVfpRegister temp = temps.AcquireLowD();
3412 if (temps.CanAcquireD()) {
3413 DwVfpRegister temp_0 = temp;
3414 DwVfpRegister temp_1 = temps.AcquireD();
3415 __ vldr(temp_0, dst);
3416 __ vldr(temp_1, src);
3417 __ vstr(temp_0, src);
3418 __ vstr(temp_1, dst);
3419 } else {
3420 // We only have a single D register available. However, we can split
3421 // it into 2 S registers and swap the slots 32 bits at a time.
3422 MemOperand src0 = src;
3423 MemOperand dst0 = dst;
3424 MemOperand src1(src.rn(), src.offset() + kFloatSize);
3425 MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
3426 SwVfpRegister temp_0 = temp.low();
3427 SwVfpRegister temp_1 = temp.high();
3428 __ vldr(temp_0, dst0);
3429 __ vldr(temp_1, src0);
3430 __ vstr(temp_0, src0);
3431 __ vstr(temp_1, dst0);
3432 __ vldr(temp_0, dst1);
3433 __ vldr(temp_1, src1);
3434 __ vstr(temp_0, src1);
3435 __ vstr(temp_1, dst1);
3436 }
3437 } else {
3438 DCHECK(source->IsSimd128StackSlot());
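        // Swap two 128-bit slots as two 64-bit halves, using a pair of D
        // scratch registers.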
3439 MemOperand src0 = src;
3440 MemOperand dst0 = dst;
3441 MemOperand src1(src.rn(), src.offset() + kDoubleSize);
3442 MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
3443 UseScratchRegisterScope temps(tasm());
3444 DwVfpRegister temp_0 = temps.AcquireD();
3445 DwVfpRegister temp_1 = temps.AcquireD();
3446 __ vldr(temp_0, dst0);
3447 __ vldr(temp_1, src0);
3448 __ vstr(temp_0, src0);
3449 __ vstr(temp_1, dst0);
3450 __ vldr(temp_0, dst1);
3451 __ vldr(temp_1, src1);
3452 __ vstr(temp_0, src1);
3453 __ vstr(temp_1, dst1);
3454 }
3455 return;
3456 }
3457 default:
3458 UNREACHABLE();
3459 break;
3460 }
3461 }
3462
3463 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
3464 // On 32-bit ARM we emit the jump tables inline.
3465 UNREACHABLE();
3466 }
3467
3468 #undef __
3469
3470 } // namespace compiler
3471 } // namespace internal
3472 } // namespace v8
3473