// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/code-generator.h"

#include "src/arm/macro-assembler-arm.h"
#include "src/assembler-inl.h"
#include "src/boxed-float.h"
#include "src/compiler/code-generator-impl.h"
#include "src/compiler/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/double.h"
#include "src/heap/heap-inl.h"
#include "src/optimized-compilation-info.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->
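// Note: '__' is the usual V8 shorthand; every '__ foo(...)' below expands to
// 'tasm()->foo(...)' and emits code through the TurboAssembler.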

// Adds Arm-specific methods to convert InstructionOperands.
class ArmOperandConverter final : public InstructionOperandConverter {
 public:
  ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  SBit OutputSBit() const {
    switch (instr_->flags_mode()) {
      case kFlags_branch:
      case kFlags_branch_and_poison:
      case kFlags_deoptimize:
      case kFlags_deoptimize_and_poison:
      case kFlags_set:
      case kFlags_trap:
        return SetCC;
      case kFlags_none:
        return LeaveCC;
    }
    UNREACHABLE();
  }

  Operand InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand InputOperand2(size_t first_index) {
    const size_t index = first_index;
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
      case kMode_Offset_RI:
      case kMode_Offset_RR:
        break;
      case kMode_Operand2_I:
        return InputImmediate(index + 0);
      case kMode_Operand2_R:
        return Operand(InputRegister(index + 0));
      case kMode_Operand2_R_ASR_I:
        return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
      case kMode_Operand2_R_ASR_R:
        return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
      case kMode_Operand2_R_LSL_I:
        return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
      case kMode_Operand2_R_LSL_R:
        return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
      case kMode_Operand2_R_LSR_I:
        return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
      case kMode_Operand2_R_LSR_R:
        return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
      case kMode_Operand2_R_ROR_I:
        return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
      case kMode_Operand2_R_ROR_R:
        return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
    }
    UNREACHABLE();
  }
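  // The cases above mirror Arm's "flexible second operand" (Operand2): an
  // immediate, a plain register, or a register shifted by an immediate or by
  // another register. For example, kMode_Operand2_R_LSL_I produces the second
  // operand of 'add r0, r1, r2, LSL #3', i.e. r0 = r1 + (r2 << 3).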

  MemOperand InputOffset(size_t* first_index) {
    const size_t index = *first_index;
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
      case kMode_Operand2_I:
      case kMode_Operand2_R:
      case kMode_Operand2_R_ASR_I:
      case kMode_Operand2_R_ASR_R:
      case kMode_Operand2_R_LSL_R:
      case kMode_Operand2_R_LSR_I:
      case kMode_Operand2_R_LSR_R:
      case kMode_Operand2_R_ROR_I:
      case kMode_Operand2_R_ROR_R:
        break;
      case kMode_Operand2_R_LSL_I:
        *first_index += 3;
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
                          LSL, InputInt32(index + 2));
      case kMode_Offset_RI:
        *first_index += 2;
        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
      case kMode_Offset_RR:
        *first_index += 2;
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
    }
    UNREACHABLE();
  }

  MemOperand InputOffset(size_t first_index = 0) {
    return InputOffset(&first_index);
  }

  Operand ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    switch (constant.type()) {
      case Constant::kInt32:
        if (RelocInfo::IsWasmReference(constant.rmode())) {
          return Operand(constant.ToInt32(), constant.rmode());
        } else {
          return Operand(constant.ToInt32());
        }
      case Constant::kFloat32:
        return Operand::EmbeddedNumber(constant.ToFloat32());
      case Constant::kFloat64:
        return Operand::EmbeddedNumber(constant.ToFloat64().value());
      case Constant::kExternalReference:
        return Operand(constant.ToExternalReference());
      case Constant::kInt64:
      case Constant::kHeapObject:
      // TODO(dcarney): loading RPO constants on arm.
      case Constant::kRpoNumber:
        break;
    }
    UNREACHABLE();
  }

  MemOperand ToMemOperand(InstructionOperand* op) const {
    DCHECK_NOT_NULL(op);
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
  }

  MemOperand SlotToMemOperand(int slot) const {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
  }
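  // Spill slots are normally addressed relative to fp; FrameAccessState
  // reports sp-relative offsets instead when the frame pointer is not
  // available, e.g. around tail calls or when the frame was elided.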

  NeonMemOperand NeonInputOperand(size_t first_index) {
    const size_t index = first_index;
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_Offset_RR:
        return NeonMemOperand(InputRegister(index + 0),
                              InputRegister(index + 1));
      case kMode_Operand2_R:
        return NeonMemOperand(InputRegister(index + 0));
      default:
        break;
    }
    UNREACHABLE();
  }
};

namespace {

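// Out-of-line slow path of the record-write (generational) barrier. The
// inline code at kArchStoreWithWriteBarrier below performs the store plus a
// page-flag check and only branches here for stores that might need
// recording; Generate() additionally filters out Smi values and
// uninteresting target pages before calling the record-write stub, saving
// and restoring lr when the frame was elided.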
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode,
                       UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        object_(object),
        index_(index),
        index_immediate_(0),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        must_save_lr_(!gen->frame_access_state()->has_frame()),
        unwinding_info_writer_(unwinding_info_writer),
        zone_(gen->zone()) {}

  OutOfLineRecordWrite(CodeGenerator* gen, Register object, int32_t index,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode,
                       UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        object_(object),
        index_(no_reg),
        index_immediate_(index),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        must_save_lr_(!gen->frame_access_state()->has_frame()),
        unwinding_info_writer_(unwinding_info_writer),
        zone_(gen->zone()) {}

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, eq,
                     exit());
    if (index_ == no_reg) {
      __ add(scratch1_, object_, Operand(index_immediate_));
    } else {
      DCHECK_EQ(0, index_immediate_);
      __ add(scratch1_, object_, Operand(index_));
    }
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
    if (must_save_lr_) {
      // We need to save and restore lr if the frame was elided.
      __ Push(lr);
      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
    }
    __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                           save_fp_mode);
    if (must_save_lr_) {
      __ Pop(lr);
      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
    }
  }

 private:
  Register const object_;
  Register const index_;
  int32_t const index_immediate_;  // Valid if index_ == no_reg.
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  bool must_save_lr_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Zone* zone_;
};

template <typename T>
class OutOfLineFloatMin final : public OutOfLineCode {
 public:
  OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
      : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}

  void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }

 private:
  T const result_;
  T const left_;
  T const right_;
};
typedef OutOfLineFloatMin<SwVfpRegister> OutOfLineFloat32Min;
typedef OutOfLineFloatMin<DwVfpRegister> OutOfLineFloat64Min;

template <typename T>
class OutOfLineFloatMax final : public OutOfLineCode {
 public:
  OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
      : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}

  void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }

 private:
  T const result_;
  T const left_;
  T const right_;
};
typedef OutOfLineFloatMax<SwVfpRegister> OutOfLineFloat32Max;
typedef OutOfLineFloatMax<DwVfpRegister> OutOfLineFloat64Max;

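// Maps the machine-independent FlagsCondition to an Arm condition code. The
// kFloat* cases assume the flags came from a VFP compare (vcmp followed by a
// transfer to APSR), where an unordered result (NaN operand) sets C and V:
// e.g. kFloatLessThan maps to 'lo' (C clear), which is false for NaN, while
// kFloatLessThanOrUnordered maps to 'lt' (N != V), which is true for NaN.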
Condition FlagsConditionToCondition(FlagsCondition condition) {
  switch (condition) {
    case kEqual:
      return eq;
    case kNotEqual:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedGreaterThanOrEqual:
      return ge;
    case kSignedLessThanOrEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedGreaterThanOrEqual:
      return hs;
    case kUnsignedLessThanOrEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kFloatLessThanOrUnordered:
      return lt;
    case kFloatGreaterThanOrEqual:
      return ge;
    case kFloatLessThanOrEqual:
      return ls;
    case kFloatGreaterThanOrUnordered:
      return hi;
    case kFloatLessThan:
      return lo;
    case kFloatGreaterThanOrEqualOrUnordered:
      return hs;
    case kFloatLessThanOrEqualOrUnordered:
      return le;
    case kFloatGreaterThan:
      return gt;
    case kOverflow:
      return vs;
    case kNotOverflow:
      return vc;
    case kPositiveOrZero:
      return pl;
    case kNegative:
      return mi;
    default:
      break;
  }
  UNREACHABLE();
}

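// Spectre mitigation: when a load is marked kMemoryAccessPoisoned, the loaded
// value (or the address it is loaded from) is masked with
// kSpeculationPoisonRegister, which is all-ones on the correctly predicted
// path and all-zeros otherwise (see
// GenerateSpeculationPoisonFromCodeStartRegister below).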
void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
                                   InstructionCode opcode,
                                   ArmOperandConverter& i) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessPoisoned) {
    Register value = i.OutputRegister();
    codegen->tasm()->and_(value, value, Operand(kSpeculationPoisonRegister));
  }
}

void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
                                   InstructionCode opcode,
                                   ArmOperandConverter& i, Register address) {
  DCHECK_EQ(kMemoryAccessPoisoned,
            static_cast<MemoryAccessMode>(MiscField::decode(opcode)));
  switch (AddressingModeField::decode(opcode)) {
    case kMode_Offset_RI:
      codegen->tasm()->mov(address, i.InputImmediate(1));
      codegen->tasm()->add(address, address, i.InputRegister(0));
      break;
    case kMode_Offset_RR:
      codegen->tasm()->add(address, i.InputRegister(0), i.InputRegister(1));
      break;
    default:
      UNREACHABLE();
  }
  codegen->tasm()->and_(address, address, Operand(kSpeculationPoisonRegister));
}

}  // namespace

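// Sequentially consistent atomics are built from plain accesses plus
// dmb(ISH) (data memory barrier, inner-shareable domain): a barrier after
// atomic loads, and barriers both before and after atomic stores.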
#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr)                       \
  do {                                                                \
    __ asm_instr(i.OutputRegister(),                                  \
                 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
    __ dmb(ISH);                                                      \
  } while (0)

#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr)                      \
  do {                                                                \
    __ dmb(ISH);                                                      \
    __ asm_instr(i.InputRegister(2),                                  \
                 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
    __ dmb(ISH);                                                      \
  } while (0)

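// Read-modify-write atomics use Arm's exclusive-access pair: ldrex marks the
// address for exclusive access, and strex stores only if the reservation
// still holds, writing 0 to its status register on success. The macros below
// retry until the strex succeeds; roughly (illustrative register names):
//
//   loop:  ldrex   r_out, [addr]
//          <compute r_new from r_out>
//          strex   r_status, r_new, [addr]
//          teq     r_status, #0
//          bne     loop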
#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr)             \
  do {                                                                        \
    Label exchange;                                                           \
    __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));        \
    __ dmb(ISH);                                                              \
    __ bind(&exchange);                                                       \
    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                    \
    __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
    __ teq(i.TempRegister(0), Operand(0));                                    \
    __ b(ne, &exchange);                                                      \
    __ dmb(ISH);                                                              \
  } while (0)

#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr,     \
                                                 cmp_reg)                     \
  do {                                                                        \
    Label compareExchange;                                                    \
    Label exit;                                                               \
    __ dmb(ISH);                                                              \
    __ bind(&compareExchange);                                                \
    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                    \
    __ teq(cmp_reg, Operand(i.OutputRegister(0)));                            \
    __ b(ne, &exit);                                                          \
    __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
    __ teq(i.TempRegister(0), Operand(0));                                    \
    __ b(ne, &compareExchange);                                               \
    __ bind(&exit);                                                           \
    __ dmb(ISH);                                                              \
  } while (0)

#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr)            \
  do {                                                                       \
    Label binop;                                                             \
    __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));       \
    __ dmb(ISH);                                                             \
    __ bind(&binop);                                                         \
    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                   \
    __ bin_instr(i.TempRegister(0), i.OutputRegister(0),                     \
                 Operand(i.InputRegister(2)));                               \
    __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
    __ teq(i.TempRegister(2), Operand(0));                                   \
    __ b(ne, &binop);                                                        \
    __ dmb(ISH);                                                             \
  } while (0)

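// The 64-bit variants use ldrexd/strexd, which load and store an exclusive
// register pair, so a 64-bit value lives in two 32-bit registers (low word
// first). Arithmetic binops are composed from a low-word op that sets the
// carry flag (SBit::SetCC) and a high-word op that consumes it, e.g. add/adc
// or sub/sbc.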
#define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2)                       \
  do {                                                                      \
    Label binop;                                                            \
    __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));      \
    __ dmb(ISH);                                                            \
    __ bind(&binop);                                                        \
    __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0)); \
    __ instr1(i.TempRegister(1), i.OutputRegister(0), i.InputRegister(0),   \
              SBit::SetCC);                                                 \
    __ instr2(i.TempRegister(2), i.OutputRegister(1),                       \
              Operand(i.InputRegister(1)));                                 \
    DCHECK_EQ(LeaveCC, i.OutputSBit());                                     \
    __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2),      \
              i.TempRegister(0));                                           \
    __ teq(i.TempRegister(3), Operand(0));                                  \
    __ b(ne, &binop);                                                       \
    __ dmb(ISH);                                                            \
  } while (0)

#define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr)                                \
  do {                                                                      \
    Label binop;                                                            \
    __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));      \
    __ dmb(ISH);                                                            \
    __ bind(&binop);                                                        \
    __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0)); \
    __ instr(i.TempRegister(1), i.OutputRegister(0),                        \
             Operand(i.InputRegister(0)));                                  \
    __ instr(i.TempRegister(2), i.OutputRegister(1),                        \
             Operand(i.InputRegister(1)));                                  \
    __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2),      \
              i.TempRegister(0));                                           \
    __ teq(i.TempRegister(3), Operand(0));                                  \
    __ b(ne, &binop);                                                       \
    __ dmb(ISH);                                                            \
  } while (0)

#define ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op)       \
  if (arch_opcode == kArmWord64AtomicNarrow##op) { \
    __ mov(i.OutputRegister(1), Operand(0));       \
  }

#define ASSEMBLE_IEEE754_BINOP(name)                                           \
  do {                                                                         \
    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
    /* and generate a CallAddress instruction instead. */                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
    __ PrepareCallCFunction(0, 2);                                             \
    __ MovToFloatParameters(i.InputDoubleRegister(0),                          \
                            i.InputDoubleRegister(1));                         \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2);    \
    /* Move the result into the double result register. */                     \
    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
    DCHECK_EQ(LeaveCC, i.OutputSBit());                                        \
  } while (0)

#define ASSEMBLE_IEEE754_UNOP(name)                                            \
  do {                                                                         \
    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
    /* and generate a CallAddress instruction instead. */                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
    __ PrepareCallCFunction(0, 1);                                             \
    __ MovToFloatParameter(i.InputDoubleRegister(0));                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1);    \
    /* Move the result into the double result register. */                     \
    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
    DCHECK_EQ(LeaveCC, i.OutputSBit());                                        \
  } while (0)

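// vqmovn saturates and narrows a 128-bit source into one 64-bit half of the
// destination, so a full 128-bit narrowing op takes two vqmovn's. The
// aliasing checks below order the two writes (or copy via vmov) so that
// writing dst.low()/dst.high() never clobbers a source that is still needed.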
#define ASSEMBLE_NEON_NARROWING_OP(dt)                \
  do {                                                \
    Simd128Register dst = i.OutputSimd128Register(),  \
                    src0 = i.InputSimd128Register(0), \
                    src1 = i.InputSimd128Register(1); \
    if (dst == src0 && dst == src1) {                 \
      __ vqmovn(dt, dst.low(), src0);                 \
      __ vmov(dst.high(), dst.low());                 \
    } else if (dst == src0) {                         \
      __ vqmovn(dt, dst.low(), src0);                 \
      __ vqmovn(dt, dst.high(), src1);                \
    } else {                                          \
      __ vqmovn(dt, dst.high(), src1);                \
      __ vqmovn(dt, dst.low(), src0);                 \
    }                                                 \
  } while (0)

#define ASSEMBLE_NEON_PAIRWISE_OP(op, size)               \
  do {                                                    \
    Simd128Register dst = i.OutputSimd128Register(),      \
                    src0 = i.InputSimd128Register(0),     \
                    src1 = i.InputSimd128Register(1);     \
    if (dst == src0) {                                    \
      __ op(size, dst.low(), src0.low(), src0.high());    \
      if (dst == src1) {                                  \
        __ vmov(dst.high(), dst.low());                   \
      } else {                                            \
        __ op(size, dst.high(), src1.low(), src1.high()); \
      }                                                   \
    } else {                                              \
      __ op(size, dst.high(), src1.low(), src1.high());   \
      __ op(size, dst.low(), src0.low(), src0.high());    \
    }                                                     \
  } while (0)

void CodeGenerator::AssembleDeconstructFrame() {
  __ LeaveFrame(StackFrame::MANUAL);
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ ldr(lr, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
    __ ldr(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
  }
  frame_access_state()->SetFrameAccessToSP();
}

void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if the current frame is an arguments adaptor frame.
  __ ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
  __ cmp(scratch1,
         Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ b(ne, &done);

  // Load the arguments count from the current arguments adaptor frame (note
  // that it does not include the receiver).
  Register caller_args_count_reg = scratch1;
  __ ldr(caller_args_count_reg,
         MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
  __ SmiUntag(caller_args_count_reg);

  ParameterCount callee_args_count(args_reg);
  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
                        scratch3);
  __ bind(&done);
}

namespace {

void FlushPendingPushRegisters(TurboAssembler* tasm,
                               FrameAccessState* frame_access_state,
                               ZoneVector<Register>* pending_pushes) {
  switch (pending_pushes->size()) {
    case 0:
      break;
    case 1:
      tasm->push((*pending_pushes)[0]);
      break;
    case 2:
      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
      break;
    case 3:
      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
                 (*pending_pushes)[2]);
      break;
    default:
      UNREACHABLE();
      break;
  }
  frame_access_state->IncreaseSPDelta(pending_pushes->size());
  pending_pushes->clear();
}

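// Adjusts sp so that the first unused stack slot sits just above it before a
// tail call, flushing any pending pushes first so that FrameAccessState's
// bookkeeping stays consistent with the emitted sub/add on sp.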
void AdjustStackPointerForTailCall(
    TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
    ZoneVector<Register>* pending_pushes = nullptr,
    bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    if (pending_pushes != nullptr) {
      FlushPendingPushRegisters(tasm, state, pending_pushes);
    }
    tasm->sub(sp, sp, Operand(stack_slot_delta * kPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    if (pending_pushes != nullptr) {
      FlushPendingPushRegisters(tasm, state, pending_pushes);
    }
    tasm->add(sp, sp, Operand(-stack_slot_delta * kPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, kRegisterPush, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    ArmOperandConverter g(this, instr);
    ZoneVector<Register> pending_pushes(zone());
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(
          tasm(), frame_access_state(),
          destination_location.index() - pending_pushes.size(),
          &pending_pushes);
      // Pushes of non-register data types are not supported.
      DCHECK(source.IsRegister());
      LocationOperand source_location(LocationOperand::cast(source));
      pending_pushes.push_back(source_location.GetRegister());
      // TODO(arm): We can push more than 3 registers at once. Add support in
      // the macro-assembler for pushing a list of registers.
      if (pending_pushes.size() == 3) {
        FlushPendingPushRegisters(tasm(), frame_access_state(),
                                  &pending_pushes);
      }
      move->Eliminate();
    }
    FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, nullptr, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.Acquire();
  __ ComputeCodeStartAddress(scratch);
  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
// need to:
//    1. read from memory the word that contains that bit, which can be found
//       in the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.Acquire();
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
  __ ldr(scratch,
         FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
  __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
  // Ensure we're not serializing (otherwise we'd need to use an indirection to
  // access the builtin below).
  DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
  Handle<Code> code = isolate()->builtins()->builtin_handle(
      Builtins::kCompileLazyDeoptimizedCode);
  __ Jump(code, RelocInfo::CODE_TARGET, ne);
}

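// Builds kSpeculationPoisonRegister: all-ones if
// kJavaScriptCallCodeStartRegister matches the computed code start (the
// common case), all-zeros otherwise. The trailing csdb (consumption of
// speculative data barrier) keeps speculative execution from running ahead
// with the wrong mask.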
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.Acquire();

  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(scratch);
  __ cmp(kJavaScriptCallCodeStartRegister, scratch);
  __ mov(kSpeculationPoisonRegister, Operand(-1), SBit::LeaveCC, eq);
  __ mov(kSpeculationPoisonRegister, Operand(0), SBit::LeaveCC, ne);
  __ csdb();
}

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
  __ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
  __ and_(sp, sp, kSpeculationPoisonRegister);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  ArmOperandConverter i(this, instr);

  __ MaybeCheckConstPool();
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (instr->InputAt(0)->IsImmediate()) {
        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ add(reg, reg, Operand(Code::kHeaderSize - kHeapObjectTag));
        __ Call(reg);
      }
      RecordCallPosition(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        __ Call(wasm_code, constant.rmode());
      } else {
        __ Call(i.InputRegister(0));
      }
      RecordCallPosition(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject: {
      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                         i.TempRegister(0), i.TempRegister(1),
                                         i.TempRegister(2));
      }
      if (instr->InputAt(0)->IsImmediate()) {
        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ add(reg, reg, Operand(Code::kHeaderSize - kHeapObjectTag));
        __ Jump(reg);
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallWasm: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        __ Jump(wasm_code, constant.rmode());
      } else {
        __ Jump(i.InputRegister(0));
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!instr->InputAt(0)->IsImmediate());
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      __ Jump(reg);
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        UseScratchRegisterScope temps(tasm());
        Register scratch = temps.Acquire();
        // Check the function's context matches the context argument.
        __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
        __ cmp(cp, scratch);
        __ Assert(eq, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch");
      __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset));
      __ add(r2, r2, Operand(Code::kHeaderSize - kHeapObjectTag));
      __ Call(r2);
      RecordCallPosition(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchPrepareCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      __ PrepareCallCFunction(num_parameters);
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK_EQ(0, bytes % kPointerSize);
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      if (instr->InputAt(0)->IsImmediate()) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be more strict alignment requirement, causing old SP
      // to be saved on the stack. In those cases, we can not calculate the SP
      // delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
      }
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchLookupSwitch:
      AssembleArchLookupSwitch(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchDebugAbort:
      DCHECK(i.InputRegister(0) == r1);
      if (!frame_access_state()->has_frame()) {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NONE);
        __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
                RelocInfo::CODE_TARGET);
      } else {
        __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
                RelocInfo::CODE_TARGET);
      }
      __ stop("kArchDebugAbort");
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchDebugBreak:
      __ stop("kArchDebugBreak");
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
      break;
    case kArchThrowTerminator:
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchNop:
      // don't emit code for nops.
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchDeoptimize: {
      int deopt_state_id =
          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
      CodeGenResult result =
          AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
      if (result != kSuccess) return result;
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchStackPointer:
      __ mov(i.OutputRegister(), sp);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchFramePointer:
      __ mov(i.OutputRegister(), fp);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ ldr(i.OutputRegister(), MemOperand(fp, 0));
      } else {
        __ mov(i.OutputRegister(), fp);
      }
      break;
    case kArchTruncateDoubleToI:
      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
                           i.InputDoubleRegister(0), DetermineStubCallMode());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArchStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      Register value = i.InputRegister(2);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);
      OutOfLineRecordWrite* ool;

      AddressingMode addressing_mode =
          AddressingModeField::decode(instr->opcode());
      if (addressing_mode == kMode_Offset_RI) {
        int32_t index = i.InputInt32(1);
        ool = new (zone())
            OutOfLineRecordWrite(this, object, index, value, scratch0, scratch1,
                                 mode, &unwinding_info_writer_);
        __ str(value, MemOperand(object, index));
      } else {
        DCHECK_EQ(kMode_Offset_RR, addressing_mode);
        Register index(i.InputRegister(1));
        ool = new (zone())
            OutOfLineRecordWrite(this, object, index, value, scratch0, scratch1,
                                 mode, &unwinding_info_writer_);
        __ str(value, MemOperand(object, index));
      }
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask, ne,
                       ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? sp : fp;
      __ add(i.OutputRegister(0), base, Operand(offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow: {
      __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
      __ vmov(d0, d2);
      break;
    }
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kArmAdd:
      __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmAnd:
      __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
              i.OutputSBit());
      break;
    case kArmBic:
      __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmMul:
      __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.OutputSBit());
      break;
    case kArmMla:
      __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.InputRegister(2), i.OutputSBit());
      break;
    case kArmMls: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
             i.InputRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmSmull:
      __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
               i.InputRegister(1));
      break;
    case kArmSmmul:
      __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSmmla:
      __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUmull:
      __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
               i.InputRegister(1), i.OutputSBit());
      break;
    case kArmSdiv: {
      CpuFeatureScope scope(tasm(), SUDIV);
      __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmUdiv: {
      CpuFeatureScope scope(tasm(), SUDIV);
      __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmMov:
      __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
      break;
    case kArmMvn:
      __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
      break;
    case kArmOrr:
      __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmEor:
      __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmSub:
      __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmRsb:
      __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
             i.OutputSBit());
      break;
    case kArmBfc: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmUbfx: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
              i.InputInt8(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmSbfx: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
              i.InputInt8(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmSxtb:
      __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSxth:
      __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSxtab:
      __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmSxtah:
      __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxtb:
      __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxth:
      __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxtab:
      __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmUxtah:
      __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
               i.InputInt32(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmRbit: {
      CpuFeatureScope scope(tasm(), ARMv7);
      __ rbit(i.OutputRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmRev:
      __ rev(i.OutputRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmClz:
      __ clz(i.OutputRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmCmp:
      __ cmp(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmCmn:
      __ cmn(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmTst:
      __ tst(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmTeq:
      __ teq(i.InputRegister(0), i.InputOperand2(1));
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
1218     case kArmAddPair:
1219       // i.InputRegister(0) ... left low word.
1220       // i.InputRegister(1) ... left high word.
1221       // i.InputRegister(2) ... right low word.
1222       // i.InputRegister(3) ... right high word.
1223       __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1224              SBit::SetCC);
1225       __ adc(i.OutputRegister(1), i.InputRegister(1),
1226              Operand(i.InputRegister(3)));
1227       DCHECK_EQ(LeaveCC, i.OutputSBit());
1228       break;
1229     case kArmSubPair:
1230       // i.InputRegister(0) ... left low word.
1231       // i.InputRegister(1) ... left high word.
1232       // i.InputRegister(2) ... right low word.
1233       // i.InputRegister(3) ... right high word.
1234       __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1235              SBit::SetCC);
1236       __ sbc(i.OutputRegister(1), i.InputRegister(1),
1237              Operand(i.InputRegister(3)));
1238       DCHECK_EQ(LeaveCC, i.OutputSBit());
1239       break;
1240     case kArmMulPair:
1241       // i.InputRegister(0) ... left low word.
1242       // i.InputRegister(1) ... left high word.
1243       // i.InputRegister(2) ... right low word.
1244       // i.InputRegister(3) ... right high word.
1245       __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1246                i.InputRegister(2));
1247       __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
1248              i.OutputRegister(1));
1249       __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
1250              i.OutputRegister(1));
1251       break;
1252     case kArmLslPair: {
      Register second_output =
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
      } else {
        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
      }
      break;
    }
    case kArmLsrPair: {
      Register second_output =
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
      } else {
        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
      }
      break;
    }
    case kArmAsrPair: {
      Register second_output =
          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
      if (instr->InputAt(2)->IsImmediate()) {
        __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputInt32(2));
      } else {
        __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
                   i.InputRegister(1), i.InputRegister(2));
      }
      break;
    }
    case kArmVcmpF32:
      if (instr->InputAt(1)->IsFPRegister()) {
        __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
                                 i.InputFloatRegister(1));
      } else {
        DCHECK(instr->InputAt(1)->IsImmediate());
        // 0.0 is the only immediate supported by vcmp instructions.
        DCHECK_EQ(0.0f, i.InputFloat32(1));
        __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
      }
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmVaddF32:
      __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVsubF32:
      __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmulF32:
      __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlaF32:
      __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
              i.InputFloatRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlsF32:
      __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
              i.InputFloatRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVdivF32:
      __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
              i.InputFloatRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVsqrtF32:
      __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    case kArmVabsF32:
      __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    case kArmVnegF32:
      __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    case kArmVcmpF64:
      if (instr->InputAt(1)->IsFPRegister()) {
        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
                                 i.InputDoubleRegister(1));
      } else {
        DCHECK(instr->InputAt(1)->IsImmediate());
        // 0.0 is the only immediate supported by vcmp instructions.
        DCHECK_EQ(0.0, i.InputDouble(1));
        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
      }
      DCHECK_EQ(SetCC, i.OutputSBit());
      break;
    case kArmVaddF64:
      __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVsubF64:
      __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmulF64:
      __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlaF64:
      __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
              i.InputDoubleRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmlsF64:
      __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
              i.InputDoubleRegister(2));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVdivF64:
      __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmodF64: {
      // TODO(bmeurer): We should really get rid of this special instruction,
      // and generate a CallAddress instruction instead.
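      // VFP has no remainder instruction, so call out to C: no integer
      // arguments, two double arguments, and a double result in the FP
      // return register.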
      FrameScope scope(tasm(), StackFrame::MANUAL);
      __ PrepareCallCFunction(0, 2);
      __ MovToFloatParameters(i.InputDoubleRegister(0),
                              i.InputDoubleRegister(1));
      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
      // Move the result into the output double register.
      __ MovFromFloatResult(i.OutputDoubleRegister());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVsqrtF64:
      __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArmVabsF64:
      __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArmVnegF64:
      __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArmVrintmF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    }
    case kArmVrintmF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintpF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    }
    case kArmVrintpF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintzF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    }
    case kArmVrintzF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintaF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVrintnF32: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
      break;
    }
    case kArmVrintnF64: {
      CpuFeatureScope scope(tasm(), ARMv8);
      __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    }
    case kArmVcvtF32F64: {
      __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF64F32: {
      __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF32S32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF32U32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF64S32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtF64U32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vmov(scratch, i.InputRegister(0));
      __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtS32F32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
      // because INT32_MIN allows easier out-of-bounds detection.
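      // cmn out, #1 computes out + 1 and sets the overflow flag (vs) exactly
      // when out == INT32_MAX, i.e. when the conversion saturated upwards;
      // that value is then rewritten to INT32_MIN.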
      __ cmn(i.OutputRegister(), Operand(1));
      __ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtU32F32: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
      // because 0 allows easier out-of-bounds detection.
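      // cmn out, #1 sets the carry flag exactly when out == UINT32_MAX (the
      // saturated value); adc then adds that carry back in, wrapping
      // UINT32_MAX around to 0 and leaving every other value unchanged.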
      __ cmn(i.OutputRegister(), Operand(1));
      __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtS32F64: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVcvtU32F64: {
      UseScratchRegisterScope temps(tasm());
      SwVfpRegister scratch = temps.AcquireS();
      __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
      __ vmov(i.OutputRegister(), scratch);
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVmovU32F32:
      __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovF32U32:
      __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovLowU32F64:
      __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovLowF64U32:
      __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovHighU32F64:
      __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovHighF64U32:
      __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovF64U32U32:
      __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVmovU32U32F64:
      __ vmov(i.OutputRegister(0), i.OutputRegister(1),
              i.InputDoubleRegister(0));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmLdrb:
      __ ldrb(i.OutputRegister(), i.InputOffset());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
      break;
    case kArmLdrsb:
      __ ldrsb(i.OutputRegister(), i.InputOffset());
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
      break;
    case kArmStrb:
      __ strb(i.InputRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmLdrh:
      __ ldrh(i.OutputRegister(), i.InputOffset());
      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
      break;
    case kArmLdrsh:
      __ ldrsh(i.OutputRegister(), i.InputOffset());
      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
      break;
    case kArmStrh:
      __ strh(i.InputRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmLdr:
      __ ldr(i.OutputRegister(), i.InputOffset());
      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
      break;
    case kArmStr:
      __ str(i.InputRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVldrF32: {
      const MemoryAccessMode access_mode =
          static_cast<MemoryAccessMode>(MiscField::decode(opcode));
      if (access_mode == kMemoryAccessPoisoned) {
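        // Poisoned loads mask the address with the speculation poison first,
        // so a load on a mis-speculated path cannot observe real data.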
        UseScratchRegisterScope temps(tasm());
        Register address = temps.Acquire();
        ComputePoisonedAddressForLoad(this, opcode, i, address);
        __ vldr(i.OutputFloatRegister(), address, 0);
      } else {
        __ vldr(i.OutputFloatRegister(), i.InputOffset());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVstrF32:
      __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmVld1F64: {
      __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
              i.NeonInputOperand(0));
      break;
    }
    case kArmVst1F64: {
      __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
              i.NeonInputOperand(1));
      break;
    }
    case kArmVld1S128: {
      __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
              i.NeonInputOperand(0));
      break;
    }
    case kArmVst1S128: {
      __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
              i.NeonInputOperand(1));
      break;
    }
    case kArmVldrF64: {
      const MemoryAccessMode access_mode =
          static_cast<MemoryAccessMode>(MiscField::decode(opcode));
      if (access_mode == kMemoryAccessPoisoned) {
        UseScratchRegisterScope temps(tasm());
        Register address = temps.Acquire();
        ComputePoisonedAddressForLoad(this, opcode, i, address);
        __ vldr(i.OutputDoubleRegister(), address, 0);
      } else {
        __ vldr(i.OutputDoubleRegister(), i.InputOffset());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmVstrF64:
      __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmFloat32Max: {
      SwVfpRegister result = i.OutputFloatRegister();
      SwVfpRegister left = i.InputFloatRegister(0);
      SwVfpRegister right = i.InputFloatRegister(1);
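      // max(x, x) == x, so aliased inputs reduce to a move. Otherwise
      // FloatMax emits the fast path inline and defers the awkward inputs
      // (equal values, where the sign of zero matters, and NaNs) to the
      // out-of-line code.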
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = new (zone()) OutOfLineFloat32Max(this, result, left, right);
        __ FloatMax(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat64Max: {
      DwVfpRegister result = i.OutputDoubleRegister();
      DwVfpRegister left = i.InputDoubleRegister(0);
      DwVfpRegister right = i.InputDoubleRegister(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = new (zone()) OutOfLineFloat64Max(this, result, left, right);
        __ FloatMax(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat32Min: {
      SwVfpRegister result = i.OutputFloatRegister();
      SwVfpRegister left = i.InputFloatRegister(0);
      SwVfpRegister right = i.InputFloatRegister(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = new (zone()) OutOfLineFloat32Min(this, result, left, right);
        __ FloatMin(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat64Min: {
      DwVfpRegister result = i.OutputDoubleRegister();
      DwVfpRegister left = i.InputDoubleRegister(0);
      DwVfpRegister right = i.InputDoubleRegister(1);
      if (left == right) {
        __ Move(result, left);
      } else {
        auto ool = new (zone()) OutOfLineFloat64Min(this, result, left, right);
        __ FloatMin(result, left, right, ool->entry());
        __ bind(ool->exit());
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmFloat64SilenceNaN: {
      DwVfpRegister value = i.InputDoubleRegister(0);
      DwVfpRegister result = i.OutputDoubleRegister();
      __ VFPCanonicalizeNaN(result, value);
      break;
    }
    case kArmPush:
      if (instr->InputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
        switch (op->representation()) {
          case MachineRepresentation::kFloat32:
            __ vpush(i.InputFloatRegister(0));
            frame_access_state()->IncreaseSPDelta(1);
            break;
          case MachineRepresentation::kFloat64:
            __ vpush(i.InputDoubleRegister(0));
            frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
            break;
          case MachineRepresentation::kSimd128: {
            __ vpush(i.InputSimd128Register(0));
            frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
            break;
          }
          default:
            UNREACHABLE();
            break;
        }
      } else {
        __ push(i.InputRegister(0));
        frame_access_state()->IncreaseSPDelta(1);
      }
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    case kArmPoke: {
      int const slot = MiscField::decode(instr->opcode());
      __ str(i.InputRegister(0), MemOperand(sp, slot * kPointerSize));
      DCHECK_EQ(LeaveCC, i.OutputSBit());
      break;
    }
    case kArmPeek: {
      // The incoming value is 0-based, but we need a 1-based value.
      int reverse_slot = i.InputInt32(0) + 1;
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
          __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
        }
      } else {
        __ ldr(i.OutputRegister(), MemOperand(fp, offset));
      }
      break;
    }
    case kArmDsbIsb: {
      __ dsb(SY);
      __ isb(SY);
      break;
    }
    case kArchWordPoisonOnSpeculation:
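      // kSpeculationPoisonRegister holds all ones on the architecturally
      // taken path and zero under mis-speculation, so this and_ zeroes the
      // value whenever execution is speculative.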
      __ and_(i.OutputRegister(0), i.InputRegister(0),
              Operand(kSpeculationPoisonRegister));
      break;
    case kArmF32x4Splat: {
      int src_code = i.InputFloatRegister(0).code();
      __ vdup(Neon32, i.OutputSimd128Register(),
              DwVfpRegister::from_code(src_code / 2), src_code % 2);
      break;
    }
    case kArmF32x4ExtractLane: {
      __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
                     i.InputInt8(1));
      break;
    }
    case kArmF32x4ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputFloatRegister(2), i.InputInt8(1));
      break;
    }
    case kArmF32x4SConvertI32x4: {
      __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4UConvertI32x4: {
      __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Abs: {
      __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Neg: {
      __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4RecipApprox: {
      __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4RecipSqrtApprox: {
      __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Add: {
      __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4AddHoriz: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src0 = i.InputSimd128Register(0),
                      src1 = i.InputSimd128Register(1);
      // Make sure we don't overwrite source data before it's used.
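      // vpadd sums adjacent lane pairs: the low D half receives the pairwise
      // sums of src0 and the high D half those of src1.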
      if (dst == src0) {
        __ vpadd(dst.low(), src0.low(), src0.high());
        if (dst == src1) {
          __ vmov(dst.high(), dst.low());
        } else {
          __ vpadd(dst.high(), src1.low(), src1.high());
        }
      } else {
        __ vpadd(dst.high(), src1.low(), src1.high());
        __ vpadd(dst.low(), src0.low(), src0.high());
      }
      break;
    }
    case kArmF32x4Sub: {
      __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Mul: {
      __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Min: {
      __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Max: {
      __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Eq: {
      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmF32x4Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmF32x4Lt: {
      __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
              i.InputSimd128Register(0));
      break;
    }
    case kArmF32x4Le: {
      __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
              i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4Splat: {
      __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
      break;
    }
    case kArmI32x4ExtractLane: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
                     i.InputInt8(1));
      break;
    }
    case kArmI32x4ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS32, i.InputInt8(1));
      break;
    }
    case kArmI32x4SConvertF32x4: {
      __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4SConvertI16x8Low: {
      __ vmovl(NeonS16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI32x4SConvertI16x8High: {
      __ vmovl(NeonS16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI32x4Neg: {
      __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4Shl: {
      __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt5(1));
      break;
    }
    case kArmI32x4ShrS: {
      __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt5(1));
      break;
    }
    case kArmI32x4Add: {
      __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4AddHoriz:
      ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
      break;
    case kArmI32x4Sub: {
      __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Mul: {
      __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4MinS: {
      __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4MaxS: {
      __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Eq: {
      __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(Neon32, dst, i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmI32x4GtS: {
      __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4GeS: {
      __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4UConvertF32x4: {
      __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI32x4UConvertI16x8Low: {
      __ vmovl(NeonU16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI32x4UConvertI16x8High: {
      __ vmovl(NeonU16, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI32x4ShrU: {
      __ vshr(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt5(1));
      break;
    }
    case kArmI32x4MinU: {
      __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4MaxU: {
      __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4GtU: {
      __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI32x4GeU: {
      __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Splat: {
      __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
      break;
    }
    case kArmI16x8ExtractLane: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
                     i.InputInt8(1));
      break;
    }
    case kArmI16x8ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS16, i.InputInt8(1));
      break;
    }
    case kArmI16x8SConvertI8x16Low: {
      __ vmovl(NeonS8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI16x8SConvertI8x16High: {
      __ vmovl(NeonS8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI16x8Neg: {
      __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI16x8Shl: {
      __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt4(1));
      break;
    }
    case kArmI16x8ShrS: {
      __ vshr(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt4(1));
      break;
    }
    case kArmI16x8SConvertI32x4:
      ASSEMBLE_NEON_NARROWING_OP(NeonS16);
      break;
    case kArmI16x8Add: {
      __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8AddSaturateS: {
      __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8AddHoriz:
      ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
      break;
    case kArmI16x8Sub: {
      __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8SubSaturateS: {
      __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Mul: {
      __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MinS: {
      __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MaxS: {
      __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Eq: {
      __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(Neon16, dst, i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmI16x8GtS: {
      __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8GeS: {
      __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8UConvertI8x16Low: {
      __ vmovl(NeonU8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).low());
      break;
    }
    case kArmI16x8UConvertI8x16High: {
      __ vmovl(NeonU8, i.OutputSimd128Register(),
               i.InputSimd128Register(0).high());
      break;
    }
    case kArmI16x8ShrU: {
      __ vshr(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt4(1));
      break;
    }
    case kArmI16x8UConvertI32x4:
      ASSEMBLE_NEON_NARROWING_OP(NeonU16);
      break;
    case kArmI16x8AddSaturateU: {
      __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8SubSaturateU: {
      __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MinU: {
      __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8MaxU: {
      __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8GtU: {
      __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI16x8GeU: {
      __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Splat: {
      __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
      break;
    }
    case kArmI8x16ExtractLane: {
      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
                     i.InputInt8(1));
      break;
    }
    case kArmI8x16ReplaceLane: {
      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputRegister(2), NeonS8, i.InputInt8(1));
      break;
    }
    case kArmI8x16Neg: {
      __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmI8x16Shl: {
      __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt3(1));
      break;
    }
    case kArmI8x16ShrS: {
      __ vshr(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt3(1));
      break;
    }
    case kArmI8x16SConvertI16x8:
      ASSEMBLE_NEON_NARROWING_OP(NeonS8);
      break;
    case kArmI8x16Add: {
      __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16AddSaturateS: {
      __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Sub: {
      __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16SubSaturateS: {
      __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Mul: {
      __ vmul(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MinS: {
      __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MaxS: {
      __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Eq: {
      __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16Ne: {
      Simd128Register dst = i.OutputSimd128Register();
      __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
      __ vmvn(dst, dst);
      break;
    }
    case kArmI8x16GtS: {
      __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16GeS: {
      __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16ShrU: {
      __ vshr(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputInt3(1));
      break;
    }
    case kArmI8x16UConvertI16x8:
      ASSEMBLE_NEON_NARROWING_OP(NeonU8);
      break;
    case kArmI8x16AddSaturateU: {
      __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16SubSaturateU: {
      __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MinU: {
      __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16MaxU: {
      __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16GtU: {
      __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmI8x16GeU: {
      __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS128Zero: {
      __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
              i.OutputSimd128Register());
      break;
    }
    case kArmS128Dup: {
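      // vdup replicates a single lane of a D register, so translate the
      // Q-register lane index into the containing D register and the lane
      // within it.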
      NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
      int lanes = kSimd128Size >> size;
      int index = i.InputInt32(2);
      DCHECK(index < lanes);
      int d_lanes = lanes / 2;
      int src_d_index = index & (d_lanes - 1);
      int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
      __ vdup(size, i.OutputSimd128Register(),
              DwVfpRegister::from_code(src_d_code), src_d_index);
      break;
    }
    case kArmS128And: {
      __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS128Or: {
      __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS128Xor: {
      __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1));
      break;
    }
    case kArmS128Not: {
      __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS128Select: {
      Simd128Register dst = i.OutputSimd128Register();
      DCHECK(dst == i.InputSimd128Register(0));
      __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
      break;
    }
    case kArmS32x4ZipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
      __ vmov(dst.high(), src1.low());         // dst = [0, 1, 4, 5]
      __ vtrn(Neon32, dst.low(), dst.high());  // dst = [0, 4, 1, 5]
      break;
    }
    case kArmS32x4ZipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
      __ vmov(dst.low(), src1.high());         // dst = [2, 3, 6, 7]
      __ vtrn(Neon32, dst.low(), dst.high());  // dst = [2, 6, 3, 7]
      break;
    }
    case kArmS32x4UnzipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
      __ vmov(scratch, src1);
      __ vuzp(Neon32, dst, scratch);  // dst = [0, 2, 4, 6]
      break;
    }
    case kArmS32x4UnzipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
      __ vmov(scratch, src1);
      __ vuzp(Neon32, scratch, dst);  // dst = [1, 3, 5, 7]
      break;
    }
    case kArmS32x4TransposeLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
      __ vmov(scratch, src1);
      __ vtrn(Neon32, dst, scratch);  // dst = [0, 4, 2, 6]
      break;
    }
    case kArmS32x4Shuffle: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src0 = i.InputSimd128Register(0),
                      src1 = i.InputSimd128Register(1);
      DCHECK_NE(dst, src0);
      DCHECK_NE(dst, src1);
      // Perform shuffle as a vmov per lane.
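      // Each byte of the shuffle immediate selects one of the eight input
      // S registers: lanes 0-3 come from src0, lanes 4-7 from src1.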
      int dst_code = dst.code() * 4;
      int src0_code = src0.code() * 4;
      int src1_code = src1.code() * 4;
      int32_t shuffle = i.InputInt32(2);
      for (int i = 0; i < 4; i++) {
        int lane = shuffle & 0x7;
        int src_code = src0_code;
        if (lane >= 4) {
          src_code = src1_code;
          lane &= 0x3;
        }
        __ VmovExtended(dst_code + i, src_code + lane);
        shuffle >>= 8;
      }
      break;
    }
    case kArmS32x4TransposeRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
      __ vmov(scratch, src1);
      __ vtrn(Neon32, scratch, dst);  // dst = [1, 5, 3, 7]
      break;
    }
    case kArmS16x8ZipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
      DCHECK(dst == i.InputSimd128Register(0));
      __ vmov(dst.high(), src1.low());         // dst = [0, 1, 2, 3, 8, ... 11]
      __ vzip(Neon16, dst.low(), dst.high());  // dst = [0, 8, 1, 9, ... 11]
      break;
    }
    case kArmS16x8ZipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
      __ vmov(dst.low(), src1.high());
      __ vzip(Neon16, dst.low(), dst.high());  // dst = [4, 12, 5, 13, ... 15]
      break;
    }
    case kArmS16x8UnzipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
      __ vmov(scratch, src1);
      __ vuzp(Neon16, dst, scratch);  // dst = [0, 2, 4, 6, ... 14]
      break;
    }
    case kArmS16x8UnzipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
      __ vmov(scratch, src1);
      __ vuzp(Neon16, scratch, dst);  // dst = [1, 3, 5, 7, ... 15]
      break;
    }
    case kArmS16x8TransposeLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
      __ vmov(scratch, src1);
      __ vtrn(Neon16, dst, scratch);  // dst = [0, 8, 2, 10, ... 14]
      break;
    }
    case kArmS16x8TransposeRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
      __ vmov(scratch, src1);
      __ vtrn(Neon16, scratch, dst);  // dst = [1, 9, 3, 11, ... 15]
      break;
    }
    case kArmS8x16ZipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
      __ vmov(dst.high(), src1.low());
      __ vzip(Neon8, dst.low(), dst.high());  // dst = [0, 16, 1, 17, ... 23]
      break;
    }
    case kArmS8x16ZipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
      __ vmov(dst.low(), src1.high());
      __ vzip(Neon8, dst.low(), dst.high());  // dst = [8, 24, 9, 25, ... 31]
      break;
    }
    case kArmS8x16UnzipLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
      __ vmov(scratch, src1);
      __ vuzp(Neon8, dst, scratch);  // dst = [0, 2, 4, 6, ... 30]
      break;
    }
    case kArmS8x16UnzipRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
      __ vmov(scratch, src1);
      __ vuzp(Neon8, scratch, dst);  // dst = [1, 3, 5, 7, ... 31]
      break;
    }
    case kArmS8x16TransposeLeft: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
      __ vmov(scratch, src1);
      __ vtrn(Neon8, dst, scratch);  // dst = [0, 16, 2, 18, ... 30]
      break;
    }
    case kArmS8x16TransposeRight: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      DCHECK(dst == i.InputSimd128Register(0));
      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
      __ vmov(scratch, src1);
      __ vtrn(Neon8, scratch, dst);  // dst = [1, 17, 3, 19, ... 31]
      break;
    }
    case kArmS8x16Concat: {
      __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputSimd128Register(1), i.InputInt4(2));
      break;
    }
    case kArmS8x16Shuffle: {
      Simd128Register dst = i.OutputSimd128Register(),
                      src0 = i.InputSimd128Register(0),
                      src1 = i.InputSimd128Register(1);
      DwVfpRegister table_base = src0.low();
      UseScratchRegisterScope temps(tasm());
      Simd128Register scratch = temps.AcquireQ();
      // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
      // src1. They must be consecutive.
      int table_size = src0 == src1 ? 2 : 4;
      DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
      // The shuffle lane mask is a byte mask, materialize in scratch.
      int scratch_s_base = scratch.code() * 4;
      for (int j = 0; j < 4; j++) {
        uint32_t four_lanes = i.InputUint32(2 + j);
        // Ensure byte indices are in [0, 31] so masks are never NaNs.
        four_lanes &= 0x1F1F1F1F;
        __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
                Float32::FromBits(four_lanes));
      }
      NeonListOperand table(table_base, table_size);
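      // vtbl looks each destination byte up in the table; NEON defines any
      // index beyond the table to produce zero.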
      if (dst != src0 && dst != src1) {
        __ vtbl(dst.low(), table, scratch.low());
        __ vtbl(dst.high(), table, scratch.high());
      } else {
        __ vtbl(scratch.low(), table, scratch.low());
        __ vtbl(scratch.high(), table, scratch.high());
        __ vmov(dst, scratch);
      }
      break;
    }
    case kArmS32x2Reverse: {
      __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS16x4Reverse: {
      __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS16x2Reverse: {
      __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS8x8Reverse: {
      __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS8x4Reverse: {
      __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS8x2Reverse: {
      __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kArmS1x4AnyTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
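      // Two rounds of unsigned pairwise max fold the four lanes into lane 0,
      // which ends up non-zero iff any input lane was non-zero.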
      __ vpmax(NeonU32, scratch, src.low(), src.high());
      __ vpmax(NeonU32, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
      break;
    }
    case kArmS1x4AllTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
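      // Dually, pairwise min folds the lanes so lane 0 is non-zero only if
      // every input lane was non-zero.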
      __ vpmin(NeonU32, scratch, src.low(), src.high());
      __ vpmin(NeonU32, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
      break;
    }
    case kArmS1x8AnyTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
      __ vpmax(NeonU16, scratch, src.low(), src.high());
      __ vpmax(NeonU16, scratch, scratch, scratch);
      __ vpmax(NeonU16, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
      break;
    }
    case kArmS1x8AllTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      DwVfpRegister scratch = temps.AcquireD();
      __ vpmin(NeonU16, scratch, src.low(), src.high());
      __ vpmin(NeonU16, scratch, scratch, scratch);
      __ vpmin(NeonU16, scratch, scratch, scratch);
      __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
      break;
    }
    case kArmS1x16AnyTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
      UseScratchRegisterScope temps(tasm());
      QwNeonRegister q_scratch = temps.AcquireQ();
      DwVfpRegister d_scratch = q_scratch.low();
      __ vpmax(NeonU8, d_scratch, src.low(), src.high());
      __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch);
2640       // vtst to detect any bits in the bottom 32 bits of d_scratch.
2641       // This saves an instruction vs. the naive sequence of vpmax.
2642       // kDoubleRegZero is not changed, since it is 0.
2643       __ vtst(Neon32, q_scratch, q_scratch, q_scratch);
2644       __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0);
2645       break;
2646     }
2647     case kArmS1x16AllTrue: {
2648       const QwNeonRegister& src = i.InputSimd128Register(0);
2649       UseScratchRegisterScope temps(tasm());
2650       DwVfpRegister scratch = temps.AcquireD();
2651       __ vpmin(NeonU8, scratch, src.low(), src.high());
2652       __ vpmin(NeonU8, scratch, scratch, scratch);
2653       __ vpmin(NeonU8, scratch, scratch, scratch);
2654       __ vpmin(NeonU8, scratch, scratch, scratch);
2655       __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
2656       break;
2657     }
2658     case kWord32AtomicLoadInt8:
2659       ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
2660       break;
2661     case kWord32AtomicLoadUint8:
2662       ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
2663       break;
2664     case kWord32AtomicLoadInt16:
2665       ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
2666       break;
2667     case kWord32AtomicLoadUint16:
2668       ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
2669       break;
2670     case kWord32AtomicLoadWord32:
2671       ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
2672       break;
2673     case kWord32AtomicStoreWord8:
2674       ASSEMBLE_ATOMIC_STORE_INTEGER(strb);
2675       break;
2676     case kWord32AtomicStoreWord16:
2677       ASSEMBLE_ATOMIC_STORE_INTEGER(strh);
2678       break;
2679     case kWord32AtomicStoreWord32:
2680       ASSEMBLE_ATOMIC_STORE_INTEGER(str);
2681       break;
2682     case kWord32AtomicExchangeInt8:
2683       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
2684       __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
2685       break;
2686     case kWord32AtomicExchangeUint8:
2687     case kArmWord64AtomicNarrowExchangeUint8:
2688       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
2689       ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(ExchangeUint8);
2690       break;
2691     case kWord32AtomicExchangeInt16:
2692       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
2693       __ sxth(i.OutputRegister(0), i.OutputRegister(0));
2694       break;
2695     case kWord32AtomicExchangeUint16:
2696     case kArmWord64AtomicNarrowExchangeUint16:
2697       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
2698       ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(ExchangeUint16);
2699       break;
2700     case kWord32AtomicExchangeWord32:
2701     case kArmWord64AtomicNarrowExchangeUint32:
2702       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
2703       ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(ExchangeUint32);
2704       break;
2705     case kWord32AtomicCompareExchangeInt8:
2706       __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2707       __ uxtb(i.TempRegister(2), i.InputRegister(2));
2708       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
2709                                                i.TempRegister(2));
2710       __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
2711       break;
2712     case kWord32AtomicCompareExchangeUint8:
2713     case kArmWord64AtomicNarrowCompareExchangeUint8:
2714       __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2715       __ uxtb(i.TempRegister(2), i.InputRegister(2));
2716       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
2717                                                i.TempRegister(2));
2718       ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(CompareExchangeUint8);
2719       break;
2720     case kWord32AtomicCompareExchangeInt16:
2721       __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2722       __ uxth(i.TempRegister(2), i.InputRegister(2));
2723       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
2724                                                i.TempRegister(2));
2725       __ sxth(i.OutputRegister(0), i.OutputRegister(0));
2726       break;
2727     case kWord32AtomicCompareExchangeUint16:
2728     case kArmWord64AtomicNarrowCompareExchangeUint16:
2729       __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2730       __ uxth(i.TempRegister(2), i.InputRegister(2));
2731       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
2732                                                i.TempRegister(2));
2733       ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(CompareExchangeUint16);
2734       break;
2735     case kWord32AtomicCompareExchangeWord32:
2736     case kArmWord64AtomicNarrowCompareExchangeUint32:
2737       __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2738       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
2739                                                i.InputRegister(2));
2740       ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(CompareExchangeUint32);
2741       break;
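// Each binop case expands to a ldrex/strex retry loop (ASSEMBLE_ATOMIC_BINOP,
// defined earlier in this file). The signed narrow variants additionally
// sign-extend the loaded value; the 64-bit narrowing variants clear the high
// word of the output register pair instead.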
#define ATOMIC_BINOP_CASE(op, inst)                    \
  case kWord32Atomic##op##Int8:                        \
    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);       \
    __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
    break;                                             \
  case kWord32Atomic##op##Uint8:                       \
  case kArmWord64AtomicNarrow##op##Uint8:              \
    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);       \
    ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op##Uint8);       \
    break;                                             \
  case kWord32Atomic##op##Int16:                       \
    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);       \
    __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
    break;                                             \
  case kWord32Atomic##op##Uint16:                      \
  case kArmWord64AtomicNarrow##op##Uint16:             \
    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);       \
    ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op##Uint16);      \
    break;                                             \
  case kWord32Atomic##op##Word32:                      \
  case kArmWord64AtomicNarrow##op##Uint32:             \
    ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst);         \
    ATOMIC_NARROW_OP_CLEAR_HIGH_WORD(op##Uint32);      \
    break;
      ATOMIC_BINOP_CASE(Add, add)
      ATOMIC_BINOP_CASE(Sub, sub)
      ATOMIC_BINOP_CASE(And, and_)
      ATOMIC_BINOP_CASE(Or, orr)
      ATOMIC_BINOP_CASE(Xor, eor)
#undef ATOMIC_BINOP_CASE
    case kArmWord32AtomicPairLoad:
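      // On ARMv7, ldrexd reads the aligned doubleword single-copy atomically;
      // the trailing dmb orders the load before later memory accesses.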
      __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
      __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0));
      __ dmb(ISH);
      break;
    case kArmWord32AtomicPairStore: {
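      // A plain strd is not guaranteed atomic, so loop on ldrexd/strexd:
      // strexd writes a nonzero status if the exclusive reservation taken by
      // ldrexd was lost, in which case the store is retried.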
      Label store;
      __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
      __ dmb(ISH);
      __ bind(&store);
      __ ldrexd(i.TempRegister(1), i.TempRegister(2), i.TempRegister(0));
      __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
                i.TempRegister(0));
      __ teq(i.TempRegister(1), Operand(0));
      __ b(ne, &store);
      __ dmb(ISH);
      break;
    }
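// 64-bit atomic arithmetic on a register pair: instr1 (add/sub) combines the
// low words and instr2 (adc/sbc) folds the carry/borrow into the high words,
// inside the ldrexd/strexd retry loop of ASSEMBLE_ATOMIC64_ARITH_BINOP.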
#define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2) \
  case kArmWord32AtomicPair##op: {                  \
    ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2);  \
    break;                                          \
  }
      ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
      ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
#undef ATOMIC_ARITH_BINOP_CASE
#define ATOMIC_LOGIC_BINOP_CASE(op, instr) \
  case kArmWord32AtomicPair##op: {         \
    ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr);  \
    break;                                 \
  }
      ATOMIC_LOGIC_BINOP_CASE(And, and_)
      ATOMIC_LOGIC_BINOP_CASE(Or, orr)
      ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
    case kArmWord32AtomicPairExchange: {
      Label exchange;
      __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
      __ dmb(ISH);
      __ bind(&exchange);
      __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0));
      __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
                i.TempRegister(0));
      __ teq(i.TempRegister(1), Operand(0));
      __ b(ne, &exchange);
      __ dmb(ISH);
      break;
    }
    case kArmWord32AtomicPairCompareExchange: {
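      // Compare both halves of the expected value against the loaded pair and
      // bail out to exit on the first mismatch; only on a full match is the
      // new pair stored, retrying if the exclusive reservation was lost.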
      __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
      Label compareExchange;
      Label exit;
      __ dmb(ISH);
      __ bind(&compareExchange);
      __ ldrexd(i.OutputRegister(0), i.OutputRegister(1), i.TempRegister(0));
      __ teq(i.InputRegister(0), Operand(i.OutputRegister(0)));
      __ b(ne, &exit);
      __ teq(i.InputRegister(1), Operand(i.OutputRegister(1)));
      __ b(ne, &exit);
      __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
                i.TempRegister(0));
      __ teq(i.TempRegister(1), Operand(0));
      __ b(ne, &compareExchange);
      __ bind(&exit);
      __ dmb(ISH);
      break;
    }
#undef ATOMIC_LOGIC_BINOP_CASE
#undef ATOMIC_NARROW_OP_CLEAR_HIGH_WORD
#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
#undef ASSEMBLE_ATOMIC_STORE_INTEGER
#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_ATOMIC64_ARITH_BINOP
#undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_NEON_NARROWING_OP
#undef ASSEMBLE_NEON_PAIRWISE_OP
  }
  return kSuccess;
}  // NOLINT(readability/fn_size)

// Assembles branches after an instruction.
void CodeGenerator::AssembleArchBranch(Instruction* instr,
                                       BranchInfo* branch) {
  ArmOperandConverter i(this, instr);
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  Condition cc = FlagsConditionToCondition(branch->condition);
  __ b(cc, tlabel);
  if (!branch->fallthru) __ b(flabel);  // no fallthru to flabel.
}

void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
                                            Instruction* instr) {
  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    return;
  }

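  // Zero the poison register when the *negated* condition holds (x eor x is
  // 0), i.e. on a path that was reached by misprediction; the csdb below then
  // serves as a speculation barrier.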
  condition = NegateFlagsCondition(condition);
  __ eor(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
         Operand(kSpeculationPoisonRegister), SBit::LeaveCC,
         FlagsConditionToCondition(condition));
  __ csdb();
}

void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  AssembleArchBranch(instr, branch);
}

void CodeGenerator::AssembleArchJump(RpoNumber target) {
  if (!IsNextInAssemblyOrder(target)) __ b(GetLabel(target));
}

void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  class OutOfLineTrap final : public OutOfLineCode {
   public:
    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}

    void Generate() final {
      ArmOperandConverter i(gen_, instr_);
      TrapId trap_id =
          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
      GenerateCallToTrap(trap_id);
    }

   private:
    void GenerateCallToTrap(TrapId trap_id) {
      if (trap_id == TrapId::kInvalid) {
        // We cannot test calls to the runtime in cctest/test-run-wasm.
        // Therefore we emit a call to C here instead of a call to the runtime.
        // We use the context register as the scratch register, because we do
        // not have a context here.
        __ PrepareCallCFunction(0, 0);
        __ CallCFunction(
            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
        __ LeaveFrame(StackFrame::WASM_COMPILED);
        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
        int pop_count =
            static_cast<int>(call_descriptor->StackParameterCount());
        __ Drop(pop_count);
        __ Ret();
      } else {
        gen_->AssembleSourcePosition(instr_);
        // A direct call to a wasm runtime stub defined in this module.
        // Just encode the stub index. This will be patched at relocation.
        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
        ReferenceMap* reference_map =
            new (gen_->zone()) ReferenceMap(gen_->zone());
        gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                              Safepoint::kNoLazyDeopt);
        if (FLAG_debug_code) {
          __ stop(GetAbortReason(AbortReason::kUnexpectedReturnFromWasmTrap));
        }
      }
    }

    Instruction* instr_;
    CodeGenerator* gen_;
  };
  auto ool = new (zone()) OutOfLineTrap(this, instr);
  Label* tlabel = ool->entry();
  Condition cc = FlagsConditionToCondition(condition);
  __ b(cc, tlabel);
}

// Assembles boolean materializations after an instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
                                        FlagsCondition condition) {
  ArmOperandConverter i(this, instr);

  // Materialize a full 32-bit 1 or 0 value. The result register is always the
  // last output of the instruction.
  DCHECK_NE(0u, instr->OutputCount());
  Register reg = i.OutputRegister(instr->OutputCount() - 1);
  Condition cc = FlagsConditionToCondition(condition);
  __ mov(reg, Operand(0));
  __ mov(reg, Operand(1), LeaveCC, cc);
}

void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
  ArmOperandConverter i(this, instr);
  Register input = i.InputRegister(0);
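  // Instruction inputs: 0 is the switch value, 1 the default block, followed
  // by (case value, target block) pairs.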
  std::vector<std::pair<int32_t, Label*>> cases;
  for (size_t index = 2; index < instr->InputCount(); index += 2) {
    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
  }
  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
                                      cases.data() + cases.size());
}

void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
  ArmOperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  for (size_t index = 2; index < instr->InputCount(); index += 2) {
    __ cmp(input, Operand(i.InputInt32(index + 0)));
    __ b(eq, GetLabel(i.InputRpo(index + 1)));
  }
  AssembleArchJump(i.InputRpo(1));
}

void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
  ArmOperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  size_t const case_count = instr->InputCount() - 2;
  // Emit any pending constant pool first, since the pool is blocked while the
  // jump table below is emitted.
  __ CheckConstPool(true, true);
  __ cmp(input, Operand(case_count));
  __ BlockConstPoolFor(case_count + 2);
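  // Reading pc yields the address of the add itself plus 8, which is exactly
  // the first entry of the branch table emitted below. The add is executed
  // only when input < case_count (condition lo); otherwise control falls
  // through to the branch to the default block.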
  __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
  __ b(GetLabel(i.InputRpo(1)));
  for (size_t index = 0; index < case_count; ++index) {
    __ b(GetLabel(i.InputRpo(index + 2)));
  }
}

void CodeGenerator::FinishFrame(Frame* frame) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
  if (saves_fp != 0) {
    frame->AlignSavedCalleeRegisterSlots();
  }

  if (saves_fp != 0) {
    // Save callee-saved FP registers.
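    // The saved FP registers form one contiguous range, so its bounds can be
    // recovered from the leading/trailing zero counts of the mask (the DCHECK
    // below verifies contiguity).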
    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
    uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
    uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
    DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
    frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
                                            (kDoubleSize / kPointerSize));
  }
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (saves != 0) {
    // Save callee-saved registers.
    frame->AllocateSavedCalleeRegisterSlots(base::bits::CountPopulation(saves));
  }
}

void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    if (call_descriptor->IsCFunctionCall()) {
      __ Push(lr, fp);
      __ mov(fp, sp);
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
      if (call_descriptor->PushArgumentCount()) {
        __ Push(kJavaScriptCallArgCountRegister);
      }
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
      if (call_descriptor->IsWasmFunctionCall()) {
        __ Push(kWasmInstanceRegister);
      }
    }

    unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
  }

  int shrink_slots = frame()->GetTotalFrameSlotCount() -
                     call_descriptor->CalculateFixedFrameSize();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
    ResetSpeculationPoison();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();

  if (shrink_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
    if (info()->IsWasm() && shrink_slots > 128) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if ((shrink_slots * kPointerSize) < (FLAG_stack_size * 1024)) {
        UseScratchRegisterScope temps(tasm());
        Register scratch = temps.Acquire();
        __ ldr(scratch, FieldMemOperand(
                            kWasmInstanceRegister,
                            WasmInstanceObject::kRealStackLimitAddressOffset));
        __ ldr(scratch, MemOperand(scratch));
        __ add(scratch, scratch, Operand(shrink_slots * kPointerSize));
        __ cmp(sp, scratch);
        __ b(cs, &done);
      }

      __ ldr(r2, FieldMemOperand(kWasmInstanceRegister,
                                 WasmInstanceObject::kCEntryStubOffset));
      __ Move(cp, Smi::kZero);
      __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, r2);
      // We come from WebAssembly; there are no references for the GC.
      ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
      RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                      Safepoint::kNoLazyDeopt);
      if (FLAG_debug_code) {
        __ stop(GetAbortReason(AbortReason::kUnexpectedReturnFromThrow));
      }

      __ bind(&done);
    }

    // Skip callee-saved and return slots, which are pushed below.
    shrink_slots -= base::bits::CountPopulation(saves);
    shrink_slots -= frame()->GetReturnSlotCount();
    shrink_slots -= 2 * base::bits::CountPopulation(saves_fp);
    if (shrink_slots > 0) {
      __ sub(sp, sp, Operand(shrink_slots * kPointerSize));
    }
  }

  if (saves_fp != 0) {
    // Save callee-saved FP registers.
    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
    uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
    uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
    DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
    __ vstm(db_w, sp, DwVfpRegister::from_code(first),
            DwVfpRegister::from_code(last));
  }

  if (saves != 0) {
    // Save callee-saved registers.
    __ stm(db_w, sp, saves);
  }

  const int returns = frame()->GetReturnSlotCount();
  if (returns != 0) {
    // Create space for returns.
    __ sub(sp, sp, Operand(returns * kPointerSize));
  }
}

void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  int pop_count = static_cast<int>(call_descriptor->StackParameterCount());

  const int returns = frame()->GetReturnSlotCount();
  if (returns != 0) {
    // Free space of returns.
    __ add(sp, sp, Operand(returns * kPointerSize));
  }

  // Restore registers.
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (saves != 0) {
    __ ldm(ia_w, sp, saves);
  }

  // Restore FP registers.
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
  if (saves_fp != 0) {
    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
    uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
    uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
    __ vldm(ia_w, sp, DwVfpRegister::from_code(first),
            DwVfpRegister::from_code(last));
  }

  unwinding_info_writer_.MarkBlockWillExit();

  ArmOperandConverter g(this, nullptr);
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now unless they have a variable
    // number of stack slot pops.
    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
      if (return_label_.is_bound()) {
        __ b(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
        AssembleDeconstructFrame();
      }
    } else {
      AssembleDeconstructFrame();
    }
  }

  if (pop->IsImmediate()) {
    DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
    pop_count += g.ToConstant(pop).ToInt32();
  } else {
    __ Drop(g.ToRegister(pop));
  }
  __ Drop(pop_count);
  __ Ret();
}

void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }

void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  ArmOperandConverter g(this, nullptr);
  // Helper function to write the given constant to the dst register.
  auto MoveConstantToRegister = [&](Register dst, Constant src) {
    if (src.type() == Constant::kHeapObject) {
      Handle<HeapObject> src_object = src.ToHeapObject();
      Heap::RootListIndex index;
      if (IsMaterializableFromRoot(src_object, &index)) {
        __ LoadRoot(dst, index);
      } else {
        __ Move(dst, src_object);
      }
    } else if (src.type() == Constant::kExternalReference) {
      __ Move(dst, src.ToExternalReference());
    } else {
      __ mov(dst, g.ToImmediate(source));
    }
  };
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ mov(g.ToRegister(destination), g.ToRegister(source));
      } else if (source->IsFloatRegister()) {
        DCHECK(destination->IsFloatRegister());
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        int src_code = LocationOperand::cast(source)->register_code();
        int dst_code = LocationOperand::cast(destination)->register_code();
        __ VmovExtended(dst_code, src_code);
      } else if (source->IsDoubleRegister()) {
        __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      } else {
        __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      MemOperand dst = g.ToMemOperand(destination);
      if (source->IsRegister()) {
        __ str(g.ToRegister(source), dst);
      } else if (source->IsFloatRegister()) {
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        int src_code = LocationOperand::cast(source)->register_code();
        __ VmovExtended(dst, src_code);
      } else if (source->IsDoubleRegister()) {
        __ vstr(g.ToDoubleRegister(source), dst);
      } else {
        UseScratchRegisterScope temps(tasm());
        Register temp = temps.Acquire();
        QwNeonRegister src = g.ToSimd128Register(source);
        __ add(temp, dst.rn(), Operand(dst.offset()));
        __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kStackToRegister: {
      MemOperand src = g.ToMemOperand(source);
      if (source->IsStackSlot()) {
        __ ldr(g.ToRegister(destination), src);
      } else if (source->IsFloatStackSlot()) {
        DCHECK(destination->IsFloatRegister());
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        int dst_code = LocationOperand::cast(destination)->register_code();
        __ VmovExtended(dst_code, src);
      } else if (source->IsDoubleStackSlot()) {
        __ vldr(g.ToDoubleRegister(destination), src);
      } else {
        UseScratchRegisterScope temps(tasm());
        Register temp = temps.Acquire();
        QwNeonRegister dst = g.ToSimd128Register(destination);
        __ add(temp, src.rn(), Operand(src.offset()));
        __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kStackToStack: {
      MemOperand src = g.ToMemOperand(source);
      MemOperand dst = g.ToMemOperand(destination);
      UseScratchRegisterScope temps(tasm());
      if (source->IsStackSlot() || source->IsFloatStackSlot()) {
        SwVfpRegister temp = temps.AcquireS();
        __ vldr(temp, src);
        __ vstr(temp, dst);
      } else if (source->IsDoubleStackSlot()) {
        DwVfpRegister temp = temps.AcquireD();
        __ vldr(temp, src);
        __ vstr(temp, dst);
      } else {
        DCHECK(source->IsSimd128StackSlot());
        Register temp = temps.Acquire();
        QwNeonRegister temp_q = temps.AcquireQ();
        __ add(temp, src.rn(), Operand(src.offset()));
        __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
        __ add(temp, dst.rn(), Operand(dst.offset()));
        __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else if (destination->IsFloatRegister()) {
        __ vmov(g.ToFloatRegister(destination),
                Float32::FromBits(src.ToFloat32AsInt()));
      } else {
        // TODO(arm): Look into optimizing this further if possible. Supporting
        // the NEON version of VMOV may help.
        __ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      MemOperand dst = g.ToMemOperand(destination);
      if (destination->IsStackSlot()) {
        UseScratchRegisterScope temps(tasm());
        // Acquire an S register instead of a general-purpose register in case
        // `vstr` needs one to compute the address of `dst`.
        SwVfpRegister s_temp = temps.AcquireS();
        {
          // TODO(arm): This sequence could be optimized further if necessary by
          // writing the constant directly into `s_temp`.
          UseScratchRegisterScope temps(tasm());
          Register temp = temps.Acquire();
          MoveConstantToRegister(temp, src);
          __ vmov(s_temp, temp);
        }
        __ vstr(s_temp, dst);
      } else if (destination->IsFloatStackSlot()) {
        UseScratchRegisterScope temps(tasm());
        SwVfpRegister temp = temps.AcquireS();
        __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
        __ vstr(temp, dst);
      } else {
        DCHECK(destination->IsDoubleStackSlot());
        UseScratchRegisterScope temps(tasm());
        DwVfpRegister temp = temps.AcquireD();
        // TODO(arm): Look into optimizing this further if possible. Supporting
        // the NEON version of VMOV may help.
        __ vmov(temp, src.ToFloat64());
        __ vstr(temp, g.ToMemOperand(destination));
      }
      return;
    }
  }
  UNREACHABLE();
}

void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  ArmOperandConverter g(this, nullptr);
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ Swap(g.ToRegister(source), g.ToRegister(destination));
      } else if (source->IsFloatRegister()) {
        DCHECK(destination->IsFloatRegister());
        // GapResolver may give us reg codes that don't map to actual
        // s-registers. Generate code to work around those cases.
        UseScratchRegisterScope temps(tasm());
        LowDwVfpRegister temp = temps.AcquireLowD();
        int src_code = LocationOperand::cast(source)->register_code();
        int dst_code = LocationOperand::cast(destination)->register_code();
        __ VmovExtended(temp.low().code(), src_code);
        __ VmovExtended(src_code, dst_code);
        __ VmovExtended(dst_code, temp.low().code());
      } else if (source->IsDoubleRegister()) {
        __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
      } else {
        __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
      }
      return;
    case MoveType::kRegisterToStack: {
      MemOperand dst = g.ToMemOperand(destination);
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        UseScratchRegisterScope temps(tasm());
        SwVfpRegister temp = temps.AcquireS();
        __ vmov(temp, src);
        __ ldr(src, dst);
        __ vstr(temp, dst);
      } else if (source->IsFloatRegister()) {
        int src_code = LocationOperand::cast(source)->register_code();
        UseScratchRegisterScope temps(tasm());
        LowDwVfpRegister temp = temps.AcquireLowD();
        __ VmovExtended(temp.low().code(), src_code);
        __ VmovExtended(src_code, dst);
        __ vstr(temp.low(), dst);
      } else if (source->IsDoubleRegister()) {
        UseScratchRegisterScope temps(tasm());
        DwVfpRegister temp = temps.AcquireD();
        DwVfpRegister src = g.ToDoubleRegister(source);
        __ Move(temp, src);
        __ vldr(src, dst);
        __ vstr(temp, dst);
      } else {
        QwNeonRegister src = g.ToSimd128Register(source);
        UseScratchRegisterScope temps(tasm());
        Register temp = temps.Acquire();
        QwNeonRegister temp_q = temps.AcquireQ();
        __ Move(temp_q, src);
        __ add(temp, dst.rn(), Operand(dst.offset()));
        __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
        __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
      }
      return;
    }
    case MoveType::kStackToStack: {
      MemOperand src = g.ToMemOperand(source);
      MemOperand dst = g.ToMemOperand(destination);
      if (source->IsStackSlot() || source->IsFloatStackSlot()) {
        UseScratchRegisterScope temps(tasm());
        SwVfpRegister temp_0 = temps.AcquireS();
        SwVfpRegister temp_1 = temps.AcquireS();
        __ vldr(temp_0, dst);
        __ vldr(temp_1, src);
        __ vstr(temp_0, src);
        __ vstr(temp_1, dst);
      } else if (source->IsDoubleStackSlot()) {
        UseScratchRegisterScope temps(tasm());
        LowDwVfpRegister temp = temps.AcquireLowD();
        if (temps.CanAcquireD()) {
          DwVfpRegister temp_0 = temp;
          DwVfpRegister temp_1 = temps.AcquireD();
          __ vldr(temp_0, dst);
          __ vldr(temp_1, src);
          __ vstr(temp_0, src);
          __ vstr(temp_1, dst);
        } else {
          // We only have a single D register available. However, we can split
          // it into 2 S registers and swap the slots 32 bits at a time.
          MemOperand src0 = src;
          MemOperand dst0 = dst;
          MemOperand src1(src.rn(), src.offset() + kFloatSize);
          MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
          SwVfpRegister temp_0 = temp.low();
          SwVfpRegister temp_1 = temp.high();
          __ vldr(temp_0, dst0);
          __ vldr(temp_1, src0);
          __ vstr(temp_0, src0);
          __ vstr(temp_1, dst0);
          __ vldr(temp_0, dst1);
          __ vldr(temp_1, src1);
          __ vstr(temp_0, src1);
          __ vstr(temp_1, dst1);
        }
      } else {
        DCHECK(source->IsSimd128StackSlot());
        MemOperand src0 = src;
        MemOperand dst0 = dst;
        MemOperand src1(src.rn(), src.offset() + kDoubleSize);
        MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
        UseScratchRegisterScope temps(tasm());
        DwVfpRegister temp_0 = temps.AcquireD();
        DwVfpRegister temp_1 = temps.AcquireD();
        __ vldr(temp_0, dst0);
        __ vldr(temp_1, src0);
        __ vstr(temp_0, src0);
        __ vstr(temp_1, dst0);
        __ vldr(temp_0, dst1);
        __ vldr(temp_1, src1);
        __ vstr(temp_0, src1);
        __ vstr(temp_1, dst1);
      }
      return;
    }
    default:
      UNREACHABLE();
      break;
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
  // On 32-bit ARM we emit the jump tables inline.
  UNREACHABLE();
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8