• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions
6 // are met:
7 //
8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // - Redistribution in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the
14 // distribution.
15 //
16 // - Neither the name of Sun Microsystems or the names of contributors may
17 // be used to endorse or promote products derived from this software without
18 // specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 // OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 // The original source code covered by the above license above has been
34 // modified significantly by Google Inc.
35 // Copyright 2012 the V8 project authors. All rights reserved.
36 
37 #include "src/codegen/arm/assembler-arm.h"
38 
39 #if V8_TARGET_ARCH_ARM
40 
41 #include "src/base/bits.h"
42 #include "src/base/cpu.h"
43 #include "src/base/overflowing-math.h"
44 #include "src/codegen/arm/assembler-arm-inl.h"
45 #include "src/codegen/assembler-inl.h"
46 #include "src/codegen/machine-type.h"
47 #include "src/codegen/macro-assembler.h"
48 #include "src/codegen/string-constants.h"
49 #include "src/deoptimizer/deoptimizer.h"
50 #include "src/objects/objects-inl.h"
51 
52 namespace v8 {
53 namespace internal {
54 
// Cumulative CPU-feature bitmasks for the supported ARM architecture levels.
// Each level is a strict superset of the previous one (bit indices are
// CpuFeature enum values).
55 static const unsigned kArmv6 = 0u;
56 static const unsigned kArmv7 = kArmv6 | (1u << ARMv7);
57 static const unsigned kArmv7WithSudiv = kArmv7 | (1u << ARMv7_SUDIV);
58 static const unsigned kArmv8 = kArmv7WithSudiv | (1u << ARMv8);
59 
CpuFeaturesFromCommandLine()60 static unsigned CpuFeaturesFromCommandLine() {
61   unsigned result;
62   if (strcmp(FLAG_arm_arch, "armv8") == 0) {
63     result = kArmv8;
64   } else if (strcmp(FLAG_arm_arch, "armv7+sudiv") == 0) {
65     result = kArmv7WithSudiv;
66   } else if (strcmp(FLAG_arm_arch, "armv7") == 0) {
67     result = kArmv7;
68   } else if (strcmp(FLAG_arm_arch, "armv6") == 0) {
69     result = kArmv6;
70   } else {
71     fprintf(stderr, "Error: unrecognised value for --arm-arch ('%s').\n",
72             FLAG_arm_arch);
73     fprintf(stderr,
74             "Supported values are:  armv8\n"
75             "                       armv7+sudiv\n"
76             "                       armv7\n"
77             "                       armv6\n");
78     FATAL("arm-arch");
79   }
80 
81   // If any of the old (deprecated) flags are specified, print a warning, but
82   // otherwise try to respect them for now.
83   // TODO(jbramley): When all the old bots have been updated, remove this.
84   if (FLAG_enable_armv7.has_value || FLAG_enable_vfp3.has_value ||
85       FLAG_enable_32dregs.has_value || FLAG_enable_neon.has_value ||
86       FLAG_enable_sudiv.has_value || FLAG_enable_armv8.has_value) {
87     // As an approximation of the old behaviour, set the default values from the
88     // arm_arch setting, then apply the flags over the top.
89     bool enable_armv7 = (result & (1u << ARMv7)) != 0;
90     bool enable_vfp3 = (result & (1u << ARMv7)) != 0;
91     bool enable_32dregs = (result & (1u << ARMv7)) != 0;
92     bool enable_neon = (result & (1u << ARMv7)) != 0;
93     bool enable_sudiv = (result & (1u << ARMv7_SUDIV)) != 0;
94     bool enable_armv8 = (result & (1u << ARMv8)) != 0;
95     if (FLAG_enable_armv7.has_value) {
96       fprintf(stderr,
97               "Warning: --enable_armv7 is deprecated. "
98               "Use --arm_arch instead.\n");
99       enable_armv7 = FLAG_enable_armv7.value;
100     }
101     if (FLAG_enable_vfp3.has_value) {
102       fprintf(stderr,
103               "Warning: --enable_vfp3 is deprecated. "
104               "Use --arm_arch instead.\n");
105       enable_vfp3 = FLAG_enable_vfp3.value;
106     }
107     if (FLAG_enable_32dregs.has_value) {
108       fprintf(stderr,
109               "Warning: --enable_32dregs is deprecated. "
110               "Use --arm_arch instead.\n");
111       enable_32dregs = FLAG_enable_32dregs.value;
112     }
113     if (FLAG_enable_neon.has_value) {
114       fprintf(stderr,
115               "Warning: --enable_neon is deprecated. "
116               "Use --arm_arch instead.\n");
117       enable_neon = FLAG_enable_neon.value;
118     }
119     if (FLAG_enable_sudiv.has_value) {
120       fprintf(stderr,
121               "Warning: --enable_sudiv is deprecated. "
122               "Use --arm_arch instead.\n");
123       enable_sudiv = FLAG_enable_sudiv.value;
124     }
125     if (FLAG_enable_armv8.has_value) {
126       fprintf(stderr,
127               "Warning: --enable_armv8 is deprecated. "
128               "Use --arm_arch instead.\n");
129       enable_armv8 = FLAG_enable_armv8.value;
130     }
131     // Emulate the old implications.
132     if (enable_armv8) {
133       enable_vfp3 = true;
134       enable_neon = true;
135       enable_32dregs = true;
136       enable_sudiv = true;
137     }
138     // Select the best available configuration.
139     if (enable_armv7 && enable_vfp3 && enable_32dregs && enable_neon) {
140       if (enable_sudiv) {
141         if (enable_armv8) {
142           result = kArmv8;
143         } else {
144           result = kArmv7WithSudiv;
145         }
146       } else {
147         result = kArmv7;
148       }
149     } else {
150       result = kArmv6;
151     }
152   }
153   return result;
154 }
155 
156 // Get the CPU features enabled by the build.
157 // For cross compilation the preprocessor symbols such as
158 // CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP3_INSTRUCTIONS can be used to
159 // enable ARMv7 and VFPv3 instructions when building the snapshot. However,
160 // these flags should be consistent with a supported ARM configuration:
161 //  "armv6":       ARMv6 + VFPv2
162 //  "armv7":       ARMv7 + VFPv3-D32 + NEON
163 //  "armv7+sudiv": ARMv7 + VFPv4-D32 + NEON + SUDIV
164 //  "armv8":       ARMv8 (+ all of the above)
// Map the compile-time CAN_USE_* configuration onto a feature bitmask, after
// verifying that the configuration is self-consistent. Used both to seed
// runtime detection and to build the (cross-compiled) snapshot.
CpuFeaturesFromCompiler()165 static constexpr unsigned CpuFeaturesFromCompiler() {
166 // TODO(jbramley): Once the build flags are simplified, these tests should
167 // also be simplified.
168 
169 // Check *architectural* implications.
170 #if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
171 #error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_ARMV7_INSTRUCTIONS"
172 #endif
173 #if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_SUDIV)
174 #error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_SUDIV"
175 #endif
176 #if defined(CAN_USE_ARMV7_INSTRUCTIONS) != defined(CAN_USE_VFP3_INSTRUCTIONS)
177 // V8 requires VFP, and all ARMv7 devices with VFP have VFPv3. Similarly,
178 // VFPv3 isn't available before ARMv7.
179 #error "CAN_USE_ARMV7_INSTRUCTIONS should match CAN_USE_VFP3_INSTRUCTIONS"
180 #endif
181 #if defined(CAN_USE_NEON) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
182 #error "CAN_USE_NEON should imply CAN_USE_ARMV7_INSTRUCTIONS"
183 #endif
184 
185 // Find compiler-implied features.
// Each branch requires the *complete* feature set of its architecture level;
// anything less falls through to the next-lower level.
186 #if defined(CAN_USE_ARMV8_INSTRUCTIONS) &&                           \
187     defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
188     defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
189   return kArmv8;
190 #elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
191     defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
192   return kArmv7WithSudiv;
193 #elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_NEON) && \
194     defined(CAN_USE_VFP3_INSTRUCTIONS)
195   return kArmv7;
196 #else
197   return kArmv6;
198 #endif
199 }
200 
SupportsWasmSimd128()201 bool CpuFeatures::SupportsWasmSimd128() { return IsSupported(NEON); }
202 
ProbeImpl(bool cross_compile)203 void CpuFeatures::ProbeImpl(bool cross_compile) {
204   dcache_line_size_ = 64;
205 
206   unsigned command_line = CpuFeaturesFromCommandLine();
207   // Only use statically determined features for cross compile (snapshot).
208   if (cross_compile) {
209     supported_ |= command_line & CpuFeaturesFromCompiler();
210     return;
211   }
212 
213 #ifndef __arm__
214   // For the simulator build, use whatever the flags specify.
215   supported_ |= command_line;
216 
217 #else  // __arm__
218   // Probe for additional features at runtime.
219   base::CPU cpu;
220   // Runtime detection is slightly fuzzy, and some inferences are necessary.
221   unsigned runtime = kArmv6;
222   // NEON and VFPv3 imply at least ARMv7-A.
223   if (cpu.has_neon() && cpu.has_vfp3_d32()) {
224     DCHECK(cpu.has_vfp3());
225     runtime |= kArmv7;
226     if (cpu.has_idiva()) {
227       runtime |= kArmv7WithSudiv;
228       if (cpu.architecture() >= 8) {
229         runtime |= kArmv8;
230       }
231     }
232   }
233 
234   // Use the best of the features found by CPU detection and those inferred from
235   // the build system. In both cases, restrict available features using the
236   // command-line. Note that the command-line flags are very permissive (kArmv8)
237   // by default.
238   supported_ |= command_line & CpuFeaturesFromCompiler();
239   supported_ |= command_line & runtime;
240 
241   // Additional tuning options.
242 
243   // ARM Cortex-A9 and Cortex-A5 have 32 byte cachelines.
244   if (cpu.implementer() == base::CPU::kArm &&
245       (cpu.part() == base::CPU::kArmCortexA5 ||
246        cpu.part() == base::CPU::kArmCortexA9)) {
247     dcache_line_size_ = 32;
248   }
249 #endif
250 
251   DCHECK_IMPLIES(IsSupported(ARMv7_SUDIV), IsSupported(ARMv7));
252   DCHECK_IMPLIES(IsSupported(ARMv8), IsSupported(ARMv7_SUDIV));
253 
254   // Set a static value on whether Simd is supported.
255   // This variable is only used for certain archs to query SupportWasmSimd128()
256   // at runtime in builtins using an extern ref. Other callers should use
257   // CpuFeatures::SupportWasmSimd128().
258   CpuFeatures::supports_wasm_simd_128_ = CpuFeatures::SupportsWasmSimd128();
259 }
260 
PrintTarget()261 void CpuFeatures::PrintTarget() {
262   const char* arm_arch = nullptr;
263   const char* arm_target_type = "";
264   const char* arm_no_probe = "";
265   const char* arm_fpu = "";
266   const char* arm_thumb = "";
267   const char* arm_float_abi = nullptr;
268 
269 #if !defined __arm__
270   arm_target_type = " simulator";
271 #endif
272 
273 #if defined ARM_TEST_NO_FEATURE_PROBE
274   arm_no_probe = " noprobe";
275 #endif
276 
277 #if defined CAN_USE_ARMV8_INSTRUCTIONS
278   arm_arch = "arm v8";
279 #elif defined CAN_USE_ARMV7_INSTRUCTIONS
280   arm_arch = "arm v7";
281 #else
282   arm_arch = "arm v6";
283 #endif
284 
285 #if defined CAN_USE_NEON
286   arm_fpu = " neon";
287 #elif defined CAN_USE_VFP3_INSTRUCTIONS
288 #if defined CAN_USE_VFP32DREGS
289   arm_fpu = " vfp3";
290 #else
291   arm_fpu = " vfp3-d16";
292 #endif
293 #else
294   arm_fpu = " vfp2";
295 #endif
296 
297 #ifdef __arm__
298   arm_float_abi = base::OS::ArmUsingHardFloat() ? "hard" : "softfp";
299 #elif USE_EABI_HARDFLOAT
300   arm_float_abi = "hard";
301 #else
302   arm_float_abi = "softfp";
303 #endif
304 
305 #if defined __arm__ && (defined __thumb__) || (defined __thumb2__)
306   arm_thumb = " thumb";
307 #endif
308 
309   printf("target%s%s %s%s%s %s\n", arm_target_type, arm_no_probe, arm_arch,
310          arm_fpu, arm_thumb, arm_float_abi);
311 }
312 
PrintFeatures()313 void CpuFeatures::PrintFeatures() {
314   printf("ARMv8=%d ARMv7=%d VFPv3=%d VFP32DREGS=%d NEON=%d SUDIV=%d",
315          CpuFeatures::IsSupported(ARMv8), CpuFeatures::IsSupported(ARMv7),
316          CpuFeatures::IsSupported(VFPv3), CpuFeatures::IsSupported(VFP32DREGS),
317          CpuFeatures::IsSupported(NEON), CpuFeatures::IsSupported(SUDIV));
318 #ifdef __arm__
319   bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
320 #elif USE_EABI_HARDFLOAT
321   bool eabi_hardfloat = true;
322 #else
323   bool eabi_hardfloat = false;
324 #endif
325   printf(" USE_EABI_HARDFLOAT=%d\n", eabi_hardfloat);
326 }
327 
328 // -----------------------------------------------------------------------------
329 // Implementation of RelocInfo
330 
331 // static
// kApplyMask lists the reloc modes that need fixing up when code moves; on
// ARM only pc-relative RELATIVE_CODE_TARGET entries qualify.
332 const int RelocInfo::kApplyMask =
333     RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);
334 
IsCodedSpecially()335 bool RelocInfo::IsCodedSpecially() {
336   // The deserializer needs to know whether a pointer is specially coded.  Being
337   // specially coded on ARM means that it is a movw/movt instruction. We don't
338   // generate those for relocatable pointers.
339   return false;
340 }
341 
IsInConstantPool()342 bool RelocInfo::IsInConstantPool() {
343   return Assembler::is_constant_pool_load(pc_);
344 }
345 
wasm_call_tag() const346 uint32_t RelocInfo::wasm_call_tag() const {
347   DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
348   return static_cast<uint32_t>(
349       Assembler::target_address_at(pc_, constant_pool_));
350 }
351 
352 // -----------------------------------------------------------------------------
353 // Implementation of Operand and MemOperand
354 // See assembler-arm-inl.h for inlined constructors
355 
Operand(Handle<HeapObject> handle)356 Operand::Operand(Handle<HeapObject> handle) {
357   rm_ = no_reg;
358   value_.immediate = static_cast<intptr_t>(handle.address());
359   rmode_ = RelocInfo::FULL_EMBEDDED_OBJECT;
360 }
361 
Operand(Register rm,ShiftOp shift_op,int shift_imm)362 Operand::Operand(Register rm, ShiftOp shift_op, int shift_imm) {
363   DCHECK(is_uint5(shift_imm));
364 
365   rm_ = rm;
366   rs_ = no_reg;
367   shift_op_ = shift_op;
368   shift_imm_ = shift_imm & 31;
369 
370   if ((shift_op == ROR) && (shift_imm == 0)) {
371     // ROR #0 is functionally equivalent to LSL #0 and this allow us to encode
372     // RRX as ROR #0 (See below).
373     shift_op = LSL;
374   } else if (shift_op == RRX) {
375     // encoded as ROR with shift_imm == 0
376     DCHECK_EQ(shift_imm, 0);
377     shift_op_ = ROR;
378     shift_imm_ = 0;
379   }
380 }
381 
Operand(Register rm,ShiftOp shift_op,Register rs)382 Operand::Operand(Register rm, ShiftOp shift_op, Register rs) {
383   DCHECK(shift_op != RRX);
384   rm_ = rm;
385   rs_ = no_reg;
386   shift_op_ = shift_op;
387   rs_ = rs;
388 }
389 
EmbeddedNumber(double value)390 Operand Operand::EmbeddedNumber(double value) {
391   int32_t smi;
392   if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
393   Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
394   result.is_heap_object_request_ = true;
395   result.value_.heap_object_request = HeapObjectRequest(value);
396   return result;
397 }
398 
EmbeddedStringConstant(const StringConstantBase * str)399 Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
400   Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
401   result.is_heap_object_request_ = true;
402   result.value_.heap_object_request = HeapObjectRequest(str);
403   return result;
404 }
405 
MemOperand(Register rn,int32_t offset,AddrMode am)406 MemOperand::MemOperand(Register rn, int32_t offset, AddrMode am)
407     : rn_(rn), rm_(no_reg), offset_(offset), am_(am) {
408   // Accesses below the stack pointer are not safe, and are prohibited by the
409   // ABI. We can check obvious violations here.
410   if (rn == sp) {
411     if (am == Offset) DCHECK_LE(0, offset);
412     if (am == NegOffset) DCHECK_GE(0, offset);
413   }
414 }
415 
// Register-offset addressing: [rn, rm], with the offset register unshifted
// (implicit LSL #0).
MemOperand(Register rn,Register rm,AddrMode am)416 MemOperand::MemOperand(Register rn, Register rm, AddrMode am)
417     : rn_(rn), rm_(rm), shift_op_(LSL), shift_imm_(0), am_(am) {}
418 
// Scaled-register addressing: [rn, rm, <shift_op> #shift_imm].
MemOperand(Register rn,Register rm,ShiftOp shift_op,int shift_imm,AddrMode am)419 MemOperand::MemOperand(Register rn, Register rm, ShiftOp shift_op,
420                        int shift_imm, AddrMode am)
421     : rn_(rn),
422       rm_(rm),
423       shift_op_(shift_op),
424       shift_imm_(shift_imm & 31),  // Masked to 5 bits; range checked below.
425       am_(am) {
426   DCHECK(is_uint5(shift_imm));
427 }
428 
// NEON load/store address. The Rm slot encodes the addressing mode: pc
// selects plain [rn] (Offset) and sp selects post-increment by the transfer
// size (PostIndex). NOTE(review): this matches the ARM VLD/VST Rm encoding
// convention -- confirm against the ARM ARM.
NeonMemOperand(Register rn,AddrMode am,int align)429 NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align)
430     : rn_(rn), rm_(am == Offset ? pc : sp) {
431   DCHECK((am == Offset) || (am == PostIndex));
432   SetAlignment(align);
433 }
434 
// NEON load/store with register post-increment: [rn], rm.
NeonMemOperand(Register rn,Register rm,int align)435 NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align)
436     : rn_(rn), rm_(rm) {
437   SetAlignment(align);
438 }
439 
SetAlignment(int align)440 void NeonMemOperand::SetAlignment(int align) {
441   switch (align) {
442     case 0:
443       align_ = 0;
444       break;
445     case 64:
446       align_ = 1;
447       break;
448     case 128:
449       align_ = 2;
450       break;
451     case 256:
452       align_ = 3;
453       break;
454     default:
455       UNREACHABLE();
456   }
457 }
458 
// Runs at GetCode() time: allocates every heap object deferred via
// Operand::EmbeddedNumber / EmbeddedStringConstant and patches the object's
// address into the constant-pool slot recorded for the request.
AllocateAndInstallRequestedHeapObjects(Isolate * isolate)459 void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
460   DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
461   for (auto& request : heap_object_requests_) {
462     Handle<HeapObject> object;
463     switch (request.kind()) {
464       case HeapObjectRequest::kHeapNumber:
465         object = isolate->factory()->NewHeapNumber<AllocationType::kOld>(
466             request.heap_number());
467         break;
468       case HeapObjectRequest::kStringConstant: {
469         const StringConstantBase* str = request.string();
470         CHECK_NOT_NULL(str);
471         object = str->AllocateStringConstant(isolate);
472         break;
473       }
474     }
    // request.offset() locates the instruction that loads the value; the
    // object address is stored in its associated constant-pool entry.
475     Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
476     Memory<Address>(constant_pool_entry_address(pc, 0 /* unused */)) =
477         object.address();
478   }
479 }
480 
481 // -----------------------------------------------------------------------------
482 // Specific instructions, constants, and masks.
483 
484 // str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
485 // register r is not encoded.
486 const Instr kPushRegPattern = al | B26 | 4 | NegPreIndex | sp.code() * B16;
487 // ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r))
488 // register r is not encoded.
489 const Instr kPopRegPattern = al | B26 | L | 4 | PostIndex | sp.code() * B16;
490 // ldr rd, [pc, #offset]
491 const Instr kLdrPCImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
492 const Instr kLdrPCImmedPattern = 5 * B24 | L | pc.code() * B16;
493 // Pc-relative call or jump to a signed imm24 offset.
494 // bl pc + #offset
495 // b  pc + #offset
496 const Instr kBOrBlPCImmedMask = 0xE * B24;
497 const Instr kBOrBlPCImmedPattern = 0xA * B24;
498 // vldr dd, [pc, #offset]
499 const Instr kVldrDPCMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
500 const Instr kVldrDPCPattern = 13 * B24 | L | pc.code() * B16 | 11 * B8;
501 // blxcc rm
502 const Instr kBlxRegMask =
503     15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
504 const Instr kBlxRegPattern = B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX;
505 const Instr kBlxIp = al | kBlxRegPattern | ip.code();
// Masks, patterns and opcode-flip bits used when recognising and rewriting
// mov/mvn, movw/movt, cmp/cmn, add/sub and and/bic immediate forms.
506 const Instr kMovMvnMask = 0x6D * B21 | 0xF * B16;
507 const Instr kMovMvnPattern = 0xD * B21;
508 const Instr kMovMvnFlip = B22;
509 const Instr kMovLeaveCCMask = 0xDFF * B16;
510 const Instr kMovLeaveCCPattern = 0x1A0 * B16;
511 const Instr kMovwPattern = 0x30 * B20;
512 const Instr kMovtPattern = 0x34 * B20;
513 const Instr kMovwLeaveCCFlip = 0x5 * B21;
514 const Instr kMovImmedMask = 0x7F * B21;
515 const Instr kMovImmedPattern = 0x1D * B21;
516 const Instr kOrrImmedMask = 0x7F * B21;
517 const Instr kOrrImmedPattern = 0x1C * B21;
518 const Instr kCmpCmnMask = 0xDD * B20 | 0xF * B12;
519 const Instr kCmpCmnPattern = 0x15 * B20;
520 const Instr kCmpCmnFlip = B21;
521 const Instr kAddSubFlip = 0x6 * B21;
522 const Instr kAndBicFlip = 0xE * B21;
523 
524 // Patterns for ldr/str with an fp-relative offset; the varying Rd/offset
// fields are ignored by matching through kLdrStrInstrTypeMask below.
525 const Instr kLdrRegFpOffsetPattern = al | B26 | L | Offset | fp.code() * B16;
526 const Instr kStrRegFpOffsetPattern = al | B26 | Offset | fp.code() * B16;
527 const Instr kLdrRegFpNegOffsetPattern =
528     al | B26 | L | NegOffset | fp.code() * B16;
529 const Instr kStrRegFpNegOffsetPattern = al | B26 | NegOffset | fp.code() * B16;
530 const Instr kLdrStrInstrTypeMask = 0xFFFF0000;
531 
// Construct an assembler over |buffer|, initialise the constant-pool
// bookkeeping and choose the scratch registers (ip for core, d14/d15 for
// VFP).
Assembler(const AssemblerOptions & options,std::unique_ptr<AssemblerBuffer> buffer)532 Assembler::Assembler(const AssemblerOptions& options,
533                      std::unique_ptr<AssemblerBuffer> buffer)
534     : AssemblerBase(options, std::move(buffer)),
535       pending_32_bit_constants_(),
536       scratch_register_list_({ip}) {
    // Reloc info is written backwards, starting from the end of the buffer
    // (GetCode computes its size from reloc_info_writer.pos()).
537   reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
    // No constant-pool emission is scheduled yet.
538   constant_pool_deadline_ = kMaxInt;
539   const_pool_blocked_nesting_ = 0;
540   no_const_pool_before_ = 0;
541   first_const_pool_32_use_ = -1;
542   last_bound_pos_ = 0;
543   if (CpuFeatures::IsSupported(VFP32DREGS)) {
544     // Register objects tend to be abstracted and survive between scopes, so
545     // it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
546     // its use consistent with other features, we always enable it if we can.
547     EnableCpuFeature(VFP32DREGS);
548     // Make sure we pick two D registers which alias a Q register. This way, we
549     // can use a Q as a scratch if NEON is supported.
550     scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
551   } else {
552     // When VFP32DREGS is not supported, d15 becomes allocatable. Therefore we
553     // cannot use it as a scratch.
554     scratch_vfp_register_list_ = d14.ToVfpRegList();
555   }
556 }
557 
// Destruction-time sanity checks: no constant-pool blocking scope is still
// open and no 32-bit constant-pool entry is still waiting to be emitted.
~Assembler()558 Assembler::~Assembler() {
559   DCHECK_EQ(const_pool_blocked_nesting_, 0);
560   DCHECK_EQ(first_const_pool_32_use_, -1);
561 }
562 
// Finalise the generated code: pad/align, flush the constant pool, emit code
// comments, materialise deferred heap objects, and fill in the CodeDesc
// layout offsets (safepoints, handler table, constant pool, code comments,
// reloc info).
GetCode(Isolate * isolate,CodeDesc * desc,SafepointTableBuilder * safepoint_table_builder,int handler_table_offset)563 void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
564                         SafepointTableBuilder* safepoint_table_builder,
565                         int handler_table_offset) {
566   // As a crutch to avoid having to add manual Align calls wherever we use a
567   // raw workflow to create Code objects (mostly in tests), add another Align
568   // call here. It does no harm - the end of the Code object is aligned to the
569   // (larger) kCodeAlignment anyways.
570   // TODO(jgruber): Consider moving responsibility for proper alignment to
571   // metadata table builders (safepoint, handler, constant pool, code
572   // comments).
573   DataAlign(Code::kMetadataAlignment);
574 
575   // Emit constant pool if necessary.
576   CheckConstPool(true, false);
577   DCHECK(pending_32_bit_constants_.empty());
578 
579   int code_comments_size = WriteCodeComments();
580 
581   AllocateAndInstallRequestedHeapObjects(isolate);
582 
583   // Set up code descriptor.
584   // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
585   // this point to make CodeDesc initialization less fiddly.
586 
    // Zero here because constants are emitted inline in the instruction
    // stream (see CheckConstPool above), not in a trailing pool area.
587   static constexpr int kConstantPoolSize = 0;
588   const int instruction_size = pc_offset();
589   const int code_comments_offset = instruction_size - code_comments_size;
590   const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
591   const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
592                                         ? constant_pool_offset
593                                         : handler_table_offset;
594   const int safepoint_table_offset =
595       (safepoint_table_builder == kNoSafepointTable)
596           ? handler_table_offset2
597           : safepoint_table_builder->safepoint_table_offset();
    // Reloc info was written backwards from the buffer end (see the
    // constructor), so pos() marks its first byte.
598   const int reloc_info_offset =
599       static_cast<int>(reloc_info_writer.pos() - buffer_->start());
600   CodeDesc::Initialize(desc, this, safepoint_table_offset,
601                        handler_table_offset2, constant_pool_offset,
602                        code_comments_offset, reloc_info_offset);
603 }
604 
Align(int m)605 void Assembler::Align(int m) {
606   DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
607   DCHECK_EQ(pc_offset() & (kInstrSize - 1), 0);
608   while ((pc_offset() & (m - 1)) != 0) {
609     nop();
610   }
611 }
612 
CodeTargetAlign()613 void Assembler::CodeTargetAlign() {
614   // Preferred alignment of jump targets on some ARM chips.
615   Align(8);
616 }
617 
GetCondition(Instr instr)618 Condition Assembler::GetCondition(Instr instr) {
619   return Instruction::ConditionField(instr);
620 }
621 
IsLdrRegisterImmediate(Instr instr)622 bool Assembler::IsLdrRegisterImmediate(Instr instr) {
623   return (instr & (B27 | B26 | B25 | B22 | B20)) == (B26 | B20);
624 }
625 
IsVldrDRegisterImmediate(Instr instr)626 bool Assembler::IsVldrDRegisterImmediate(Instr instr) {
627   return (instr & (15 * B24 | 3 * B20 | 15 * B8)) == (13 * B24 | B20 | 11 * B8);
628 }
629 
GetLdrRegisterImmediateOffset(Instr instr)630 int Assembler::GetLdrRegisterImmediateOffset(Instr instr) {
631   DCHECK(IsLdrRegisterImmediate(instr));
632   bool positive = (instr & B23) == B23;
633   int offset = instr & kOff12Mask;  // Zero extended offset.
634   return positive ? offset : -offset;
635 }
636 
GetVldrDRegisterImmediateOffset(Instr instr)637 int Assembler::GetVldrDRegisterImmediateOffset(Instr instr) {
638   DCHECK(IsVldrDRegisterImmediate(instr));
639   bool positive = (instr & B23) == B23;
640   int offset = instr & kOff8Mask;  // Zero extended offset.
641   offset <<= 2;
642   return positive ? offset : -offset;
643 }
644 
SetLdrRegisterImmediateOffset(Instr instr,int offset)645 Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
646   DCHECK(IsLdrRegisterImmediate(instr));
647   bool positive = offset >= 0;
648   if (!positive) offset = -offset;
649   DCHECK(is_uint12(offset));
650   // Set bit indicating whether the offset should be added.
651   instr = (instr & ~B23) | (positive ? B23 : 0);
652   // Set the actual offset.
653   return (instr & ~kOff12Mask) | offset;
654 }
655 
SetVldrDRegisterImmediateOffset(Instr instr,int offset)656 Instr Assembler::SetVldrDRegisterImmediateOffset(Instr instr, int offset) {
657   DCHECK(IsVldrDRegisterImmediate(instr));
658   DCHECK((offset & ~3) == offset);  // Must be 64-bit aligned.
659   bool positive = offset >= 0;
660   if (!positive) offset = -offset;
661   DCHECK(is_uint10(offset));
662   // Set bit indicating whether the offset should be added.
663   instr = (instr & ~B23) | (positive ? B23 : 0);
664   // Set the actual offset. Its bottom 2 bits are zero.
665   return (instr & ~kOff8Mask) | (offset >> 2);
666 }
667 
IsStrRegisterImmediate(Instr instr)668 bool Assembler::IsStrRegisterImmediate(Instr instr) {
669   return (instr & (B27 | B26 | B25 | B22 | B20)) == B26;
670 }
671 
SetStrRegisterImmediateOffset(Instr instr,int offset)672 Instr Assembler::SetStrRegisterImmediateOffset(Instr instr, int offset) {
673   DCHECK(IsStrRegisterImmediate(instr));
674   bool positive = offset >= 0;
675   if (!positive) offset = -offset;
676   DCHECK(is_uint12(offset));
677   // Set bit indicating whether the offset should be added.
678   instr = (instr & ~B23) | (positive ? B23 : 0);
679   // Set the actual offset.
680   return (instr & ~kOff12Mask) | offset;
681 }
682 
IsAddRegisterImmediate(Instr instr)683 bool Assembler::IsAddRegisterImmediate(Instr instr) {
684   return (instr & (B27 | B26 | B25 | B24 | B23 | B22 | B21)) == (B25 | B23);
685 }
686 
SetAddRegisterImmediateOffset(Instr instr,int offset)687 Instr Assembler::SetAddRegisterImmediateOffset(Instr instr, int offset) {
688   DCHECK(IsAddRegisterImmediate(instr));
689   DCHECK_GE(offset, 0);
690   DCHECK(is_uint12(offset));
691   // Set the offset.
692   return (instr & ~kOff12Mask) | offset;
693 }
694 
GetRd(Instr instr)695 Register Assembler::GetRd(Instr instr) {
696   return Register::from_code(Instruction::RdValue(instr));
697 }
698 
GetRn(Instr instr)699 Register Assembler::GetRn(Instr instr) {
700   return Register::from_code(Instruction::RnValue(instr));
701 }
702 
GetRm(Instr instr)703 Register Assembler::GetRm(Instr instr) {
704   return Register::from_code(Instruction::RmValue(instr));
705 }
706 
IsPush(Instr instr)707 bool Assembler::IsPush(Instr instr) {
708   return ((instr & ~kRdMask) == kPushRegPattern);
709 }
710 
IsPop(Instr instr)711 bool Assembler::IsPop(Instr instr) {
712   return ((instr & ~kRdMask) == kPopRegPattern);
713 }
714 
IsStrRegFpOffset(Instr instr)715 bool Assembler::IsStrRegFpOffset(Instr instr) {
716   return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern);
717 }
718 
IsLdrRegFpOffset(Instr instr)719 bool Assembler::IsLdrRegFpOffset(Instr instr) {
720   return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern);
721 }
722 
IsStrRegFpNegOffset(Instr instr)723 bool Assembler::IsStrRegFpNegOffset(Instr instr) {
724   return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern);
725 }
726 
IsLdrRegFpNegOffset(Instr instr)727 bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
728   return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern);
729 }
730 
IsLdrPcImmediateOffset(Instr instr)731 bool Assembler::IsLdrPcImmediateOffset(Instr instr) {
732   // Check the instruction is indeed a
733   // ldr<cond> <Rd>, [pc +/- offset_12].
734   return (instr & kLdrPCImmedMask) == kLdrPCImmedPattern;
735 }
736 
IsBOrBlPcImmediateOffset(Instr instr)737 bool Assembler::IsBOrBlPcImmediateOffset(Instr instr) {
738   return (instr & kBOrBlPCImmedMask) == kBOrBlPCImmedPattern;
739 }
740 
IsVldrDPcImmediateOffset(Instr instr)741 bool Assembler::IsVldrDPcImmediateOffset(Instr instr) {
742   // Check the instruction is indeed a
743   // vldr<cond> <Dd>, [pc +/- offset_10].
744   return (instr & kVldrDPCMask) == kVldrDPCPattern;
745 }
746 
IsBlxReg(Instr instr)747 bool Assembler::IsBlxReg(Instr instr) {
748   // Check the instruction is indeed a
749   // blxcc <Rm>
750   return (instr & kBlxRegMask) == kBlxRegPattern;
751 }
752 
IsBlxIp(Instr instr)753 bool Assembler::IsBlxIp(Instr instr) {
754   // Check the instruction is indeed a
755   // blx ip
756   return instr == kBlxIp;
757 }
758 
IsTstImmediate(Instr instr)759 bool Assembler::IsTstImmediate(Instr instr) {
760   return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | TST | S);
761 }
762 
IsCmpRegister(Instr instr)763 bool Assembler::IsCmpRegister(Instr instr) {
764   return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask | B4)) ==
765          (CMP | S);
766 }
767 
IsCmpImmediate(Instr instr)768 bool Assembler::IsCmpImmediate(Instr instr) {
769   return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | CMP | S);
770 }
771 
GetCmpImmediateRegister(Instr instr)772 Register Assembler::GetCmpImmediateRegister(Instr instr) {
773   DCHECK(IsCmpImmediate(instr));
774   return GetRn(instr);
775 }
776 
// Returns the raw low 12-bit immediate field of a cmp-immediate instruction
// (not decoded: rotate and 8-bit value are still packed together).
int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return instr & kOff12Mask;
}
781 
782 // Labels refer to positions in the (to be) generated code.
783 // There are bound, linked, and unused labels.
784 //
785 // Bound labels refer to known positions in the already
786 // generated code. pos() is the position the label refers to.
787 //
788 // Linked labels refer to unknown positions in the code
789 // to be generated; pos() is the position of the last
790 // instruction using the label.
791 //
// The linked labels form a link chain by making the branch offset
// in the instruction stream point to the previous branch
// instruction using the same label.
795 //
796 // The link chain is terminated by a branch offset pointing to the
797 // same position.
798 
// Returns the position pointed to by the link/branch instruction at |pos|.
// Used to walk a label's link chain: a raw uint24 value is a chain link
// emitted by mov_label_offset; otherwise the instruction must be a b/bl/blx
// whose signed 24-bit word offset is decoded relative to |pos|.
int Assembler::target_at(int pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    // Emitted link to a label, not part of a branch.
    return instr;
  }
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  // Sign-extend the 24-bit word offset and scale it to bytes (<< 2 overall).
  int imm26 = ((instr & kImm24Mask) << 8) >> 6;
  if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
      ((instr & B24) != 0)) {
    // blx uses bit 24 to encode bit 2 of imm26
    imm26 += 2;
  }
  return pos + Instruction::kPcLoadDelta + imm26;
}
814 
// Patches the instruction(s) at |pos| to refer to |target_pos|. Two cases:
// a raw uint24 label-chain link emitted by mov_label_offset (patched into a
// mov/movw-movt/mov-orr-orr sequence materializing the code offset), or a
// b/bl/blx branch whose 24-bit immediate is rewritten in place.
void Assembler::target_at_put(int pos, int target_pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    DCHECK(target_pos == pos || target_pos >= 0);
    // Emitted link to a label, not part of a branch.
    // Load the position of the label relative to the generated code object
    // pointer in a register.

    // The existing code must be a single 24-bit label chain link, followed by
    // nops encoding the destination register. See mov_label_offset.

    // Extract the destination register from the first nop instructions.
    Register dst =
        Register::from_code(Instruction::RmValue(instr_at(pos + kInstrSize)));
    // In addition to the 24-bit label chain link, we expect to find one nop for
    // ARMv7 and above, or two nops for ARMv6. See mov_label_offset.
    DCHECK(IsNop(instr_at(pos + kInstrSize), dst.code()));
    if (!CpuFeatures::IsSupported(ARMv7)) {
      DCHECK(IsNop(instr_at(pos + 2 * kInstrSize), dst.code()));
    }

    // Here are the instructions we need to emit:
    //   For ARMv7: target24 => target16_1:target16_0
    //      movw dst, #target16_0
    //      movt dst, #target16_1
    //   For ARMv6: target24 => target8_2:target8_1:target8_0
    //      mov dst, #target8_0
    //      orr dst, dst, #target8_1 << 8
    //      orr dst, dst, #target8_2 << 16

    // Offset is relative to the code object pointer, not the code start.
    uint32_t target24 = target_pos + (Code::kHeaderSize - kHeapObjectTag);
    CHECK(is_uint24(target24));
    if (is_uint8(target24)) {
      // If the target fits in a byte then only patch with a mov
      // instruction.
      PatchingAssembler patcher(
          options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
      patcher.mov(dst, Operand(target24));
    } else {
      uint16_t target16_0 = target24 & kImm16Mask;
      uint16_t target16_1 = target24 >> 16;
      if (CpuFeatures::IsSupported(ARMv7)) {
        // Patch with movw/movt.
        if (target16_1 == 0) {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
          patcher.movt(dst, target16_1);
        }
      } else {
        // Patch with a sequence of mov/orr/orr instructions.
        uint8_t target8_0 = target16_0 & kImm8Mask;
        uint8_t target8_1 = target16_0 >> 8;
        uint8_t target8_2 = target16_1 & kImm8Mask;
        if (target8_2 == 0) {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 3);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
          patcher.orr(dst, dst, Operand(target8_2 << 16));
        }
      }
    }
    return;
  }
  // Branch case: rewrite the signed 24-bit word offset (and, for blx, the
  // extra low bit carried in B24) in the existing instruction.
  int imm26 = target_pos - (pos + Instruction::kPcLoadDelta);
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  if (Instruction::ConditionField(instr) == kSpecialCondition) {
    // blx uses bit 24 to encode bit 2 of imm26
    DCHECK_EQ(0, imm26 & 1);
    instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1) * B24;
  } else {
    DCHECK_EQ(0, imm26 & 3);
    instr &= ~kImm24Mask;
  }
  int imm24 = imm26 >> 2;
  CHECK(is_int24(imm24));
  instr_at_put(pos, instr | (imm24 & kImm24Mask));
}
905 
// Debugging aid: prints the state of label |L|. For a linked label, walks the
// whole link chain and prints each chain entry's position together with the
// branch mnemonic (b/bl/blx + condition) or "value" for a raw chain link.
void Assembler::print(const Label* L) {
  if (L->is_unused()) {
    PrintF("unused label\n");
  } else if (L->is_bound()) {
    PrintF("bound label to %d\n", L->pos());
  } else if (L->is_linked()) {
    // Walk a copy of the label so the original chain state is not consumed.
    Label l;
    l.link_to(L->pos());
    PrintF("unbound label");
    while (l.is_linked()) {
      PrintF("@ %d ", l.pos());
      Instr instr = instr_at(l.pos());
      if ((instr & ~kImm24Mask) == 0) {
        // Raw uint24 chain link (see mov_label_offset), not a branch.
        PrintF("value\n");
      } else {
        DCHECK_EQ(instr & 7 * B25, 5 * B25);  // b, bl, or blx
        Condition cond = Instruction::ConditionField(instr);
        const char* b;
        const char* c;
        if (cond == kSpecialCondition) {
          b = "blx";
          c = "";
        } else {
          if ((instr & B24) != 0)
            b = "bl";
          else
            b = "b";

          switch (cond) {
            case eq:
              c = "eq";
              break;
            case ne:
              c = "ne";
              break;
            case hs:
              c = "hs";
              break;
            case lo:
              c = "lo";
              break;
            case mi:
              c = "mi";
              break;
            case pl:
              c = "pl";
              break;
            case vs:
              c = "vs";
              break;
            case vc:
              c = "vc";
              break;
            case hi:
              c = "hi";
              break;
            case ls:
              c = "ls";
              break;
            case ge:
              c = "ge";
              break;
            case lt:
              c = "lt";
              break;
            case gt:
              c = "gt";
              break;
            case le:
              c = "le";
              break;
            case al:
              c = "";
              break;
            default:
              c = "";
              UNREACHABLE();
          }
        }
        PrintF("%s%s\n", b, c);
      }
      next(&l);
    }
  } else {
    PrintF("label in inconsistent state (pos = %d)\n", L->pos_);
  }
}
993 
// Binds label |L| to position |pos|, patching every instruction in its link
// chain to refer to the bound position.
void Assembler::bind_to(Label* L, int pos) {
  DCHECK(0 <= pos && pos <= pc_offset());  // must have a valid binding position
  while (L->is_linked()) {
    int fixup_pos = L->pos();
    next(L);  // call next before overwriting link with target at fixup_pos
    target_at_put(fixup_pos, pos);
  }
  L->bind_to(pos);

  // Keep track of the last bound label so we don't eliminate any instructions
  // before a bound label.
  if (pos > last_bound_pos_) last_bound_pos_ = pos;
}
1007 
bind(Label * L)1008 void Assembler::bind(Label* L) {
1009   DCHECK(!L->is_bound());  // label can only be bound once
1010   bind_to(L, pc_offset());
1011 }
1012 
// Advances label |L| to the previous entry in its link chain. A chain link
// that points at itself terminates the chain, in which case the label is
// marked unused.
void Assembler::next(Label* L) {
  DCHECK(L->is_linked());
  int link = target_at(L->pos());
  if (link == L->pos()) {
    // Branch target points to the same instruction. This is the end of the link
    // chain.
    L->Unuse();
  } else {
    DCHECK_GE(link, 0);
    L->link_to(link);
  }
}
1025 
1026 namespace {
1027 
// Low-level code emission routines depending on the addressing mode.
// If this returns true then you have to use the rotate_imm and immed_8
// that it returns, because it may have already changed the instruction
// to match them!
//
// Returns true if |imm32| can be encoded as an ARM shifter operand: an 8-bit
// value rotated right by 2*|rotate_imm| bits. If |instr| is non-null and the
// immediate itself does not fit, also tries the complementary immediate with
// the opcode flipped (mov<->mvn, cmp<->cmn, add<->sub, and<->bic, or a movw
// rewrite), updating *instr in place on success.
bool FitsShifter(uint32_t imm32, uint32_t* rotate_imm, uint32_t* immed_8,
                 Instr* instr) {
  // imm32 must be unsigned.
  {
    // 32-bit immediates can be encoded as:
    //   (8-bit value, 2*N bit left rotation)
    // e.g. 0xab00 can be encoded as 0xab shifted left by 8 == 2*4, i.e.
    //   (0xab, 4)
    //
    // Check three categories which cover all possible shifter fits:
    //   1. 0x000000FF: The value is already 8-bit (no shifting necessary),
    //   2. 0x000FF000: The 8-bit value is somewhere in the middle of the 32-bit
    //                  value, and
    //   3. 0xF000000F: The 8-bit value is split over the beginning and end of
    //                  the 32-bit value.

    // For 0x000000FF.
    if (imm32 <= 0xFF) {
      *rotate_imm = 0;
      *immed_8 = imm32;
      return true;
    }
    // For 0x000FF000, count trailing zeros and shift down to 0x000000FF. Note
    // that we have to round the trailing zeros down to the nearest multiple of
    // two, since we can only encode shifts of 2*N. Note also that we know that
    // imm32 isn't zero, since we already checked if it's less than 0xFF.
    int half_trailing_zeros = base::bits::CountTrailingZerosNonZero(imm32) / 2;
    uint32_t imm8 = imm32 >> (half_trailing_zeros * 2);
    if (imm8 <= 0xFF) {
      DCHECK_GT(half_trailing_zeros, 0);
      // Rotating right by trailing_zeros is equivalent to rotating left by
      // 32 - trailing_zeros. We return rotate_right / 2, so calculate
      // (32 - trailing_zeros)/2 == 16 - trailing_zeros/2.
      *rotate_imm = (16 - half_trailing_zeros);
      *immed_8 = imm8;
      return true;
    }
    // For 0xF000000F, rotate by 16 to get 0x000FF000 and continue as if it
    // were that case.
    uint32_t imm32_rot16 = base::bits::RotateLeft32(imm32, 16);
    half_trailing_zeros =
        base::bits::CountTrailingZerosNonZero(imm32_rot16) / 2;
    imm8 = imm32_rot16 >> (half_trailing_zeros * 2);
    if (imm8 <= 0xFF) {
      // We've rotated left by 2*8, so we can't have more than that many
      // trailing zeroes.
      DCHECK_LT(half_trailing_zeros, 8);
      // We've already rotated by 2*8, before calculating trailing_zeros/2,
      // so we need (32 - (16 + trailing_zeros))/2 == 8 - trailing_zeros/2.
      *rotate_imm = 8 - half_trailing_zeros;
      *immed_8 = imm8;
      return true;
    }
  }
  // If the opcode is one with a complementary version and the complementary
  // immediate fits, change the opcode.
  if (instr != nullptr) {
    if ((*instr & kMovMvnMask) == kMovMvnPattern) {
      if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
        *instr ^= kMovMvnFlip;
        return true;
      } else if ((*instr & kMovLeaveCCMask) == kMovLeaveCCPattern) {
        if (CpuFeatures::IsSupported(ARMv7)) {
          if (imm32 < 0x10000) {
            // mov (without flags) can become movw for 16-bit immediates.
            *instr ^= kMovwLeaveCCFlip;
            *instr |= Assembler::EncodeMovwImmediate(imm32);
            *rotate_imm = *immed_8 = 0;  // Not used for movw.
            return true;
          }
        }
      }
    } else if ((*instr & kCmpCmnMask) == kCmpCmnPattern) {
      if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8, nullptr)) {
        *instr ^= kCmpCmnFlip;
        return true;
      }
    } else {
      Instr alu_insn = (*instr & kALUMask);
      if (alu_insn == ADD || alu_insn == SUB) {
        if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8,
                        nullptr)) {
          *instr ^= kAddSubFlip;
          return true;
        }
      } else if (alu_insn == AND || alu_insn == BIC) {
        if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
          *instr ^= kAndBicFlip;
          return true;
        }
      }
    }
  }
  return false;
}
1126 
// We have to use the temporary register for things that can be relocated even
// if they can be encoded in the ARM's 12 bits of immediate-offset instruction
// space.  There is no guarantee that the relocated location can be similarly
// encoded.
//
// Returns true when a value with relocation mode |rmode| must be emitted with
// relocation info (serializer-only modes depend on assembler options).
bool MustOutputRelocInfo(RelocInfo::Mode rmode, const Assembler* assembler) {
  if (RelocInfo::IsOnlyForSerializer(rmode)) {
    if (assembler->predictable_code_size()) return true;
    return assembler->options().record_reloc_info_for_serialization;
  } else if (RelocInfo::IsNoInfo(rmode)) {
    return false;
  }
  return true;
}
1140 
UseMovImmediateLoad(const Operand & x,const Assembler * assembler)1141 bool UseMovImmediateLoad(const Operand& x, const Assembler* assembler) {
1142   DCHECK_NOT_NULL(assembler);
1143   if (x.MustOutputRelocInfo(assembler)) {
1144     // Prefer constant pool if data is likely to be patched.
1145     return false;
1146   } else {
1147     // Otherwise, use immediate load if movw / movt is available.
1148     return CpuFeatures::IsSupported(ARMv7);
1149   }
1150 }
1151 
1152 }  // namespace
1153 
// Forwards to the file-local helper using this operand's relocation mode.
bool Operand::MustOutputRelocInfo(const Assembler* assembler) const {
  return v8::internal::MustOutputRelocInfo(rmode_, assembler);
}
1157 
// Returns the number of instructions needed to use this operand in |instr|:
// 1 for a register or an encodable immediate; otherwise the cost of the
// immediate load (movw/movt pair or constant pool ldr) plus, unless |instr|
// is a plain mov, the requested instruction itself.
int Operand::InstructionsRequired(const Assembler* assembler,
                                  Instr instr) const {
  DCHECK_NOT_NULL(assembler);
  if (rm_.is_valid()) return 1;
  uint32_t dummy1, dummy2;
  if (MustOutputRelocInfo(assembler) ||
      !FitsShifter(immediate(), &dummy1, &dummy2, &instr)) {
    // The immediate operand cannot be encoded as a shifter operand, or use of
    // constant pool is required.  First account for the instructions required
    // for the constant pool or immediate load
    int instructions;
    if (UseMovImmediateLoad(*this, assembler)) {
      DCHECK(CpuFeatures::IsSupported(ARMv7));
      // A movw / movt immediate load.
      instructions = 2;
    } else {
      // A small constant pool load.
      instructions = 1;
    }
    if ((instr & ~kCondMask) != 13 * B21) {  // mov, S not set
      // For a mov or mvn instruction which doesn't set the condition
      // code, the constant pool or immediate load is enough, otherwise we need
      // to account for the actual instruction being requested.
      instructions += 1;
    }
    return instructions;
  } else {
    // No use of constant pool and the immediate operand can be encoded as a
    // shifter operand.
    return 1;
  }
}
1190 
// Loads the full 32-bit immediate |x| into |rd|: either a movw/movt pair
// (ARMv7, no relocation info needed) or a pc-relative load from the constant
// pool.
void Assembler::Move32BitImmediate(Register rd, const Operand& x,
                                   Condition cond) {
  if (UseMovImmediateLoad(x, this)) {
    CpuFeatureScope scope(this, ARMv7);
    // UseMovImmediateLoad should return false when we need to output
    // relocation info, since we prefer the constant pool for values that
    // can be patched.
    DCHECK(!x.MustOutputRelocInfo(this));
    UseScratchRegisterScope temps(this);
    // Re-use the destination register as a scratch if possible.
    Register target = rd != pc && rd != sp ? rd : temps.Acquire();
    uint32_t imm32 = static_cast<uint32_t>(x.immediate());
    movw(target, imm32 & 0xFFFF, cond);
    movt(target, imm32 >> 16, cond);
    if (target.code() != rd.code()) {
      mov(rd, target, LeaveCC, cond);
    }
  } else {
    int32_t immediate;
    if (x.IsHeapObjectRequest()) {
      // The actual value is filled in later; record the request and emit 0
      // as a placeholder.
      RequestHeapObject(x.heap_object_request());
      immediate = 0;
    } else {
      immediate = x.immediate();
    }
    ConstantPoolAddEntry(pc_offset(), x.rmode_, immediate);
    ldr_pcrel(rd, 0, cond);
  }
}
1220 
// Emits a data-processing (addressing mode 1) instruction. When the operand
// cannot be encoded as a shifter operand, falls back to a multi-instruction
// sequence (immediate load into a scratch register, or a chain of adds).
void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
                          const Operand& x) {
  CheckBuffer();
  uint32_t opcode = instr & kOpCodeMask;
  bool set_flags = (instr & S) != 0;
  DCHECK((opcode == ADC) || (opcode == ADD) || (opcode == AND) ||
         (opcode == BIC) || (opcode == EOR) || (opcode == ORR) ||
         (opcode == RSB) || (opcode == RSC) || (opcode == SBC) ||
         (opcode == SUB) || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST) || (opcode == MOV) ||
         (opcode == MVN));
  // For comparison instructions, rd is not defined.
  DCHECK(rd.is_valid() || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST));
  // For move instructions, rn is not defined.
  DCHECK(rn.is_valid() || (opcode == MOV) || (opcode == MVN));
  DCHECK(rd.is_valid() || rn.is_valid());
  DCHECK_EQ(instr & ~(kCondMask | kOpCodeMask | S), 0);
  if (!AddrMode1TryEncodeOperand(&instr, x)) {
    DCHECK(x.IsImmediate());
    // Upon failure to encode, the opcode should not have changed.
    DCHECK(opcode == (instr & kOpCodeMask));
    UseScratchRegisterScope temps(this);
    Condition cond = Instruction::ConditionField(instr);
    if ((opcode == MOV) && !set_flags) {
      // Generate a sequence of mov instructions or a load from the constant
      // pool only for a MOV instruction which does not set the flags.
      DCHECK(!rn.is_valid());
      Move32BitImmediate(rd, x, cond);
    } else if ((opcode == ADD) && !set_flags && (rd == rn) &&
               !temps.CanAcquire()) {
      // Split the operation into a sequence of additions if we cannot use a
      // scratch register. In this case, we cannot re-use rn and the assembler
      // does not have any scratch registers to spare.
      uint32_t imm = x.immediate();
      do {
        // The immediate encoding format is composed of 8 bits of data and 4
        // bits encoding a rotation. Each of the 16 possible rotations accounts
        // for a rotation by an even number.
        //   4 bits -> 16 rotations possible
        //          -> 16 rotations of 2 bits each fits in a 32-bit value.
        // This means that finding the even number of trailing zeroes of the
        // immediate allows us to more efficiently split it:
        int trailing_zeroes = base::bits::CountTrailingZeros(imm) & ~1u;
        uint32_t mask = (0xFF << trailing_zeroes);
        add(rd, rd, Operand(imm & mask), LeaveCC, cond);
        imm = imm & ~mask;
      } while (!ImmediateFitsAddrMode1Instruction(imm));
      add(rd, rd, Operand(imm), LeaveCC, cond);
    } else {
      // The immediate operand cannot be encoded as a shifter operand, so load
      // it first to a scratch register and change the original instruction to
      // use it.
      // Re-use the destination register if possible.
      Register scratch = (rd.is_valid() && rd != rn && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, x, LeaveCC, cond);
      AddrMode1(instr, rd, rn, Operand(scratch));
    }
    return;
  }
  if (!rd.is_valid()) {
    // Emit a comparison instruction.
    emit(instr | rn.code() * B16);
  } else if (!rn.is_valid()) {
    // Emit a move instruction. If the operand is a register-shifted register,
    // then prevent the destination from being PC as this is unpredictable.
    DCHECK(!x.IsRegisterShiftedRegister() || rd != pc);
    emit(instr | rd.code() * B12);
  } else {
    emit(instr | rn.code() * B16 | rd.code() * B12);
  }
  if (rn == pc || x.rm_ == pc) {
    // Block constant pool emission for one instruction after reading pc.
    BlockConstPoolFor(1);
  }
}
1299 
// Tries to encode operand |x| directly into *instr. Returns false only for an
// immediate that cannot be represented as a shifter operand (or that needs
// relocation info); the caller must then emit a multi-instruction sequence.
bool Assembler::AddrMode1TryEncodeOperand(Instr* instr, const Operand& x) {
  if (x.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (x.MustOutputRelocInfo(this) ||
        !FitsShifter(x.immediate(), &rotate_imm, &immed_8, instr)) {
      // Let the caller handle generating multiple instructions.
      return false;
    }
    *instr |= I | rotate_imm * B8 | immed_8;
  } else if (x.IsImmediateShiftedRegister()) {
    *instr |= x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  } else {
    DCHECK(x.IsRegisterShiftedRegister());
    // It is unpredictable to use the PC in this case.
    DCHECK(x.rm_ != pc && x.rs_ != pc);
    *instr |= x.rs_.code() * B8 | x.shift_op_ | B4 | x.rm_.code();
  }

  return true;
}
1322 
// Emits a word/byte load/store (addressing mode 2) instruction. Immediate
// offsets that do not fit in 12 bits are first materialized in a scratch
// register, after which the instruction is re-emitted in register-offset
// form.
void Assembler::AddrMode2(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | B | L)) == B26);
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_12 = x.offset_;
    if (offset_12 < 0) {
      // Encode the magnitude and flip the U (up/down) bit.
      offset_12 = -offset_12;
      am ^= U;
    }
    if (!is_uint12(offset_12)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      bool is_load = (instr & L) == L;
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode2(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_12, 0);  // no masking needed
    instr |= offset_12;
  } else {
    // Register offset (shift_imm_ and shift_op_ are 0) or scaled
    // register offset the constructors make sure than both shift_imm_
    // and shift_op_ are initialized.
    DCHECK(x.rm_ != pc);
    instr |= B25 | x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}
1362 
// Emits an addressing mode 3 load/store (extra load/store forms such as
// halfword and signed byte; see the S6/H bits in the mask). The immediate
// offset is split across two 4-bit fields; offsets that do not fit in 8 bits,
// and scaled register offsets (not supported by this mode), are computed into
// a scratch register first.
void Assembler::AddrMode3(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
  DCHECK(x.rn_.is_valid());
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  bool is_load = (instr & L) == L;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_8 = x.offset_;
    if (offset_8 < 0) {
      // Encode the magnitude and flip the U (up/down) bit.
      offset_8 = -offset_8;
      am ^= U;
    }
    if (!is_uint8(offset_8)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_8, 0);  // no masking needed
    // Split the 8-bit offset into the high and low 4-bit immediate fields.
    instr |= B | (offset_8 >> 4) * B8 | (offset_8 & 0xF);
  } else if (x.shift_imm_ != 0) {
    // Scaled register offsets are not supported, compute the offset separately
    // to a scratch register.
    UseScratchRegisterScope temps(this);
    // Allow re-using rd for load instructions if possible.
    Register scratch =
        (is_load && rd != x.rn_ && rd != pc && rd != sp) ? rd : temps.Acquire();
    mov(scratch, Operand(x.rm_, x.shift_op_, x.shift_imm_), LeaveCC,
        Instruction::ConditionField(instr));
    AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
    return;
  } else {
    // Register offset.
    DCHECK((am & (P | W)) == P || x.rm_ != pc);  // no pc index with writeback
    instr |= x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}
1412 
// Emits a load/store-multiple (addressing mode 4) instruction for base
// register |rn| and register list |rl|.
void Assembler::AddrMode4(Instr instr, Register rn, RegList rl) {
  DCHECK((instr & ~(kCondMask | P | U | W | L)) == B27);
  DCHECK(!rl.is_empty());
  DCHECK(rn != pc);
  emit(instr | rn.code() * B16 | rl.bits());
}
1419 
// Emits a coprocessor load/store (addressing mode 5) instruction. The byte
// offset must be word-aligned and, after scaling by 4, fit in 8 bits.
void Assembler::AddrMode5(Instr instr, CRegister crd, const MemOperand& x) {
  // Unindexed addressing is not encoded by this function.
  DCHECK_EQ((B27 | B26),
            (instr & ~(kCondMask | kCoprocessorMask | P | U | N | W | L)));
  DCHECK(x.rn_.is_valid() && !x.rm_.is_valid());
  int am = x.am_;
  int offset_8 = x.offset_;
  DCHECK_EQ(offset_8 & 3, 0);  // offset must be an aligned word offset
  offset_8 >>= 2;
  if (offset_8 < 0) {
    // Encode the magnitude and flip the U (up/down) bit.
    offset_8 = -offset_8;
    am ^= U;
  }
  DCHECK(is_uint8(offset_8));  // unsigned word offset must fit in a byte
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback

  // Post-indexed addressing requires W == 1; different than in AddrMode2/3.
  if ((am & P) == 0) am |= W;

  DCHECK_GE(offset_8, 0);  // no masking needed
  emit(instr | am | x.rn_.code() * B16 | crd.code() * B12 | offset_8);
}
1442 
// Returns the branch offset (in bytes, relative to pc + kPcLoadDelta) to
// label |L|. For an unbound label, the current position is added to the
// label's link chain, and the returned offset points at the previous chain
// entry (or at this instruction itself for a fresh chain).
int Assembler::branch_offset(Label* L) {
  int target_pos;
  if (L->is_bound()) {
    target_pos = L->pos();
  } else {
    if (L->is_linked()) {
      // Point to previous instruction that uses the link.
      target_pos = L->pos();
    } else {
      // First entry of the link chain points to itself.
      target_pos = pc_offset();
    }
    L->link_to(pc_offset());
  }

  return target_pos - (pc_offset() + Instruction::kPcLoadDelta);
}
1460 
1461 // Branch instructions.
// b<cond> <branch_offset>
// Emits a branch. |branch_offset| is in bytes, already relative to
// pc + kPcLoadDelta (see branch_offset()), and must be word-aligned.
void Assembler::b(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  if (!RelocInfo::IsNoInfo(rmode)) RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool b_imm_check = is_int24(imm24);
  CHECK(b_imm_check);

  // Block the emission of the constant pool before the next instruction.
  // Otherwise the passed-in branch offset would be off.
  BlockConstPoolFor(1);

  emit(cond | B27 | B25 | (imm24 & kImm24Mask));

  if (cond == al) {
    // Dead code is a good location to emit the constant pool.
    CheckConstPool(false, false);
  }
}
1480 
// bl<cond> <branch_offset>
// Emits a branch-with-link. |branch_offset| is in bytes, already relative to
// pc + kPcLoadDelta (see branch_offset()), and must be word-aligned.
void Assembler::bl(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  if (!RelocInfo::IsNoInfo(rmode)) RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool bl_imm_check = is_int24(imm24);
  CHECK(bl_imm_check);

  // Block the emission of the constant pool before the next instruction.
  // Otherwise the passed-in branch offset would be off.
  BlockConstPoolFor(1);

  emit(cond | B27 | B25 | B24 | (imm24 & kImm24Mask));
}
1494 
// blx <branch_offset>
// Emits an immediate-form branch-with-link-and-exchange. The offset only
// needs to be half-word aligned: bit 1 of the offset is carried in the H bit
// (B24), and the instruction uses the special (0b1111) condition field.
void Assembler::blx(int branch_offset) {
  DCHECK_EQ(branch_offset & 1, 0);
  int h = ((branch_offset & 2) >> 1) * B24;  // H bit = bit 1 of the offset.
  int imm24 = branch_offset >> 2;
  const bool blx_imm_check = is_int24(imm24);
  CHECK(blx_imm_check);

  // Block the emission of the constant pool before the next instruction.
  // Otherwise the passed-in branch offset would be off.
  BlockConstPoolFor(1);

  emit(kSpecialCondition | B27 | B25 | h | (imm24 & kImm24Mask));
}
1508 
// blx<cond> <target>
// Register-form branch-with-link-and-exchange; |target| must not be pc.
void Assembler::blx(Register target, Condition cond) {
  DCHECK(target != pc);
  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX | target.code());
}
1513 
// bx<cond> <target>
// Branch-and-exchange to the address in |target|.
void Assembler::bx(Register target, Condition cond) {
  DCHECK(target != pc);  // use of pc is actually allowed, but discouraged
  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BX | target.code());
}
1518 
b(Label * L,Condition cond)1519 void Assembler::b(Label* L, Condition cond) {
1520   CheckBuffer();
1521   b(branch_offset(L), cond);
1522 }
1523 
bl(Label * L,Condition cond)1524 void Assembler::bl(Label* L, Condition cond) {
1525   CheckBuffer();
1526   bl(branch_offset(L), cond);
1527 }
1528 
blx(Label * L)1529 void Assembler::blx(Label* L) {
1530   CheckBuffer();
1531   blx(branch_offset(L));
1532 }
1533 
1534 // Data-processing instructions.
1535 
and_(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1536 void Assembler::and_(Register dst, Register src1, const Operand& src2, SBit s,
1537                      Condition cond) {
1538   AddrMode1(cond | AND | s, dst, src1, src2);
1539 }
1540 
and_(Register dst,Register src1,Register src2,SBit s,Condition cond)1541 void Assembler::and_(Register dst, Register src1, Register src2, SBit s,
1542                      Condition cond) {
1543   and_(dst, src1, Operand(src2), s, cond);
1544 }
1545 
eor(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1546 void Assembler::eor(Register dst, Register src1, const Operand& src2, SBit s,
1547                     Condition cond) {
1548   AddrMode1(cond | EOR | s, dst, src1, src2);
1549 }
1550 
eor(Register dst,Register src1,Register src2,SBit s,Condition cond)1551 void Assembler::eor(Register dst, Register src1, Register src2, SBit s,
1552                     Condition cond) {
1553   AddrMode1(cond | EOR | s, dst, src1, Operand(src2));
1554 }
1555 
sub(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1556 void Assembler::sub(Register dst, Register src1, const Operand& src2, SBit s,
1557                     Condition cond) {
1558   AddrMode1(cond | SUB | s, dst, src1, src2);
1559 }
1560 
sub(Register dst,Register src1,Register src2,SBit s,Condition cond)1561 void Assembler::sub(Register dst, Register src1, Register src2, SBit s,
1562                     Condition cond) {
1563   sub(dst, src1, Operand(src2), s, cond);
1564 }
1565 
rsb(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1566 void Assembler::rsb(Register dst, Register src1, const Operand& src2, SBit s,
1567                     Condition cond) {
1568   AddrMode1(cond | RSB | s, dst, src1, src2);
1569 }
1570 
add(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1571 void Assembler::add(Register dst, Register src1, const Operand& src2, SBit s,
1572                     Condition cond) {
1573   AddrMode1(cond | ADD | s, dst, src1, src2);
1574 }
1575 
add(Register dst,Register src1,Register src2,SBit s,Condition cond)1576 void Assembler::add(Register dst, Register src1, Register src2, SBit s,
1577                     Condition cond) {
1578   add(dst, src1, Operand(src2), s, cond);
1579 }
1580 
adc(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1581 void Assembler::adc(Register dst, Register src1, const Operand& src2, SBit s,
1582                     Condition cond) {
1583   AddrMode1(cond | ADC | s, dst, src1, src2);
1584 }
1585 
sbc(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1586 void Assembler::sbc(Register dst, Register src1, const Operand& src2, SBit s,
1587                     Condition cond) {
1588   AddrMode1(cond | SBC | s, dst, src1, src2);
1589 }
1590 
rsc(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1591 void Assembler::rsc(Register dst, Register src1, const Operand& src2, SBit s,
1592                     Condition cond) {
1593   AddrMode1(cond | RSC | s, dst, src1, src2);
1594 }
1595 
tst(Register src1,const Operand & src2,Condition cond)1596 void Assembler::tst(Register src1, const Operand& src2, Condition cond) {
1597   AddrMode1(cond | TST | S, no_reg, src1, src2);
1598 }
1599 
tst(Register src1,Register src2,Condition cond)1600 void Assembler::tst(Register src1, Register src2, Condition cond) {
1601   tst(src1, Operand(src2), cond);
1602 }
1603 
teq(Register src1,const Operand & src2,Condition cond)1604 void Assembler::teq(Register src1, const Operand& src2, Condition cond) {
1605   AddrMode1(cond | TEQ | S, no_reg, src1, src2);
1606 }
1607 
cmp(Register src1,const Operand & src2,Condition cond)1608 void Assembler::cmp(Register src1, const Operand& src2, Condition cond) {
1609   AddrMode1(cond | CMP | S, no_reg, src1, src2);
1610 }
1611 
cmp(Register src1,Register src2,Condition cond)1612 void Assembler::cmp(Register src1, Register src2, Condition cond) {
1613   cmp(src1, Operand(src2), cond);
1614 }
1615 
cmp_raw_immediate(Register src,int raw_immediate,Condition cond)1616 void Assembler::cmp_raw_immediate(Register src, int raw_immediate,
1617                                   Condition cond) {
1618   DCHECK(is_uint12(raw_immediate));
1619   emit(cond | I | CMP | S | src.code() << 16 | raw_immediate);
1620 }
1621 
cmn(Register src1,const Operand & src2,Condition cond)1622 void Assembler::cmn(Register src1, const Operand& src2, Condition cond) {
1623   AddrMode1(cond | CMN | S, no_reg, src1, src2);
1624 }
1625 
orr(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1626 void Assembler::orr(Register dst, Register src1, const Operand& src2, SBit s,
1627                     Condition cond) {
1628   AddrMode1(cond | ORR | s, dst, src1, src2);
1629 }
1630 
orr(Register dst,Register src1,Register src2,SBit s,Condition cond)1631 void Assembler::orr(Register dst, Register src1, Register src2, SBit s,
1632                     Condition cond) {
1633   orr(dst, src1, Operand(src2), s, cond);
1634 }
1635 
mov(Register dst,const Operand & src,SBit s,Condition cond)1636 void Assembler::mov(Register dst, const Operand& src, SBit s, Condition cond) {
1637   // Don't allow nop instructions in the form mov rn, rn to be generated using
1638   // the mov instruction. They must be generated using nop(int/NopMarkerTypes).
1639   DCHECK(!(src.IsRegister() && src.rm() == dst && s == LeaveCC && cond == al));
1640   AddrMode1(cond | MOV | s, dst, no_reg, src);
1641 }
1642 
mov(Register dst,Register src,SBit s,Condition cond)1643 void Assembler::mov(Register dst, Register src, SBit s, Condition cond) {
1644   mov(dst, Operand(src), s, cond);
1645 }
1646 
mov_label_offset(Register dst,Label * label)1647 void Assembler::mov_label_offset(Register dst, Label* label) {
1648   if (label->is_bound()) {
1649     mov(dst, Operand(label->pos() + (Code::kHeaderSize - kHeapObjectTag)));
1650   } else {
1651     // Emit the link to the label in the code stream followed by extra nop
1652     // instructions.
1653     // If the label is not linked, then start a new link chain by linking it to
1654     // itself, emitting pc_offset().
1655     int link = label->is_linked() ? label->pos() : pc_offset();
1656     label->link_to(pc_offset());
1657 
1658     // When the label is bound, these instructions will be patched with a
1659     // sequence of movw/movt or mov/orr/orr instructions. They will load the
1660     // destination register with the position of the label from the beginning
1661     // of the code.
1662     //
1663     // The link will be extracted from the first instruction and the destination
1664     // register from the second.
1665     //   For ARMv7:
1666     //      link
1667     //      mov dst, dst
1668     //   For ARMv6:
1669     //      link
1670     //      mov dst, dst
1671     //      mov dst, dst
1672     //
1673     // When the label gets bound: target_at extracts the link and target_at_put
1674     // patches the instructions.
1675     CHECK(is_uint24(link));
1676     BlockConstPoolScope block_const_pool(this);
1677     emit(link);
1678     nop(dst.code());
1679     if (!CpuFeatures::IsSupported(ARMv7)) {
1680       nop(dst.code());
1681     }
1682   }
1683 }
1684 
movw(Register reg,uint32_t immediate,Condition cond)1685 void Assembler::movw(Register reg, uint32_t immediate, Condition cond) {
1686   DCHECK(IsEnabled(ARMv7));
1687   emit(cond | 0x30 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate));
1688 }
1689 
movt(Register reg,uint32_t immediate,Condition cond)1690 void Assembler::movt(Register reg, uint32_t immediate, Condition cond) {
1691   DCHECK(IsEnabled(ARMv7));
1692   emit(cond | 0x34 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate));
1693 }
1694 
bic(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1695 void Assembler::bic(Register dst, Register src1, const Operand& src2, SBit s,
1696                     Condition cond) {
1697   AddrMode1(cond | BIC | s, dst, src1, src2);
1698 }
1699 
mvn(Register dst,const Operand & src,SBit s,Condition cond)1700 void Assembler::mvn(Register dst, const Operand& src, SBit s, Condition cond) {
1701   AddrMode1(cond | MVN | s, dst, no_reg, src);
1702 }
1703 
asr(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1704 void Assembler::asr(Register dst, Register src1, const Operand& src2, SBit s,
1705                     Condition cond) {
1706   if (src2.IsRegister()) {
1707     mov(dst, Operand(src1, ASR, src2.rm()), s, cond);
1708   } else {
1709     mov(dst, Operand(src1, ASR, src2.immediate()), s, cond);
1710   }
1711 }
1712 
lsl(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1713 void Assembler::lsl(Register dst, Register src1, const Operand& src2, SBit s,
1714                     Condition cond) {
1715   if (src2.IsRegister()) {
1716     mov(dst, Operand(src1, LSL, src2.rm()), s, cond);
1717   } else {
1718     mov(dst, Operand(src1, LSL, src2.immediate()), s, cond);
1719   }
1720 }
1721 
lsr(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1722 void Assembler::lsr(Register dst, Register src1, const Operand& src2, SBit s,
1723                     Condition cond) {
1724   if (src2.IsRegister()) {
1725     mov(dst, Operand(src1, LSR, src2.rm()), s, cond);
1726   } else {
1727     mov(dst, Operand(src1, LSR, src2.immediate()), s, cond);
1728   }
1729 }
1730 
// Multiply instructions.
mla(Register dst,Register src1,Register src2,Register srcA,SBit s,Condition cond)1732 void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
1733                     SBit s, Condition cond) {
1734   DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
1735   emit(cond | A | s | dst.code() * B16 | srcA.code() * B12 | src2.code() * B8 |
1736        B7 | B4 | src1.code());
1737 }
1738 
mls(Register dst,Register src1,Register src2,Register srcA,Condition cond)1739 void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
1740                     Condition cond) {
1741   DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
1742   DCHECK(IsEnabled(ARMv7));
1743   emit(cond | B22 | B21 | dst.code() * B16 | srcA.code() * B12 |
1744        src2.code() * B8 | B7 | B4 | src1.code());
1745 }
1746 
sdiv(Register dst,Register src1,Register src2,Condition cond)1747 void Assembler::sdiv(Register dst, Register src1, Register src2,
1748                      Condition cond) {
1749   DCHECK(dst != pc && src1 != pc && src2 != pc);
1750   DCHECK(IsEnabled(SUDIV));
1751   emit(cond | B26 | B25 | B24 | B20 | dst.code() * B16 | 0xF * B12 |
1752        src2.code() * B8 | B4 | src1.code());
1753 }
1754 
udiv(Register dst,Register src1,Register src2,Condition cond)1755 void Assembler::udiv(Register dst, Register src1, Register src2,
1756                      Condition cond) {
1757   DCHECK(dst != pc && src1 != pc && src2 != pc);
1758   DCHECK(IsEnabled(SUDIV));
1759   emit(cond | B26 | B25 | B24 | B21 | B20 | dst.code() * B16 | 0xF * B12 |
1760        src2.code() * B8 | B4 | src1.code());
1761 }
1762 
mul(Register dst,Register src1,Register src2,SBit s,Condition cond)1763 void Assembler::mul(Register dst, Register src1, Register src2, SBit s,
1764                     Condition cond) {
1765   DCHECK(dst != pc && src1 != pc && src2 != pc);
1766   // dst goes in bits 16-19 for this instruction!
1767   emit(cond | s | dst.code() * B16 | src2.code() * B8 | B7 | B4 | src1.code());
1768 }
1769 
smmla(Register dst,Register src1,Register src2,Register srcA,Condition cond)1770 void Assembler::smmla(Register dst, Register src1, Register src2, Register srcA,
1771                       Condition cond) {
1772   DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
1773   emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 |
1774        srcA.code() * B12 | src2.code() * B8 | B4 | src1.code());
1775 }
1776 
smmul(Register dst,Register src1,Register src2,Condition cond)1777 void Assembler::smmul(Register dst, Register src1, Register src2,
1778                       Condition cond) {
1779   DCHECK(dst != pc && src1 != pc && src2 != pc);
1780   emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 | 0xF * B12 |
1781        src2.code() * B8 | B4 | src1.code());
1782 }
1783 
smlal(Register dstL,Register dstH,Register src1,Register src2,SBit s,Condition cond)1784 void Assembler::smlal(Register dstL, Register dstH, Register src1,
1785                       Register src2, SBit s, Condition cond) {
1786   DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1787   DCHECK(dstL != dstH);
1788   emit(cond | B23 | B22 | A | s | dstH.code() * B16 | dstL.code() * B12 |
1789        src2.code() * B8 | B7 | B4 | src1.code());
1790 }
1791 
smull(Register dstL,Register dstH,Register src1,Register src2,SBit s,Condition cond)1792 void Assembler::smull(Register dstL, Register dstH, Register src1,
1793                       Register src2, SBit s, Condition cond) {
1794   DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1795   DCHECK(dstL != dstH);
1796   emit(cond | B23 | B22 | s | dstH.code() * B16 | dstL.code() * B12 |
1797        src2.code() * B8 | B7 | B4 | src1.code());
1798 }
1799 
umlal(Register dstL,Register dstH,Register src1,Register src2,SBit s,Condition cond)1800 void Assembler::umlal(Register dstL, Register dstH, Register src1,
1801                       Register src2, SBit s, Condition cond) {
1802   DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1803   DCHECK(dstL != dstH);
1804   emit(cond | B23 | A | s | dstH.code() * B16 | dstL.code() * B12 |
1805        src2.code() * B8 | B7 | B4 | src1.code());
1806 }
1807 
umull(Register dstL,Register dstH,Register src1,Register src2,SBit s,Condition cond)1808 void Assembler::umull(Register dstL, Register dstH, Register src1,
1809                       Register src2, SBit s, Condition cond) {
1810   DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
1811   DCHECK(dstL != dstH);
1812   emit(cond | B23 | s | dstH.code() * B16 | dstL.code() * B12 |
1813        src2.code() * B8 | B7 | B4 | src1.code());
1814 }
1815 
// Miscellaneous arithmetic instructions.
clz(Register dst,Register src,Condition cond)1817 void Assembler::clz(Register dst, Register src, Condition cond) {
1818   DCHECK(dst != pc && src != pc);
1819   emit(cond | B24 | B22 | B21 | 15 * B16 | dst.code() * B12 | 15 * B8 | CLZ |
1820        src.code());
1821 }
1822 
// Saturating instructions.

// Unsigned saturate.
usat(Register dst,int satpos,const Operand & src,Condition cond)1826 void Assembler::usat(Register dst, int satpos, const Operand& src,
1827                      Condition cond) {
1828   DCHECK(dst != pc && src.rm_ != pc);
1829   DCHECK((satpos >= 0) && (satpos <= 31));
1830   DCHECK(src.IsImmediateShiftedRegister());
1831   DCHECK((src.shift_op_ == ASR) || (src.shift_op_ == LSL));
1832 
1833   int sh = 0;
1834   if (src.shift_op_ == ASR) {
1835     sh = 1;
1836   }
1837 
1838   emit(cond | 0x6 * B24 | 0xE * B20 | satpos * B16 | dst.code() * B12 |
1839        src.shift_imm_ * B7 | sh * B6 | 0x1 * B4 | src.rm_.code());
1840 }
1841 
// Bitfield manipulation instructions.

// Unsigned bit field extract.
// Extracts #width adjacent bits from position #lsb in a register, and
// writes them to the low bits of a destination register.
//   ubfx dst, src, #lsb, #width
ubfx(Register dst,Register src,int lsb,int width,Condition cond)1848 void Assembler::ubfx(Register dst, Register src, int lsb, int width,
1849                      Condition cond) {
1850   DCHECK(IsEnabled(ARMv7));
1851   DCHECK(dst != pc && src != pc);
1852   DCHECK((lsb >= 0) && (lsb <= 31));
1853   DCHECK((width >= 1) && (width <= (32 - lsb)));
1854   emit(cond | 0xF * B23 | B22 | B21 | (width - 1) * B16 | dst.code() * B12 |
1855        lsb * B7 | B6 | B4 | src.code());
1856 }
1857 
// Signed bit field extract.
// Extracts #width adjacent bits from position #lsb in a register, and
// writes them to the low bits of a destination register. The extracted
// value is sign extended to fill the destination register.
//   sbfx dst, src, #lsb, #width
sbfx(Register dst,Register src,int lsb,int width,Condition cond)1863 void Assembler::sbfx(Register dst, Register src, int lsb, int width,
1864                      Condition cond) {
1865   DCHECK(IsEnabled(ARMv7));
1866   DCHECK(dst != pc && src != pc);
1867   DCHECK((lsb >= 0) && (lsb <= 31));
1868   DCHECK((width >= 1) && (width <= (32 - lsb)));
1869   emit(cond | 0xF * B23 | B21 | (width - 1) * B16 | dst.code() * B12 |
1870        lsb * B7 | B6 | B4 | src.code());
1871 }
1872 
// Bit field clear.
// Sets #width adjacent bits at position #lsb in the destination register
// to zero, preserving the value of the other bits.
//   bfc dst, #lsb, #width
bfc(Register dst,int lsb,int width,Condition cond)1877 void Assembler::bfc(Register dst, int lsb, int width, Condition cond) {
1878   DCHECK(IsEnabled(ARMv7));
1879   DCHECK(dst != pc);
1880   DCHECK((lsb >= 0) && (lsb <= 31));
1881   DCHECK((width >= 1) && (width <= (32 - lsb)));
1882   int msb = lsb + width - 1;
1883   emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 | 0xF);
1884 }
1885 
// Bit field insert.
// Inserts #width adjacent bits from the low bits of the source register
// into position #lsb of the destination register.
//   bfi dst, src, #lsb, #width
bfi(Register dst,Register src,int lsb,int width,Condition cond)1890 void Assembler::bfi(Register dst, Register src, int lsb, int width,
1891                     Condition cond) {
1892   DCHECK(IsEnabled(ARMv7));
1893   DCHECK(dst != pc && src != pc);
1894   DCHECK((lsb >= 0) && (lsb <= 31));
1895   DCHECK((width >= 1) && (width <= (32 - lsb)));
1896   int msb = lsb + width - 1;
1897   emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 |
1898        src.code());
1899 }
1900 
pkhbt(Register dst,Register src1,const Operand & src2,Condition cond)1901 void Assembler::pkhbt(Register dst, Register src1, const Operand& src2,
1902                       Condition cond) {
1903   // Instruction details available in ARM DDI 0406C.b, A8.8.125.
1904   // cond(31-28) | 01101000(27-20) | Rn(19-16) |
1905   // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
1906   DCHECK(dst != pc);
1907   DCHECK(src1 != pc);
1908   DCHECK(src2.IsImmediateShiftedRegister());
1909   DCHECK(src2.rm() != pc);
1910   DCHECK((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
1911   DCHECK(src2.shift_op() == LSL);
1912   emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 |
1913        src2.shift_imm_ * B7 | B4 | src2.rm().code());
1914 }
1915 
pkhtb(Register dst,Register src1,const Operand & src2,Condition cond)1916 void Assembler::pkhtb(Register dst, Register src1, const Operand& src2,
1917                       Condition cond) {
1918   // Instruction details available in ARM DDI 0406C.b, A8.8.125.
1919   // cond(31-28) | 01101000(27-20) | Rn(19-16) |
1920   // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
1921   DCHECK(dst != pc);
1922   DCHECK(src1 != pc);
1923   DCHECK(src2.IsImmediateShiftedRegister());
1924   DCHECK(src2.rm() != pc);
1925   DCHECK((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
1926   DCHECK(src2.shift_op() == ASR);
1927   int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_;
1928   emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 | asr * B7 |
1929        B6 | B4 | src2.rm().code());
1930 }
1931 
sxtb(Register dst,Register src,int rotate,Condition cond)1932 void Assembler::sxtb(Register dst, Register src, int rotate, Condition cond) {
1933   // Instruction details available in ARM DDI 0406C.b, A8.8.233.
1934   // cond(31-28) | 01101010(27-20) | 1111(19-16) |
1935   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1936   DCHECK(dst != pc);
1937   DCHECK(src != pc);
1938   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1939   emit(cond | 0x6A * B20 | 0xF * B16 | dst.code() * B12 |
1940        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
1941 }
1942 
sxtab(Register dst,Register src1,Register src2,int rotate,Condition cond)1943 void Assembler::sxtab(Register dst, Register src1, Register src2, int rotate,
1944                       Condition cond) {
1945   // Instruction details available in ARM DDI 0406C.b, A8.8.233.
1946   // cond(31-28) | 01101010(27-20) | Rn(19-16) |
1947   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1948   DCHECK(dst != pc);
1949   DCHECK(src1 != pc);
1950   DCHECK(src2 != pc);
1951   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1952   emit(cond | 0x6A * B20 | src1.code() * B16 | dst.code() * B12 |
1953        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
1954 }
1955 
sxth(Register dst,Register src,int rotate,Condition cond)1956 void Assembler::sxth(Register dst, Register src, int rotate, Condition cond) {
1957   // Instruction details available in ARM DDI 0406C.b, A8.8.235.
1958   // cond(31-28) | 01101011(27-20) | 1111(19-16) |
1959   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1960   DCHECK(dst != pc);
1961   DCHECK(src != pc);
1962   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1963   emit(cond | 0x6B * B20 | 0xF * B16 | dst.code() * B12 |
1964        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
1965 }
1966 
sxtah(Register dst,Register src1,Register src2,int rotate,Condition cond)1967 void Assembler::sxtah(Register dst, Register src1, Register src2, int rotate,
1968                       Condition cond) {
1969   // Instruction details available in ARM DDI 0406C.b, A8.8.235.
1970   // cond(31-28) | 01101011(27-20) | Rn(19-16) |
1971   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1972   DCHECK(dst != pc);
1973   DCHECK(src1 != pc);
1974   DCHECK(src2 != pc);
1975   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1976   emit(cond | 0x6B * B20 | src1.code() * B16 | dst.code() * B12 |
1977        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
1978 }
1979 
uxtb(Register dst,Register src,int rotate,Condition cond)1980 void Assembler::uxtb(Register dst, Register src, int rotate, Condition cond) {
1981   // Instruction details available in ARM DDI 0406C.b, A8.8.274.
1982   // cond(31-28) | 01101110(27-20) | 1111(19-16) |
1983   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1984   DCHECK(dst != pc);
1985   DCHECK(src != pc);
1986   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
1987   emit(cond | 0x6E * B20 | 0xF * B16 | dst.code() * B12 |
1988        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
1989 }
1990 
uxtab(Register dst,Register src1,Register src2,int rotate,Condition cond)1991 void Assembler::uxtab(Register dst, Register src1, Register src2, int rotate,
1992                       Condition cond) {
1993   // Instruction details available in ARM DDI 0406C.b, A8.8.271.
1994   // cond(31-28) | 01101110(27-20) | Rn(19-16) |
1995   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
1996   DCHECK(dst != pc);
1997   DCHECK(src1 != pc);
1998   DCHECK(src2 != pc);
1999   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2000   emit(cond | 0x6E * B20 | src1.code() * B16 | dst.code() * B12 |
2001        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
2002 }
2003 
uxtb16(Register dst,Register src,int rotate,Condition cond)2004 void Assembler::uxtb16(Register dst, Register src, int rotate, Condition cond) {
2005   // Instruction details available in ARM DDI 0406C.b, A8.8.275.
2006   // cond(31-28) | 01101100(27-20) | 1111(19-16) |
2007   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
2008   DCHECK(dst != pc);
2009   DCHECK(src != pc);
2010   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2011   emit(cond | 0x6C * B20 | 0xF * B16 | dst.code() * B12 |
2012        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
2013 }
2014 
uxth(Register dst,Register src,int rotate,Condition cond)2015 void Assembler::uxth(Register dst, Register src, int rotate, Condition cond) {
2016   // Instruction details available in ARM DDI 0406C.b, A8.8.276.
2017   // cond(31-28) | 01101111(27-20) | 1111(19-16) |
2018   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
2019   DCHECK(dst != pc);
2020   DCHECK(src != pc);
2021   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2022   emit(cond | 0x6F * B20 | 0xF * B16 | dst.code() * B12 |
2023        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
2024 }
2025 
uxtah(Register dst,Register src1,Register src2,int rotate,Condition cond)2026 void Assembler::uxtah(Register dst, Register src1, Register src2, int rotate,
2027                       Condition cond) {
2028   // Instruction details available in ARM DDI 0406C.b, A8.8.273.
2029   // cond(31-28) | 01101111(27-20) | Rn(19-16) |
2030   // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
2031   DCHECK(dst != pc);
2032   DCHECK(src1 != pc);
2033   DCHECK(src2 != pc);
2034   DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
2035   emit(cond | 0x6F * B20 | src1.code() * B16 | dst.code() * B12 |
2036        ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
2037 }
2038 
rbit(Register dst,Register src,Condition cond)2039 void Assembler::rbit(Register dst, Register src, Condition cond) {
2040   // Instruction details available in ARM DDI 0406C.b, A8.8.144.
2041   // cond(31-28) | 011011111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
2042   DCHECK(IsEnabled(ARMv7));
2043   DCHECK(dst != pc);
2044   DCHECK(src != pc);
2045   emit(cond | 0x6FF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
2046 }
2047 
rev(Register dst,Register src,Condition cond)2048 void Assembler::rev(Register dst, Register src, Condition cond) {
2049   // Instruction details available in ARM DDI 0406C.b, A8.8.144.
2050   // cond(31-28) | 011010111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
2051   DCHECK(dst != pc);
2052   DCHECK(src != pc);
2053   emit(cond | 0x6BF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
2054 }
2055 
// Status register access instructions.
mrs(Register dst,SRegister s,Condition cond)2057 void Assembler::mrs(Register dst, SRegister s, Condition cond) {
2058   DCHECK(dst != pc);
2059   emit(cond | B24 | s | 15 * B16 | dst.code() * B12);
2060 }
2061 
msr(SRegisterFieldMask fields,const Operand & src,Condition cond)2062 void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
2063                     Condition cond) {
2064   DCHECK_NE(fields & 0x000F0000, 0);  // At least one field must be set.
2065   DCHECK(((fields & 0xFFF0FFFF) == CPSR) || ((fields & 0xFFF0FFFF) == SPSR));
2066   Instr instr;
2067   if (src.IsImmediate()) {
2068     // Immediate.
2069     uint32_t rotate_imm;
2070     uint32_t immed_8;
2071     if (src.MustOutputRelocInfo(this) ||
2072         !FitsShifter(src.immediate(), &rotate_imm, &immed_8, nullptr)) {
2073       UseScratchRegisterScope temps(this);
2074       Register scratch = temps.Acquire();
2075       // Immediate operand cannot be encoded, load it first to a scratch
2076       // register.
2077       Move32BitImmediate(scratch, src);
2078       msr(fields, Operand(scratch), cond);
2079       return;
2080     }
2081     instr = I | rotate_imm * B8 | immed_8;
2082   } else {
2083     DCHECK(src.IsRegister());  // Only rm is allowed.
2084     instr = src.rm_.code();
2085   }
2086   emit(cond | instr | B24 | B21 | fields | 15 * B12);
2087 }
2088 
// Load/Store instructions.
ldr(Register dst,const MemOperand & src,Condition cond)2090 void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
2091   AddrMode2(cond | B26 | L, dst, src);
2092 }
2093 
str(Register src,const MemOperand & dst,Condition cond)2094 void Assembler::str(Register src, const MemOperand& dst, Condition cond) {
2095   AddrMode2(cond | B26, src, dst);
2096 }
2097 
ldrb(Register dst,const MemOperand & src,Condition cond)2098 void Assembler::ldrb(Register dst, const MemOperand& src, Condition cond) {
2099   AddrMode2(cond | B26 | B | L, dst, src);
2100 }
2101 
strb(Register src,const MemOperand & dst,Condition cond)2102 void Assembler::strb(Register src, const MemOperand& dst, Condition cond) {
2103   AddrMode2(cond | B26 | B, src, dst);
2104 }
2105 
ldrh(Register dst,const MemOperand & src,Condition cond)2106 void Assembler::ldrh(Register dst, const MemOperand& src, Condition cond) {
2107   AddrMode3(cond | L | B7 | H | B4, dst, src);
2108 }
2109 
strh(Register src,const MemOperand & dst,Condition cond)2110 void Assembler::strh(Register src, const MemOperand& dst, Condition cond) {
2111   AddrMode3(cond | B7 | H | B4, src, dst);
2112 }
2113 
ldrsb(Register dst,const MemOperand & src,Condition cond)2114 void Assembler::ldrsb(Register dst, const MemOperand& src, Condition cond) {
2115   AddrMode3(cond | L | B7 | S6 | B4, dst, src);
2116 }
2117 
ldrsh(Register dst,const MemOperand & src,Condition cond)2118 void Assembler::ldrsh(Register dst, const MemOperand& src, Condition cond) {
2119   AddrMode3(cond | L | B7 | S6 | H | B4, dst, src);
2120 }
2121 
ldrd(Register dst1,Register dst2,const MemOperand & src,Condition cond)2122 void Assembler::ldrd(Register dst1, Register dst2, const MemOperand& src,
2123                      Condition cond) {
2124   DCHECK(src.rm() == no_reg);
2125   DCHECK(dst1 != lr);  // r14.
2126   DCHECK_EQ(0, dst1.code() % 2);
2127   DCHECK_EQ(dst1.code() + 1, dst2.code());
2128   AddrMode3(cond | B7 | B6 | B4, dst1, src);
2129 }
2130 
strd(Register src1,Register src2,const MemOperand & dst,Condition cond)2131 void Assembler::strd(Register src1, Register src2, const MemOperand& dst,
2132                      Condition cond) {
2133   DCHECK(dst.rm() == no_reg);
2134   DCHECK(src1 != lr);  // r14.
2135   DCHECK_EQ(0, src1.code() % 2);
2136   DCHECK_EQ(src1.code() + 1, src2.code());
2137   AddrMode3(cond | B7 | B6 | B5 | B4, src1, dst);
2138 }
2139 
ldr_pcrel(Register dst,int imm12,Condition cond)2140 void Assembler::ldr_pcrel(Register dst, int imm12, Condition cond) {
2141   AddrMode am = Offset;
2142   if (imm12 < 0) {
2143     imm12 = -imm12;
2144     am = NegOffset;
2145   }
2146   DCHECK(is_uint12(imm12));
2147   emit(cond | B26 | am | L | pc.code() * B16 | dst.code() * B12 | imm12);
2148 }
2149 
// Load/Store exclusive instructions.
ldrex(Register dst,Register src,Condition cond)2151 void Assembler::ldrex(Register dst, Register src, Condition cond) {
2152   // Instruction details available in ARM DDI 0406C.b, A8.8.75.
2153   // cond(31-28) | 00011001(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
2154   DCHECK(dst != pc);
2155   DCHECK(src != pc);
2156   emit(cond | B24 | B23 | B20 | src.code() * B16 | dst.code() * B12 | 0xF9F);
2157 }
2158 
strex(Register src1,Register src2,Register dst,Condition cond)2159 void Assembler::strex(Register src1, Register src2, Register dst,
2160                       Condition cond) {
2161   // Instruction details available in ARM DDI 0406C.b, A8.8.212.
2162   // cond(31-28) | 00011000(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
2163   // Rt(3-0)
2164   DCHECK(dst != pc);
2165   DCHECK(src1 != pc);
2166   DCHECK(src2 != pc);
2167   DCHECK(src1 != dst);
2168   DCHECK(src1 != src2);
2169   emit(cond | B24 | B23 | dst.code() * B16 | src1.code() * B12 | 0xF9 * B4 |
2170        src2.code());
2171 }
2172 
ldrexb(Register dst,Register src,Condition cond)2173 void Assembler::ldrexb(Register dst, Register src, Condition cond) {
2174   // Instruction details available in ARM DDI 0406C.b, A8.8.76.
2175   // cond(31-28) | 00011101(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
2176   DCHECK(dst != pc);
2177   DCHECK(src != pc);
2178   emit(cond | B24 | B23 | B22 | B20 | src.code() * B16 | dst.code() * B12 |
2179        0xF9F);
2180 }
2181 
// STREXB: byte-sized variant of strex; src1 receives the success flag.
void Assembler::strexb(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.213.
  // cond(31-28) | 00011100(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);   // Rd must differ from Rn and Rt (unpredictable).
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}
2195 
// LDREXH: halfword-sized variant of ldrex (see above).
void Assembler::ldrexh(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.78.
  // cond(31-28) | 00011111(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B21 | B20 | src.code() * B16 |
       dst.code() * B12 | 0xF9F);
}
2204 
// STREXH: halfword-sized variant of strex; src1 receives the success flag.
void Assembler::strexh(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.215.
  // cond(31-28) | 00011110(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);   // Rd must differ from Rn and Rt (unpredictable).
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | B21 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}
2218 
// LDREXD: exclusive load of a doubleword into the register pair
// (dst1, dst2); only dst1 is encoded, dst2 is implicitly dst1+1.
void Assembler::ldrexd(Register dst1, Register dst2, Register src,
                       Condition cond) {
  // cond(31-28) | 00011011(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst1 != lr);  // r14.
  // The pair of destination registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  emit(cond | B24 | B23 | B21 | B20 | src.code() * B16 | dst1.code() * B12 |
       0xF9F);
}
2230 
// STREXD: exclusive store of the register pair (src1, src2) to [dst];
// res receives 0 on success, 1 on failure. Only src1 is encoded, src2 is
// implicitly src1+1.
void Assembler::strexd(Register res, Register src1, Register src2, Register dst,
                       Condition cond) {
  // cond(31-28) | 00011010(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(src1 != lr);  // r14.
  // The pair of source registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  emit(cond | B24 | B23 | B21 | dst.code() * B16 | res.code() * B12 |
       0xF9 * B4 | src1.code());
}
2242 
2243 // Preload instructions.
// PLD: hint to preload the cache line containing the given address.
// Only immediate-offset addressing (|imm12| < 4096) is supported here.
void Assembler::pld(const MemOperand& address) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.128.
  // 1111(31-28) | 0111(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) |
  // 1111(15-12) | imm5(11-07) | type(6-5) | 0(4)| Rm(3-0) |
  DCHECK(address.rm() == no_reg);   // Register offsets are not supported.
  DCHECK(address.am() == Offset);   // No writeback forms.
  int U = B23;                      // U=1: add offset; cleared for negative.
  int offset = address.offset();
  if (offset < 0) {
    offset = -offset;
    U = 0;
  }
  DCHECK_LT(offset, 4096);
  emit(kSpecialCondition | B26 | B24 | U | B22 | B20 |
       address.rn().code() * B16 | 0xF * B12 | offset);
}
2260 
2261 // Load/Store multiple instructions.
// LDM: load multiple registers in |dst| from memory at |base| using block
// addressing mode |am|. May trigger constant pool emission after a return
// sequence (ldm that loads pc).
void Assembler::ldm(BlockAddrMode am, Register base, RegList dst,
                    Condition cond) {
  // ABI stack constraint: ldmxx base, {..sp..}  base != sp  is not restartable.
  DCHECK(base == sp || !dst.has(sp));

  AddrMode4(cond | B27 | am | L, base, dst);

  // Emit the constant pool after a function return implemented by ldm ..{..pc}.
  if (cond == al && dst.has(pc)) {
    // There is a slight chance that the ldm instruction was actually a call,
    // in which case it would be wrong to return into the constant pool; we
    // recognize this case by checking if the emission of the pool was blocked
    // at the pc of the ldm instruction by a mov lr, pc instruction; if this is
    // the case, we emit a jump over the pool.
    CheckConstPool(true, no_const_pool_before_ == pc_offset() - kInstrSize);
  }
}
2279 
// STM: store multiple registers in |src| to memory at |base| using block
// addressing mode |am|.
void Assembler::stm(BlockAddrMode am, Register base, RegList src,
                    Condition cond) {
  AddrMode4(cond | B27 | am, base, src);
}
2284 
2285 // Exception-generating instructions and debugging support.
2286 // Stops with a non-negative code less than kNumOfWatchedStops support
2287 // enabling/disabling and a counter feature. See simulator-arm.h .
// Exception-generating instructions and debugging support.
// Stops with a non-negative code less than kNumOfWatchedStops support
// enabling/disabling and a counter feature. See simulator-arm.h .
void Assembler::stop(Condition cond, int32_t code) {
#ifndef __arm__
  // Host is not ARM: we are generating code for the simulator, which
  // interprets svc with a kStopCode payload as a stop.
  DCHECK_GE(code, kDefaultStopCode);
  {
    // Keep the svc and any surrounding code contiguous; a constant pool in
    // the middle would confuse the simulator's stop handling.
    BlockConstPoolScope block_const_pool(this);
    if (code >= 0) {
      svc(kStopCode + code, cond);
    } else {
      svc(kStopCode + kMaxStopCode, cond);
    }
  }
#else   // def __arm__
  // On real hardware a stop is a breakpoint; bkpt is unconditional, so a
  // non-al condition is synthesized with a branch around it.
  if (cond != al) {
    Label skip;
    b(&skip, NegateCondition(cond));
    bkpt(0);
    bind(&skip);
  } else {
    bkpt(0);
  }
#endif  // def __arm__
}
2310 
// BKPT: software breakpoint carrying a 16-bit immediate, split across the
// instruction's imm12 (bits 19-8) and imm4 (bits 3-0) fields.
void Assembler::bkpt(uint32_t imm16) {
  DCHECK(is_uint16(imm16));
  emit(al | B24 | B21 | (imm16 >> 4) * B8 | BKPT | (imm16 & 0xF));
}
2315 
// SVC (supervisor call) with a 24-bit immediate payload.
void Assembler::svc(uint32_t imm24, Condition cond) {
  CHECK(is_uint24(imm24));  // Hard CHECK: a truncated payload would be silent.
  emit(cond | 15 * B24 | imm24);
}
2320 
// DMB: data memory barrier. Uses the ARMv7 encoding when available,
// otherwise falls back to the equivalent CP15 write.
void Assembler::dmb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-378.
    emit(kSpecialCondition | 0x57FF * B12 | 5 * B4 | option);
  } else {
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DMB: CRn=c7, opc1=0, CRm=c10, opc2=5, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 5);
  }
}
2331 
// DSB: data synchronization barrier. Uses the ARMv7 encoding when available,
// otherwise falls back to the equivalent CP15 write.
void Assembler::dsb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-380.
    emit(kSpecialCondition | 0x57FF * B12 | 4 * B4 | option);
  } else {
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DSB: CRn=c7, opc1=0, CRm=c10, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 4);
  }
}
2342 
// ISB: instruction synchronization barrier. Uses the ARMv7 encoding when
// available, otherwise falls back to the equivalent CP15 write.
void Assembler::isb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-389.
    emit(kSpecialCondition | 0x57FF * B12 | 6 * B4 | option);
  } else {
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15ISB: CRn=c7, opc1=0, CRm=c5, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr5, 4);
  }
}
2353 
// CSDB: consumption-of-speculative-data barrier, used for Spectre-style
// mitigation. The encoding is a fixed 32-bit pattern.
void Assembler::csdb() {
  // Details available in Arm Cache Speculation Side-channels white paper,
  // version 1.1, page 4.
  emit(0xE320F014);
}
2359 
2360 // Coprocessor instructions.
// Coprocessor instructions.

// CDP: coprocessor data processing operation (no register file transfer).
void Assembler::cdp(Coprocessor coproc, int opcode_1, CRegister crd,
                    CRegister crn, CRegister crm, int opcode_2,
                    Condition cond) {
  DCHECK(is_uint4(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 15) * B20 | crn.code() * B16 |
       crd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | crm.code());
}
2368 
// CDP2: unconditional variant of cdp (encoded via the special condition).
void Assembler::cdp2(Coprocessor coproc, int opcode_1, CRegister crd,
                     CRegister crn, CRegister crm, int opcode_2) {
  cdp(coproc, opcode_1, crd, crn, crm, opcode_2, kSpecialCondition);
}
2373 
// MCR: move from ARM core register |rd| to a coprocessor register.
void Assembler::mcr(Coprocessor coproc, int opcode_1, Register rd,
                    CRegister crn, CRegister crm, int opcode_2,
                    Condition cond) {
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | crn.code() * B16 |
       rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code());
}
2381 
// MCR2: unconditional variant of mcr (encoded via the special condition).
void Assembler::mcr2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  mcr(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}
2386 
// MRC: move from a coprocessor register to ARM core register |rd|.
// Differs from mcr only in the L (load) bit.
void Assembler::mrc(Coprocessor coproc, int opcode_1, Register rd,
                    CRegister crn, CRegister crm, int opcode_2,
                    Condition cond) {
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | L | crn.code() * B16 |
       rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code());
}
2394 
// MRC2: unconditional variant of mrc (encoded via the special condition).
void Assembler::mrc2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  mrc(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}
2399 
// LDC: load coprocessor register |crd| from memory, addressed via |src|
// (addressing mode 5).
void Assembler::ldc(Coprocessor coproc, CRegister crd, const MemOperand& src,
                    LFlag l, Condition cond) {
  AddrMode5(cond | B27 | B26 | l | L | coproc * B8, crd, src);
}
2404 
// LDC (unindexed form): load coprocessor register |crd| from [rn], passing
// an 8-bit coprocessor-defined |option| instead of an offset.
void Assembler::ldc(Coprocessor coproc, CRegister crd, Register rn, int option,
                    LFlag l, Condition cond) {
  // Unindexed addressing.
  DCHECK(is_uint8(option));
  emit(cond | B27 | B26 | U | l | L | rn.code() * B16 | crd.code() * B12 |
       coproc * B8 | (option & 255));
}
2412 
// LDC2: unconditional variant of ldc (encoded via the special condition).
void Assembler::ldc2(Coprocessor coproc, CRegister crd, const MemOperand& src,
                     LFlag l) {
  ldc(coproc, crd, src, l, kSpecialCondition);
}
2417 
// LDC2 (unindexed form): unconditional variant of the unindexed ldc.
void Assembler::ldc2(Coprocessor coproc, CRegister crd, Register rn, int option,
                     LFlag l) {
  ldc(coproc, crd, rn, option, l, kSpecialCondition);
}
2422 
2423 // Support for VFP.
2424 
// Support for VFP.

// VLDR: load double register |dst| from [base +/- offset]. Offsets that do
// not fit the word-aligned 8-bit immediate field are handled by computing
// the address into a scratch register first.
void Assembler::vldr(const DwVfpRegister dst, const Register base, int offset,
                     const Condition cond) {
  // Ddst = MEM(Rbase + offset).
  // Instruction details available in ARM DDI 0406C.b, A8-924.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 01(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | offset
  DCHECK(VfpRegisterIsAvailable(dst));
  int u = 1;  // U bit: add (1) or subtract (0) the offset.
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);  // -kMinInt overflows.
    offset = -offset;
    u = 0;
  }
  int vd, d;
  dst.split_code(&vd, &d);

  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // Offset fits the encoding: stored as a word count in the low 8 bits.
    emit(cond | 0xD * B24 | u * B23 | d * B22 | B20 | base.code() * B16 |
         vd * B12 | 0xB * B8 | ((offset / 4) & 255));
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    // Load with zero immediate offset from the computed address.
    emit(cond | 0xD * B24 | d * B22 | B20 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}
2460 
vldr(const DwVfpRegister dst,const MemOperand & operand,const Condition cond)2461 void Assembler::vldr(const DwVfpRegister dst, const MemOperand& operand,
2462                      const Condition cond) {
2463   DCHECK(VfpRegisterIsAvailable(dst));
2464   DCHECK(operand.am_ == Offset);
2465   if (operand.rm().is_valid()) {
2466     UseScratchRegisterScope temps(this);
2467     Register scratch = temps.Acquire();
2468     add(scratch, operand.rn(),
2469         Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2470     vldr(dst, scratch, 0, cond);
2471   } else {
2472     vldr(dst, operand.rn(), operand.offset(), cond);
2473   }
2474 }
2475 
// VLDR: load single-precision register |dst| from [base +/- offset]; large
// offsets are materialized into a scratch register as in the double variant.
void Assembler::vldr(const SwVfpRegister dst, const Register base, int offset,
                     const Condition cond) {
  // Sdst = MEM(Rbase + offset).
  // Instruction details available in ARM DDI 0406A, A8-628.
  // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) |
  // Vdst(15-12) | 1010(11-8) | offset
  int u = 1;  // U bit: add (1) or subtract (0) the offset.
  if (offset < 0) {
    offset = -offset;
    u = 0;
  }
  int sd, d;
  dst.split_code(&sd, &d);
  DCHECK_GE(offset, 0);

  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // Offset fits the encoding: stored as a word count in the low 8 bits.
    emit(cond | u * B23 | d * B22 | 0xD1 * B20 | base.code() * B16 | sd * B12 |
         0xA * B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | d * B22 | 0xD1 * B20 | scratch.code() * B16 | sd * B12 |
         0xA * B8);
  }
}
2509 
vldr(const SwVfpRegister dst,const MemOperand & operand,const Condition cond)2510 void Assembler::vldr(const SwVfpRegister dst, const MemOperand& operand,
2511                      const Condition cond) {
2512   DCHECK(operand.am_ == Offset);
2513   if (operand.rm().is_valid()) {
2514     UseScratchRegisterScope temps(this);
2515     Register scratch = temps.Acquire();
2516     add(scratch, operand.rn(),
2517         Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2518     vldr(dst, scratch, 0, cond);
2519   } else {
2520     vldr(dst, operand.rn(), operand.offset(), cond);
2521   }
2522 }
2523 
// VSTR: store double register |src| to [base +/- offset]. Offsets that do
// not fit the word-aligned 8-bit immediate field are handled by computing
// the address into a scratch register first.
void Assembler::vstr(const DwVfpRegister src, const Register base, int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = Dsrc.
  // Instruction details available in ARM DDI 0406C.b, A8-1082.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 00(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | (offset/4)
  DCHECK(VfpRegisterIsAvailable(src));
  int u = 1;  // U bit: add (1) or subtract (0) the offset.
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);  // -kMinInt overflows.
    offset = -offset;
    u = 0;
  }
  DCHECK_GE(offset, 0);
  int vd, d;
  src.split_code(&vd, &d);

  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // Offset fits the encoding: stored as a word count in the low 8 bits.
    emit(cond | 0xD * B24 | u * B23 | d * B22 | base.code() * B16 | vd * B12 |
         0xB * B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | 0xD * B24 | d * B22 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}
2559 
vstr(const DwVfpRegister src,const MemOperand & operand,const Condition cond)2560 void Assembler::vstr(const DwVfpRegister src, const MemOperand& operand,
2561                      const Condition cond) {
2562   DCHECK(VfpRegisterIsAvailable(src));
2563   DCHECK(operand.am_ == Offset);
2564   if (operand.rm().is_valid()) {
2565     UseScratchRegisterScope temps(this);
2566     Register scratch = temps.Acquire();
2567     add(scratch, operand.rn(),
2568         Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2569     vstr(src, scratch, 0, cond);
2570   } else {
2571     vstr(src, operand.rn(), operand.offset(), cond);
2572   }
2573 }
2574 
// VSTR: store single-precision register |src| to [base +/- offset]; large
// offsets are materialized into a scratch register as in the double variant.
void Assembler::vstr(const SwVfpRegister src, const Register base, int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = SSrc.
  // Instruction details available in ARM DDI 0406A, A8-786.
  // cond(31-28) | 1101(27-24)| U000(23-20) | Rbase(19-16) |
  // Vdst(15-12) | 1010(11-8) | (offset/4)
  int u = 1;  // U bit: add (1) or subtract (0) the offset.
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);  // -kMinInt overflows.
    offset = -offset;
    u = 0;
  }
  int sd, d;
  src.split_code(&sd, &d);
  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // Offset fits the encoding: stored as a word count in the low 8 bits.
    emit(cond | u * B23 | d * B22 | 0xD0 * B20 | base.code() * B16 | sd * B12 |
         0xA * B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | d * B22 | 0xD0 * B20 | scratch.code() * B16 | sd * B12 |
         0xA * B8);
  }
}
2608 
vstr(const SwVfpRegister src,const MemOperand & operand,const Condition cond)2609 void Assembler::vstr(const SwVfpRegister src, const MemOperand& operand,
2610                      const Condition cond) {
2611   DCHECK(operand.am_ == Offset);
2612   if (operand.rm().is_valid()) {
2613     UseScratchRegisterScope temps(this);
2614     Register scratch = temps.Acquire();
2615     add(scratch, operand.rn(),
2616         Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
2617     vstr(src, scratch, 0, cond);
2618   } else {
2619     vstr(src, operand.rn(), operand.offset(), cond);
2620   }
2621 }
2622 
// VLDM: load the consecutive double registers [first..last] from memory at
// |base| using block addressing mode |am|.
void Assembler::vldm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-922.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);  // Only these modes are valid.
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);  // At most 16 doubles per vldm.
  // imm8 field holds the number of words transferred (2 per double).
  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
       0xB * B8 | count * 2);
}
2640 
// VSTM: store the consecutive double registers [first..last] to memory at
// |base| using block addressing mode |am|.
void Assembler::vstm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1080.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);  // Only these modes are valid.
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);  // At most 16 doubles per vstm.
  // imm8 field holds the number of words transferred (2 per double).
  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
       0xB * B8 | count * 2);
}
2658 
// VLDM: load the consecutive single-precision registers [first..last] from
// memory at |base| using block addressing mode |am|.
void Assembler::vldm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-626.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | count
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);  // Only these modes are valid.
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
       0xA * B8 | count);
}
2674 
// VSTM: store the consecutive single-precision registers [first..last] to
// memory at |base| using block addressing mode |am|.
void Assembler::vstm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-784.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | count
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);  // Only these modes are valid.
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
       0xA * B8 | count);
}
2690 
DoubleAsTwoUInt32(base::Double d,uint32_t * lo,uint32_t * hi)2691 static void DoubleAsTwoUInt32(base::Double d, uint32_t* lo, uint32_t* hi) {
2692   uint64_t i = d.AsUint64();
2693 
2694   *lo = i & 0xFFFFFFFF;
2695   *hi = i >> 32;
2696 }
2697 
// Scatters the 8-bit immediate [abcdefgh] into the NEON vmov modified
// immediate fields: a -> bit 24, bcd -> bits 18-16, efgh -> bits 3-0.
static void WriteVmovIntImmEncoding(uint8_t imm, uint32_t* encoding) {
  // Promote once to a 32-bit value; the shifts below would otherwise rely on
  // implicit integer promotion of uint8_t.
  const uint32_t bits = imm;
  *encoding = ((bits & 0x80) << 17)     // a
              | ((bits & 0x70) << 12)   // bcd
              | (bits & 0x0F);          //  efgh
}
2704 
2705 // This checks if imm can be encoded into an immediate for vmov.
2706 // See Table A7-15 in ARM DDI 0406C.d.
2707 // Currently only supports the first row and op=0 && cmode=1110.
FitsVmovIntImm(uint64_t imm,uint32_t * encoding,uint8_t * cmode)2708 static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) {
2709   uint32_t lo = imm & 0xFFFFFFFF;
2710   uint32_t hi = imm >> 32;
2711   if ((lo == hi && ((lo & 0xffffff00) == 0))) {
2712     WriteVmovIntImmEncoding(imm & 0xff, encoding);
2713     *cmode = 0;
2714     return true;
2715   } else if ((lo == hi) && ((lo & 0xffff) == (lo >> 16)) &&
2716              ((lo & 0xff) == (lo >> 24))) {
2717     // Check that all bytes in imm are the same.
2718     WriteVmovIntImmEncoding(imm & 0xff, encoding);
2719     *cmode = 0xe;
2720     return true;
2721   }
2722 
2723   return false;
2724 }
2725 
// VMOV (immediate, NEON): set double register |dst| to the 64-bit pattern
// |imm|, which must fit a supported NEON modified-immediate encoding.
void Assembler::vmov(const DwVfpRegister dst, uint64_t imm) {
  uint32_t enc;
  uint8_t cmode;
  uint8_t op = 0;  // Only op=0 forms are generated (see FitsVmovIntImm).
  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
    CpuFeatureScope scope(this, NEON);
    // Instruction details available in ARM DDI 0406C.b, A8-937.
    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
    // | 0(7) | 0(6) | op(5) | 4(1) | imm4(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
         op * B5 | 0x1 * B4 | enc);
  } else {
    // No fallback path for unencodable immediates.
    UNIMPLEMENTED();
  }
}
2743 
// VMOV (immediate, NEON): set quad register |dst| to |imm| replicated across
// both 64-bit halves. Same encoding as the double form with Q(6) set.
void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
  uint32_t enc;
  uint8_t cmode;
  uint8_t op = 0;  // Only op=0 forms are generated (see FitsVmovIntImm).
  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
    CpuFeatureScope scope(this, NEON);
    // Instruction details available in ARM DDI 0406C.b, A8-937.
    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
    // | 0(7) | Q(6) | op(5) | 4(1) | imm4(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
         0x1 * B6 | op * B5 | 0x1 * B4 | enc);
  } else {
    // No fallback path for unencodable immediates.
    UNIMPLEMENTED();
  }
}
2761 
2762 // Only works for little endian floating point formats.
2763 // We don't support VFP on the mixed endian floating point platform.
// Only works for little endian floating point formats.
// We don't support VFP on the mixed endian floating point platform.
//
// Returns true and writes the VFP immediate encoding if |d| is representable
// as a vmov floating-point immediate; otherwise returns false.
static bool FitsVmovFPImmediate(base::Double d, uint32_t* encoding) {
  // VMOV can accept an immediate of the form:
  //
  //  +/- m * 2^(-n) where 16 <= m <= 31 and 0 <= n <= 7
  //
  // The immediate is encoded using an 8-bit quantity, comprised of two
  // 4-bit fields. For an 8-bit immediate of the form:
  //
  //  [abcdefgh]
  //
  // where a is the MSB and h is the LSB, an immediate 64-bit double can be
  // created of the form:
  //
  //  [aBbbbbbb,bbcdefgh,00000000,00000000,
  //      00000000,00000000,00000000,00000000]
  //
  // where B = ~b.
  //

  uint32_t lo, hi;
  DoubleAsTwoUInt32(d, &lo, &hi);

  // The most obvious constraint is the long block of zeroes.
  if ((lo != 0) || ((hi & 0xFFFF) != 0)) {
    return false;
  }

  // Bits 61:54 must be all clear or all set.
  if (((hi & 0x3FC00000) != 0) && ((hi & 0x3FC00000) != 0x3FC00000)) {
    return false;
  }

  // Bit 62 must be NOT bit 61.
  if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
    return false;
  }

  // Create the encoded immediate in the form:
  //  [00000000,0000abcd,00000000,0000efgh]
  *encoding = (hi >> 16) & 0xF;       // Low nybble.
  *encoding |= (hi >> 4) & 0x70000;   // Low three bits of the high nybble.
  *encoding |= (hi >> 12) & 0x80000;  // Top bit of the high nybble.

  return true;
}
2809 
// VMOV (immediate): set single register |dst| to |imm|. Uses the VFPv3
// immediate form when the value is encodable; otherwise moves the raw bits
// through a core scratch register.
void Assembler::vmov(const SwVfpRegister dst, Float32 imm) {
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) &&
      FitsVmovFPImmediate(base::Double(imm.get_scalar()), &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The float can be encoded in the instruction.
    //
    // Sd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
  } else {
    // Fallback: load the bit pattern into a core register, then transfer it.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    mov(scratch, Operand(imm.get_bits()));
    vmov(dst, scratch);
  }
}
2831 
// VMOV (immediate): set double register |dst| to |imm|. Uses the VFPv3
// immediate form when encodable; otherwise synthesizes the value from core
// registers, optionally using |extra_scratch| to save an instruction.
void Assembler::vmov(const DwVfpRegister dst, base::Double imm,
                     const Register extra_scratch) {
  DCHECK(VfpRegisterIsAvailable(dst));
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) && FitsVmovFPImmediate(imm, &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The double can be encoded in the instruction.
    //
    // Dd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 |
         enc);
  } else {
    // Synthesise the double from ARM immediates.
    uint32_t lo, hi;
    DoubleAsTwoUInt32(imm, &lo, &hi);
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();

    if (lo == hi) {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      vmov(dst, scratch, scratch);
    } else if (extra_scratch == no_reg) {
      // We only have one spare scratch register.
      mov(scratch, Operand(lo));
      vmov(NeonS32, dst, 0, scratch);
      // If the halfwords of lo and hi differ only in the top 16 bits, a movt
      // can patch scratch in place instead of a full mov.
      if (((lo & 0xFFFF) == (hi & 0xFFFF)) && CpuFeatures::IsSupported(ARMv7)) {
        CpuFeatureScope scope(this, ARMv7);
        movt(scratch, hi >> 16);
      } else {
        mov(scratch, Operand(hi));
      }
      vmov(NeonS32, dst, 1, scratch);
    } else {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      mov(extra_scratch, Operand(hi));
      vmov(dst, scratch, extra_scratch);
    }
  }
}
2880 
// VMOV (register): copy single register |src| into |dst|.
void Assembler::vmov(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Sd = Sm
  // Instruction details available in ARM DDI 0406B, A8-642.
  int sd, d, sm, m;
  dst.split_code(&sd, &d);
  src.split_code(&sm, &m);
  emit(cond | 0xE * B24 | d * B22 | 0xB * B20 | sd * B12 | 0xA * B8 | B6 |
       m * B5 | sm);
}
2891 
// VMOV (register): copy double register |src| into |dst|.
void Assembler::vmov(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Dd = Dm
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B6 |
       m * B5 | vm);
}
2907 
// VMOV (two core registers to double): dst = <src2:src1>, with src1 going to
// the low word and src2 to the high word.
void Assembler::vmov(const DwVfpRegister dst, const Register src1,
                     const Register src2, const Condition cond) {
  // Dm = <Rt,Rt2>.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(src1 != pc && src2 != pc);
  int vm, m;
  dst.split_code(&vm, &m);
  emit(cond | 0xC * B24 | B22 | src2.code() * B16 | src1.code() * B12 |
       0xB * B8 | m * B5 | B4 | vm);
}
2921 
vmov(const Register dst1,const Register dst2,const DwVfpRegister src,const Condition cond)2922 void Assembler::vmov(const Register dst1, const Register dst2,
2923                      const DwVfpRegister src, const Condition cond) {
2924   // <Rt,Rt2> = Dm.
2925   // Instruction details available in ARM DDI 0406C.b, A8-948.
2926   // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) |
2927   // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
2928   DCHECK(VfpRegisterIsAvailable(src));
2929   DCHECK(dst1 != pc && dst2 != pc);
2930   int vm, m;
2931   src.split_code(&vm, &m);
2932   emit(cond | 0xC * B24 | B22 | B20 | dst2.code() * B16 | dst1.code() * B12 |
2933        0xB * B8 | m * B5 | B4 | vm);
2934 }
2935 
vmov(const SwVfpRegister dst,const Register src,const Condition cond)2936 void Assembler::vmov(const SwVfpRegister dst, const Register src,
2937                      const Condition cond) {
2938   // Sn = Rt.
2939   // Instruction details available in ARM DDI 0406A, A8-642.
2940   // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) |
2941   // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
2942   DCHECK(src != pc);
2943   int sn, n;
2944   dst.split_code(&sn, &n);
2945   emit(cond | 0xE * B24 | sn * B16 | src.code() * B12 | 0xA * B8 | n * B7 | B4);
2946 }
2947 
vmov(const Register dst,const SwVfpRegister src,const Condition cond)2948 void Assembler::vmov(const Register dst, const SwVfpRegister src,
2949                      const Condition cond) {
2950   // Rt = Sn.
2951   // Instruction details available in ARM DDI 0406A, A8-642.
2952   // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
2953   // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
2954   DCHECK(dst != pc);
2955   int sn, n;
2956   src.split_code(&sn, &n);
2957   emit(cond | 0xE * B24 | B20 | sn * B16 | dst.code() * B12 | 0xA * B8 |
2958        n * B7 | B4);
2959 }
2960 
2961 // Type of data to read from or write to VFP register.
2962 // Used as specifier in generic vcvt instruction.
2963 enum VFPType { S32, U32, F32, F64 };
2964 
IsSignedVFPType(VFPType type)2965 static bool IsSignedVFPType(VFPType type) {
2966   switch (type) {
2967     case S32:
2968       return true;
2969     case U32:
2970       return false;
2971     default:
2972       UNREACHABLE();
2973   }
2974 }
2975 
IsIntegerVFPType(VFPType type)2976 static bool IsIntegerVFPType(VFPType type) {
2977   switch (type) {
2978     case S32:
2979     case U32:
2980       return true;
2981     case F32:
2982     case F64:
2983       return false;
2984     default:
2985       UNREACHABLE();
2986   }
2987 }
2988 
IsDoubleVFPType(VFPType type)2989 static bool IsDoubleVFPType(VFPType type) {
2990   switch (type) {
2991     case F32:
2992       return false;
2993     case F64:
2994       return true;
2995     default:
2996       UNREACHABLE();
2997   }
2998 }
2999 
// Split five bit reg_code based on size of reg_type.
//  32-bit register codes are Vm:M
//  64-bit register codes are M:Vm
// where Vm is four bits, and M is a single bit.
static void SplitRegCode(VFPType reg_type, int reg_code, int* vm, int* m) {
  DCHECK((reg_code >= 0) && (reg_code <= 31));
  // Integer operands use the single-precision (32-bit) register split as
  // well. Note the evaluation order is load-bearing: IsDoubleVFPType() hits
  // UNREACHABLE for the integer types, so IsIntegerVFPType() must be checked
  // first to short-circuit that call.
  if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) {
    SwVfpRegister::split_code(reg_code, vm, m);
  } else {
    DwVfpRegister::split_code(reg_code, vm, m);
  }
}
3012 
// Encode vcvt.src_type.dst_type instruction.
// Exactly one of the two types may be an integer type; same-type "conversion"
// is not encodable (see the DCHECKs below).
static Instr EncodeVCVT(const VFPType dst_type, const int dst_code,
                        const VFPType src_type, const int src_code,
                        VFPConversionMode mode, const Condition cond) {
  DCHECK(src_type != dst_type);
  int D, Vd, M, Vm;
  SplitRegCode(src_type, src_code, &Vm, &M);
  SplitRegCode(dst_type, dst_code, &Vd, &D);

  if (IsIntegerVFPType(dst_type) || IsIntegerVFPType(src_type)) {
    // Conversion between IEEE floating point and 32-bit integer.
    // Instruction details available in ARM DDI 0406B, A8.6.295.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 1(19) | opc2(18-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | op(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    // int <-> int is not a valid VCVT: at most one integer type.
    DCHECK(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type));

    int sz, opc2, op;

    if (IsIntegerVFPType(dst_type)) {
      // Float -> integer: opc2 selects a signed (0x5) or unsigned (0x4)
      // destination, sz is the source precision, and the caller-supplied
      // conversion mode is placed directly in the op bit.
      opc2 = IsSignedVFPType(dst_type) ? 0x5 : 0x4;
      sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
      op = mode;
    } else {
      // Integer -> float: opc2 is fixed at 0, sz is the destination
      // precision, and op flags a signed (1) vs unsigned (0) source.
      DCHECK(IsIntegerVFPType(src_type));
      opc2 = 0x0;
      sz = IsDoubleVFPType(dst_type) ? 0x1 : 0x0;
      op = IsSignedVFPType(src_type) ? 0x1 : 0x0;
    }

    return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | B19 | opc2 * B16 |
            Vd * B12 | 0x5 * B9 | sz * B8 | op * B7 | B6 | M * B5 | Vm);
  } else {
    // Conversion between IEEE double and single precision.
    // Instruction details available in ARM DDI 0406B, A8.6.298.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0111(19-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    // sz encodes the source precision; the destination is the other one.
    int sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
    return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | 0x7 * B16 |
            Vd * B12 | 0x5 * B9 | sz * B8 | B7 | B6 | M * B5 | Vm);
  }
}
3054 
// Dd = (double)Sm, treating Sm as a signed 32-bit integer.
void Assembler::vcvt_f64_s32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), S32, src.code(), mode, cond));
}

// Sd = (float)Sm, treating Sm as a signed 32-bit integer.
void Assembler::vcvt_f32_s32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(F32, dst.code(), S32, src.code(), mode, cond));
}

// Dd = (double)Sm, treating Sm as an unsigned 32-bit integer.
void Assembler::vcvt_f64_u32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), U32, src.code(), mode, cond));
}

// Sd = (float)Sm, treating Sm as an unsigned 32-bit integer.
void Assembler::vcvt_f32_u32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(F32, dst.code(), U32, src.code(), mode, cond));
}

// Sd = signed 32-bit integer from the float in Sm; |mode| is passed through
// to the encoding's op bit (see EncodeVCVT).
void Assembler::vcvt_s32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(S32, dst.code(), F32, src.code(), mode, cond));
}

// Sd = unsigned 32-bit integer from the float in Sm.
void Assembler::vcvt_u32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(U32, dst.code(), F32, src.code(), mode, cond));
}

// Sd = signed 32-bit integer from the double in Dm.
void Assembler::vcvt_s32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(S32, dst.code(), F64, src.code(), mode, cond));
}

// Sd = unsigned 32-bit integer from the double in Dm.
void Assembler::vcvt_u32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(U32, dst.code(), F64, src.code(), mode, cond));
}

// Dd = (double)Sm: widen single precision to double precision.
void Assembler::vcvt_f64_f32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), F32, src.code(), mode, cond));
}

// Sd = (float)Dm: narrow double precision to single precision.
void Assembler::vcvt_f32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(F32, dst.code(), F64, src.code(), mode, cond));
}
3110 
// Fixed-point to double-precision conversion. The encoding names only a
// single register, so the signed fixed-point source value with
// |fraction_bits| fractional bits is taken from |dst| itself and converted
// in place.
void Assembler::vcvt_f64_s32(const DwVfpRegister dst, int fraction_bits,
                             const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-874.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 1010(19-16) | Vd(15-12) |
  // 101(11-9) | sf=1(8) | sx=1(7) | 1(6) | i(5) | 0(4) | imm4(3-0)
  DCHECK(IsEnabled(VFPv3));  // Fixed-point VCVT requires VFPv3.
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(fraction_bits > 0 && fraction_bits <= 32);
  int vd, d;
  dst.split_code(&vd, &d);
  // The fraction width is encoded as a 5-bit value 32 - fraction_bits,
  // split into its low bit (i, bit 5) and upper four bits (imm4, bits 3-0).
  int imm5 = 32 - fraction_bits;
  int i = imm5 & 1;
  int imm4 = (imm5 >> 1) & 0xF;
  emit(cond | 0xE * B24 | B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B7 | B6 | i * B5 | imm4);
}
3127 
vneg(const DwVfpRegister dst,const DwVfpRegister src,const Condition cond)3128 void Assembler::vneg(const DwVfpRegister dst, const DwVfpRegister src,
3129                      const Condition cond) {
3130   // Instruction details available in ARM DDI 0406C.b, A8-968.
3131   // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
3132   // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3133   DCHECK(VfpRegisterIsAvailable(dst));
3134   DCHECK(VfpRegisterIsAvailable(src));
3135   int vd, d;
3136   dst.split_code(&vd, &d);
3137   int vm, m;
3138   src.split_code(&vm, &m);
3139 
3140   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
3141        B8 | B6 | m * B5 | vm);
3142 }
3143 
vneg(const SwVfpRegister dst,const SwVfpRegister src,const Condition cond)3144 void Assembler::vneg(const SwVfpRegister dst, const SwVfpRegister src,
3145                      const Condition cond) {
3146   // Instruction details available in ARM DDI 0406C.b, A8-968.
3147   // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
3148   // 101(11-9) | sz=0(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3149   int vd, d;
3150   dst.split_code(&vd, &d);
3151   int vm, m;
3152   src.split_code(&vm, &m);
3153 
3154   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
3155        B6 | m * B5 | vm);
3156 }
3157 
vabs(const DwVfpRegister dst,const DwVfpRegister src,const Condition cond)3158 void Assembler::vabs(const DwVfpRegister dst, const DwVfpRegister src,
3159                      const Condition cond) {
3160   // Instruction details available in ARM DDI 0406C.b, A8-524.
3161   // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
3162   // 101(11-9) | sz=1(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3163   DCHECK(VfpRegisterIsAvailable(dst));
3164   DCHECK(VfpRegisterIsAvailable(src));
3165   int vd, d;
3166   dst.split_code(&vd, &d);
3167   int vm, m;
3168   src.split_code(&vm, &m);
3169   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B7 |
3170        B6 | m * B5 | vm);
3171 }
3172 
vabs(const SwVfpRegister dst,const SwVfpRegister src,const Condition cond)3173 void Assembler::vabs(const SwVfpRegister dst, const SwVfpRegister src,
3174                      const Condition cond) {
3175   // Instruction details available in ARM DDI 0406C.b, A8-524.
3176   // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
3177   // 101(11-9) | sz=0(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3178   int vd, d;
3179   dst.split_code(&vd, &d);
3180   int vm, m;
3181   src.split_code(&vm, &m);
3182   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B7 | B6 |
3183        m * B5 | vm);
3184 }
3185 
vadd(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2,const Condition cond)3186 void Assembler::vadd(const DwVfpRegister dst, const DwVfpRegister src1,
3187                      const DwVfpRegister src2, const Condition cond) {
3188   // Dd = vadd(Dn, Dm) double precision floating point addition.
3189   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
3190   // Instruction details available in ARM DDI 0406C.b, A8-830.
3191   // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
3192   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3193   DCHECK(VfpRegisterIsAvailable(dst));
3194   DCHECK(VfpRegisterIsAvailable(src1));
3195   DCHECK(VfpRegisterIsAvailable(src2));
3196   int vd, d;
3197   dst.split_code(&vd, &d);
3198   int vn, n;
3199   src1.split_code(&vn, &n);
3200   int vm, m;
3201   src2.split_code(&vm, &m);
3202   emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
3203        0x5 * B9 | B8 | n * B7 | m * B5 | vm);
3204 }
3205 
vadd(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2,const Condition cond)3206 void Assembler::vadd(const SwVfpRegister dst, const SwVfpRegister src1,
3207                      const SwVfpRegister src2, const Condition cond) {
3208   // Sd = vadd(Sn, Sm) single precision floating point addition.
3209   // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
3210   // Instruction details available in ARM DDI 0406C.b, A8-830.
3211   // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
3212   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3213   int vd, d;
3214   dst.split_code(&vd, &d);
3215   int vn, n;
3216   src1.split_code(&vn, &n);
3217   int vm, m;
3218   src2.split_code(&vm, &m);
3219   emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
3220        0x5 * B9 | n * B7 | m * B5 | vm);
3221 }
3222 
vsub(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2,const Condition cond)3223 void Assembler::vsub(const DwVfpRegister dst, const DwVfpRegister src1,
3224                      const DwVfpRegister src2, const Condition cond) {
3225   // Dd = vsub(Dn, Dm) double precision floating point subtraction.
3226   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
3227   // Instruction details available in ARM DDI 0406C.b, A8-1086.
3228   // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
3229   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3230   DCHECK(VfpRegisterIsAvailable(dst));
3231   DCHECK(VfpRegisterIsAvailable(src1));
3232   DCHECK(VfpRegisterIsAvailable(src2));
3233   int vd, d;
3234   dst.split_code(&vd, &d);
3235   int vn, n;
3236   src1.split_code(&vn, &n);
3237   int vm, m;
3238   src2.split_code(&vm, &m);
3239   emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
3240        0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
3241 }
3242 
vsub(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2,const Condition cond)3243 void Assembler::vsub(const SwVfpRegister dst, const SwVfpRegister src1,
3244                      const SwVfpRegister src2, const Condition cond) {
3245   // Sd = vsub(Sn, Sm) single precision floating point subtraction.
3246   // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
3247   // Instruction details available in ARM DDI 0406C.b, A8-1086.
3248   // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
3249   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3250   int vd, d;
3251   dst.split_code(&vd, &d);
3252   int vn, n;
3253   src1.split_code(&vn, &n);
3254   int vm, m;
3255   src2.split_code(&vm, &m);
3256   emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
3257        0x5 * B9 | n * B7 | B6 | m * B5 | vm);
3258 }
3259 
vmul(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2,const Condition cond)3260 void Assembler::vmul(const DwVfpRegister dst, const DwVfpRegister src1,
3261                      const DwVfpRegister src2, const Condition cond) {
3262   // Dd = vmul(Dn, Dm) double precision floating point multiplication.
3263   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
3264   // Instruction details available in ARM DDI 0406C.b, A8-960.
3265   // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
3266   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3267   DCHECK(VfpRegisterIsAvailable(dst));
3268   DCHECK(VfpRegisterIsAvailable(src1));
3269   DCHECK(VfpRegisterIsAvailable(src2));
3270   int vd, d;
3271   dst.split_code(&vd, &d);
3272   int vn, n;
3273   src1.split_code(&vn, &n);
3274   int vm, m;
3275   src2.split_code(&vm, &m);
3276   emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
3277        0x5 * B9 | B8 | n * B7 | m * B5 | vm);
3278 }
3279 
vmul(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2,const Condition cond)3280 void Assembler::vmul(const SwVfpRegister dst, const SwVfpRegister src1,
3281                      const SwVfpRegister src2, const Condition cond) {
3282   // Sd = vmul(Sn, Sm) single precision floating point multiplication.
3283   // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
3284   // Instruction details available in ARM DDI 0406C.b, A8-960.
3285   // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
3286   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3287   int vd, d;
3288   dst.split_code(&vd, &d);
3289   int vn, n;
3290   src1.split_code(&vn, &n);
3291   int vm, m;
3292   src2.split_code(&vm, &m);
3293   emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
3294        0x5 * B9 | n * B7 | m * B5 | vm);
3295 }
3296 
vmla(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2,const Condition cond)3297 void Assembler::vmla(const DwVfpRegister dst, const DwVfpRegister src1,
3298                      const DwVfpRegister src2, const Condition cond) {
3299   // Instruction details available in ARM DDI 0406C.b, A8-932.
3300   // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3301   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
3302   DCHECK(VfpRegisterIsAvailable(dst));
3303   DCHECK(VfpRegisterIsAvailable(src1));
3304   DCHECK(VfpRegisterIsAvailable(src2));
3305   int vd, d;
3306   dst.split_code(&vd, &d);
3307   int vn, n;
3308   src1.split_code(&vn, &n);
3309   int vm, m;
3310   src2.split_code(&vm, &m);
3311   emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
3312        n * B7 | m * B5 | vm);
3313 }
3314 
vmla(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2,const Condition cond)3315 void Assembler::vmla(const SwVfpRegister dst, const SwVfpRegister src1,
3316                      const SwVfpRegister src2, const Condition cond) {
3317   // Instruction details available in ARM DDI 0406C.b, A8-932.
3318   // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3319   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
3320   int vd, d;
3321   dst.split_code(&vd, &d);
3322   int vn, n;
3323   src1.split_code(&vn, &n);
3324   int vm, m;
3325   src2.split_code(&vm, &m);
3326   emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
3327        m * B5 | vm);
3328 }
3329 
vmls(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2,const Condition cond)3330 void Assembler::vmls(const DwVfpRegister dst, const DwVfpRegister src1,
3331                      const DwVfpRegister src2, const Condition cond) {
3332   // Instruction details available in ARM DDI 0406C.b, A8-932.
3333   // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3334   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
3335   DCHECK(VfpRegisterIsAvailable(dst));
3336   DCHECK(VfpRegisterIsAvailable(src1));
3337   DCHECK(VfpRegisterIsAvailable(src2));
3338   int vd, d;
3339   dst.split_code(&vd, &d);
3340   int vn, n;
3341   src1.split_code(&vn, &n);
3342   int vm, m;
3343   src2.split_code(&vm, &m);
3344   emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
3345        n * B7 | B6 | m * B5 | vm);
3346 }
3347 
vmls(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2,const Condition cond)3348 void Assembler::vmls(const SwVfpRegister dst, const SwVfpRegister src1,
3349                      const SwVfpRegister src2, const Condition cond) {
3350   // Instruction details available in ARM DDI 0406C.b, A8-932.
3351   // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3352   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
3353   int vd, d;
3354   dst.split_code(&vd, &d);
3355   int vn, n;
3356   src1.split_code(&vn, &n);
3357   int vm, m;
3358   src2.split_code(&vm, &m);
3359   emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
3360        B6 | m * B5 | vm);
3361 }
3362 
vdiv(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2,const Condition cond)3363 void Assembler::vdiv(const DwVfpRegister dst, const DwVfpRegister src1,
3364                      const DwVfpRegister src2, const Condition cond) {
3365   // Dd = vdiv(Dn, Dm) double precision floating point division.
3366   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
3367   // Instruction details available in ARM DDI 0406C.b, A8-882.
3368   // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
3369   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3370   DCHECK(VfpRegisterIsAvailable(dst));
3371   DCHECK(VfpRegisterIsAvailable(src1));
3372   DCHECK(VfpRegisterIsAvailable(src2));
3373   int vd, d;
3374   dst.split_code(&vd, &d);
3375   int vn, n;
3376   src1.split_code(&vn, &n);
3377   int vm, m;
3378   src2.split_code(&vm, &m);
3379   emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
3380        n * B7 | m * B5 | vm);
3381 }
3382 
vdiv(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2,const Condition cond)3383 void Assembler::vdiv(const SwVfpRegister dst, const SwVfpRegister src1,
3384                      const SwVfpRegister src2, const Condition cond) {
3385   // Sd = vdiv(Sn, Sm) single precision floating point division.
3386   // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
3387   // Instruction details available in ARM DDI 0406C.b, A8-882.
3388   // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
3389   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3390   int vd, d;
3391   dst.split_code(&vd, &d);
3392   int vn, n;
3393   src1.split_code(&vn, &n);
3394   int vm, m;
3395   src2.split_code(&vm, &m);
3396   emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
3397        m * B5 | vm);
3398 }
3399 
vcmp(const DwVfpRegister src1,const DwVfpRegister src2,const Condition cond)3400 void Assembler::vcmp(const DwVfpRegister src1, const DwVfpRegister src2,
3401                      const Condition cond) {
3402   // vcmp(Dd, Dm) double precision floating point comparison.
3403   // Instruction details available in ARM DDI 0406C.b, A8-864.
3404   // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
3405   // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3406   DCHECK(VfpRegisterIsAvailable(src1));
3407   DCHECK(VfpRegisterIsAvailable(src2));
3408   int vd, d;
3409   src1.split_code(&vd, &d);
3410   int vm, m;
3411   src2.split_code(&vm, &m);
3412   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
3413        0x5 * B9 | B8 | B6 | m * B5 | vm);
3414 }
3415 
vcmp(const SwVfpRegister src1,const SwVfpRegister src2,const Condition cond)3416 void Assembler::vcmp(const SwVfpRegister src1, const SwVfpRegister src2,
3417                      const Condition cond) {
3418   // vcmp(Sd, Sm) single precision floating point comparison.
3419   // Instruction details available in ARM DDI 0406C.b, A8-864.
3420   // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
3421   // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3422   int vd, d;
3423   src1.split_code(&vd, &d);
3424   int vm, m;
3425   src2.split_code(&vm, &m);
3426   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
3427        0x5 * B9 | B6 | m * B5 | vm);
3428 }
3429 
vcmp(const DwVfpRegister src1,const double src2,const Condition cond)3430 void Assembler::vcmp(const DwVfpRegister src1, const double src2,
3431                      const Condition cond) {
3432   // vcmp(Dd, #0.0) double precision floating point comparison.
3433   // Instruction details available in ARM DDI 0406C.b, A8-864.
3434   // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
3435   // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
3436   DCHECK(VfpRegisterIsAvailable(src1));
3437   DCHECK_EQ(src2, 0.0);
3438   int vd, d;
3439   src1.split_code(&vd, &d);
3440   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
3441        0x5 * B9 | B8 | B6);
3442 }
3443 
vcmp(const SwVfpRegister src1,const float src2,const Condition cond)3444 void Assembler::vcmp(const SwVfpRegister src1, const float src2,
3445                      const Condition cond) {
3446   // vcmp(Sd, #0.0) single precision floating point comparison.
3447   // Instruction details available in ARM DDI 0406C.b, A8-864.
3448   // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
3449   // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
3450   DCHECK_EQ(src2, 0.0);
3451   int vd, d;
3452   src1.split_code(&vd, &d);
3453   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
3454        0x5 * B9 | B6);
3455 }
3456 
vmaxnm(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2)3457 void Assembler::vmaxnm(const DwVfpRegister dst, const DwVfpRegister src1,
3458                        const DwVfpRegister src2) {
3459   // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3460   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3461   DCHECK(IsEnabled(ARMv8));
3462   int vd, d;
3463   dst.split_code(&vd, &d);
3464   int vn, n;
3465   src1.split_code(&vn, &n);
3466   int vm, m;
3467   src2.split_code(&vm, &m);
3468 
3469   emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
3470        0x5 * B9 | B8 | n * B7 | m * B5 | vm);
3471 }
3472 
vmaxnm(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2)3473 void Assembler::vmaxnm(const SwVfpRegister dst, const SwVfpRegister src1,
3474                        const SwVfpRegister src2) {
3475   // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3476   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
3477   DCHECK(IsEnabled(ARMv8));
3478   int vd, d;
3479   dst.split_code(&vd, &d);
3480   int vn, n;
3481   src1.split_code(&vn, &n);
3482   int vm, m;
3483   src2.split_code(&vm, &m);
3484 
3485   emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
3486        0x5 * B9 | n * B7 | m * B5 | vm);
3487 }
3488 
vminnm(const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2)3489 void Assembler::vminnm(const DwVfpRegister dst, const DwVfpRegister src1,
3490                        const DwVfpRegister src2) {
3491   // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3492   // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3493   DCHECK(IsEnabled(ARMv8));
3494   int vd, d;
3495   dst.split_code(&vd, &d);
3496   int vn, n;
3497   src1.split_code(&vn, &n);
3498   int vm, m;
3499   src2.split_code(&vm, &m);
3500 
3501   emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
3502        0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
3503 }
3504 
vminnm(const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2)3505 void Assembler::vminnm(const SwVfpRegister dst, const SwVfpRegister src1,
3506                        const SwVfpRegister src2) {
3507   // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
3508   // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3509   DCHECK(IsEnabled(ARMv8));
3510   int vd, d;
3511   dst.split_code(&vd, &d);
3512   int vn, n;
3513   src1.split_code(&vn, &n);
3514   int vm, m;
3515   src2.split_code(&vm, &m);
3516 
3517   emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
3518        0x5 * B9 | n * B7 | B6 | m * B5 | vm);
3519 }
3520 
vsel(Condition cond,const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2)3521 void Assembler::vsel(Condition cond, const DwVfpRegister dst,
3522                      const DwVfpRegister src1, const DwVfpRegister src2) {
3523   // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
3524   // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=1(8) | N(7) |
3525   // 0(6) | M(5) | 0(4) | Vm(3-0)
3526   DCHECK(IsEnabled(ARMv8));
3527   int vd, d;
3528   dst.split_code(&vd, &d);
3529   int vn, n;
3530   src1.split_code(&vn, &n);
3531   int vm, m;
3532   src2.split_code(&vm, &m);
3533   int sz = 1;
3534 
3535   // VSEL has a special (restricted) condition encoding.
3536   //   eq(0b0000)... -> 0b00
3537   //   ge(0b1010)... -> 0b10
3538   //   gt(0b1100)... -> 0b11
3539   //   vs(0b0110)... -> 0b01
3540   // No other conditions are supported.
3541   int vsel_cond = (cond >> 30) & 0x3;
3542   if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
3543     // We can implement some other conditions by swapping the inputs.
3544     DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
3545     std::swap(vn, vm);
3546     std::swap(n, m);
3547   }
3548 
3549   emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
3550        vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
3551 }
3552 
vsel(Condition cond,const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2)3553 void Assembler::vsel(Condition cond, const SwVfpRegister dst,
3554                      const SwVfpRegister src1, const SwVfpRegister src2) {
3555   // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
3556   // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=0(8) | N(7) |
3557   // 0(6) | M(5) | 0(4) | Vm(3-0)
3558   DCHECK(IsEnabled(ARMv8));
3559   int vd, d;
3560   dst.split_code(&vd, &d);
3561   int vn, n;
3562   src1.split_code(&vn, &n);
3563   int vm, m;
3564   src2.split_code(&vm, &m);
3565   int sz = 0;
3566 
3567   // VSEL has a special (restricted) condition encoding.
3568   //   eq(0b0000)... -> 0b00
3569   //   ge(0b1010)... -> 0b10
3570   //   gt(0b1100)... -> 0b11
3571   //   vs(0b0110)... -> 0b01
3572   // No other conditions are supported.
3573   int vsel_cond = (cond >> 30) & 0x3;
3574   if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
3575     // We can implement some other conditions by swapping the inputs.
3576     DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
3577     std::swap(vn, vm);
3578     std::swap(n, m);
3579   }
3580 
3581   emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
3582        vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
3583 }
3584 
void Assembler::vsqrt(const DwVfpRegister dst, const DwVfpRegister src,
                      const Condition cond) {
  // Dd = vsqrt.f64(Dm): double-precision floating-point square root.
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B8 | 0x3 * B6 | m * B5 | vm);
}
3599 
void Assembler::vsqrt(const SwVfpRegister dst, const SwVfpRegister src,
                      const Condition cond) {
  // Sd = vsqrt.f32(Sm): single-precision floating-point square root.
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       0x3 * B6 | m * B5 | vm);
}
3612 
void Assembler::vmsr(Register dst, Condition cond) {
  // VMSR FPSCR, Rt: move a core register into the FPSCR.
  // NOTE(review): despite the parameter name, 'dst' is the *source* core
  // register here; the destination is the FPSCR system register.
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1110(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xE * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}
3619 
void Assembler::vmrs(Register dst, Condition cond) {
  // VMRS Rt, FPSCR: move the FPSCR into core register 'dst'. (Per the ARM
  // ARM, Rt = pc transfers the FPSCR condition flags to the APSR.)
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xF * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}
3626 
void Assembler::vrinta(const SwVfpRegister dst, const SwVfpRegister src) {
  // vrinta.f32 Sd, Sm: round to nearest integral, ties away from zero
  // (ARMv8; RM field selects the rounding mode).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}
3639 
void Assembler::vrinta(const DwVfpRegister dst, const DwVfpRegister src) {
  // vrinta.f64 Dd, Dm: round to nearest integral, ties away from zero
  // (ARMv8; RM field selects the rounding mode).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}
3652 
void Assembler::vrintn(const SwVfpRegister dst, const SwVfpRegister src) {
  // vrintn.f32 Sd, Sm: round to nearest integral, ties to even (ARMv8).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}
3665 
void Assembler::vrintn(const DwVfpRegister dst, const DwVfpRegister src) {
  // vrintn.f64 Dd, Dm: round to nearest integral, ties to even (ARMv8).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}
3678 
void Assembler::vrintp(const SwVfpRegister dst, const SwVfpRegister src) {
  // vrintp.f32 Sd, Sm: round toward plus infinity (ceiling) (ARMv8).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}
3691 
void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) {
  // vrintp.f64 Dd, Dm: round toward plus infinity (ceiling) (ARMv8).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}
3704 
void Assembler::vrintm(const SwVfpRegister dst, const SwVfpRegister src) {
  // vrintm.f32 Sd, Sm: round toward minus infinity (floor) (ARMv8).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}
3717 
void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) {
  // vrintm.f64 Dd, Dm: round toward minus infinity (floor) (ARMv8).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}
3730 
void Assembler::vrintz(const SwVfpRegister dst, const SwVfpRegister src,
                       const Condition cond) {
  // vrintz.f32 Sd, Sm: round toward zero (truncate). Unlike the other vrint
  // variants this encoding is conditional.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B7 | B6 | m * B5 | vm);
}
3743 
void Assembler::vrintz(const DwVfpRegister dst, const DwVfpRegister src,
                       const Condition cond) {
  // vrintz.f64 Dd, Dm: round toward zero (truncate). Unlike the other vrint
  // variants this encoding is conditional.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B8 | B7 | B6 | m * B5 | vm);
}
3756 
3757 // Support for NEON.
3758 
void Assembler::vld1(NeonSize size, const NeonListOperand& dst,
                     const NeonMemOperand& src) {
  // Load multiple single elements from memory into the register list 'dst'.
  // Instruction details available in ARM DDI 0406C.b, A8.8.320.
  // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | d * B22 | 2 * B20 | src.rn().code() * B16 |
       vd * B12 | dst.type() * B8 | size * B6 | src.align() * B4 |
       src.rm().code());
}
3771 
3772 // vld1s(ingle element to one lane).
vld1s(NeonSize size,const NeonListOperand & dst,uint8_t index,const NeonMemOperand & src)3773 void Assembler::vld1s(NeonSize size, const NeonListOperand& dst, uint8_t index,
3774                       const NeonMemOperand& src) {
3775   // Instruction details available in ARM DDI 0406C.b, A8.8.322.
3776   // 1111(31-28) | 01001(27-23) | D(22) | 10(21-20) | Rn(19-16) |
3777   // Vd(15-12) | size(11-10) | index_align(7-4) | Rm(3-0)
3778   // See vld1 (single element to all lanes) if size == 0x3, implemented as
3779   // vld1r(eplicate).
3780   DCHECK_NE(size, 0x3);
3781   // Check for valid lane indices.
3782   DCHECK_GT(1 << (3 - size), index);
3783   // Specifying alignment not supported, use standard alignment.
3784   uint8_t index_align = index << (size + 1);
3785 
3786   DCHECK(IsEnabled(NEON));
3787   int vd, d;
3788   dst.base().split_code(&vd, &d);
3789   emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
3790        src.rn().code() * B16 | vd * B12 | size * B10 | index_align * B4 |
3791        src.rm().code());
3792 }
3793 
3794 // vld1r(eplicate)
void Assembler::vld1r(NeonSize size, const NeonListOperand& dst,
                      const NeonMemOperand& src) {
  // Load one element from memory and replicate it to all lanes of the
  // destination register(s).
  // Instruction details available in ARM DDI 0406C.b, A8.8.323 (VLD1,
  // single element to all lanes).
  // 1111(31-28) | 01001(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | 1100(11-8) | size(7-6) | T(5) | a(4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
       src.rn().code() * B16 | vd * B12 | 0xC * B8 | size * B6 |
       dst.length() * B5 | src.rm().code());
}
3804 
void Assembler::vst1(NeonSize size, const NeonListOperand& src,
                     const NeonMemOperand& dst) {
  // Store multiple single elements from the register list 'src' to memory.
  // Instruction details available in ARM DDI 0406C.b, A8.8.404.
  // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU * B28 | 4 * B24 | d * B22 | dst.rn().code() * B16 | vd * B12 |
       src.type() * B8 | size * B6 | dst.align() * B4 | dst.rm().code());
}
3816 
void Assembler::vst1s(NeonSize size, const NeonListOperand& src, uint8_t index,
                      const NeonMemOperand& dst) {
  // Store a single element from one lane of a NEON register to memory.
  // Instruction details available in ARM DDI 0487F.b F6.1.236.
  // 1111(31-28) | 01001(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | size(11-10) | 00(9-8) | index_align(7-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  DCHECK_NE(size, 0x3);
  // Check for valid lane indices.
  DCHECK_GT(1 << (3 - size), index);
  // Specifying alignment not supported, use standard alignment.
  uint8_t index_align = index << (size + 1);
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU * B28 | 9 * B23 | d * B22 | dst.rn().code() * B16 | vd * B12 |
       size * B10 | index_align * B4 | dst.rm().code());
}
3832 
void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
  // Qd = vmovl.<dt>(Dm): lengthening move — widen each element of the
  // 64-bit source to twice its width in the 128-bit destination.
  // Instruction details available in ARM DDI 0406C.b, A8.8.346.
  // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
  // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int U = NeonU(dt);
  // imm3 is a one-hot encoding of the source element size (1/2/4 for
  // 8/16/32-bit elements).
  int imm3 = 1 << NeonSz(dt);
  emit(0xFU * B28 | B25 | U * B24 | B23 | d * B22 | imm3 * B19 | vd * B12 |
       0xA * B8 | m * B5 | B4 | vm);
}
3847 
void Assembler::vqmovn(NeonDataType dst_dt, NeonDataType src_dt,
                       DwVfpRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.1004.
  // vqmovn.<type><size> Dd, Qm. ARM vector narrowing move with saturation.
  // vqmovun.<type><size> Dd, Qm. Same as above, but produces unsigned results.
  DCHECK(IsEnabled(NEON));
  // Narrowing signed -> unsigned is vqmovun; unsigned -> signed is invalid.
  DCHECK_IMPLIES(NeonU(src_dt), NeonU(dst_dt));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int size = NeonSz(dst_dt);
  DCHECK_NE(3, size);
  // op selects the variant: 0b11 = unsigned vqmovn, 0b01 = vqmovun
  // (signed source, unsigned result), 0b10 = signed vqmovn.
  int op = NeonU(src_dt) ? 0b11 : NeonU(dst_dt) ? 0b01 : 0b10;
  emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
       0x2 * B8 | op * B6 | m * B5 | vm);
}
3865 
// Encodes the opc1/opc2 lane-selection fields for scalar vmov (core register
// <-> NEON lane). The lane 'index' and element width together determine the
// bit pattern; see ARM DDI 0406C.b, A8.8.940/942. Returns the bits already
// shifted into their instruction positions (opc1 at 22-21, opc2 at 6-5).
static int EncodeScalar(NeonDataType dt, int index) {
  int opc1_opc2 = 0;
  DCHECK_LE(0, index);
  switch (dt) {
    case NeonS8:
    case NeonU8:
      // 8-bit lanes: index 0-7, encoded as 0b1xxx.
      DCHECK_GT(8, index);
      opc1_opc2 = 0x8 | index;
      break;
    case NeonS16:
    case NeonU16:
      // 16-bit lanes: index 0-3, encoded as 0bxx01 (index in bits 2-1).
      DCHECK_GT(4, index);
      opc1_opc2 = 0x1 | (index << 1);
      break;
    case NeonS32:
    case NeonU32:
      // 32-bit lanes: index 0-1, encoded as 0bx00 (index in bit 2).
      DCHECK_GT(2, index);
      opc1_opc2 = index << 2;
      break;
    default:
      UNREACHABLE();
  }
  return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;
}
3890 
void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
                     Register src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.940.
  // vmov ARM core register to scalar: dst[index] = src.
  // 32-bit lane moves are plain VFP; narrower lanes require NEON.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int opc1_opc2 = EncodeScalar(dt, index);
  emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
       opc1_opc2);
}
3902 
void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
                     int index) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.942.
  // vmov Arm scalar to core register: dst = src[index], sign- or
  // zero-extended according to dt for sub-32-bit lanes.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vn, n;
  src.split_code(&vn, &n);
  int opc1_opc2 = EncodeScalar(dt, index);
  // NeonS32 and NeonU32 both encoded as u = 0.
  int u = NeonDataTypeToSize(dt) == Neon32 ? 0 : NeonU(dt);
  emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
       n * B7 | B4 | opc1_opc2);
}
3916 
void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vmov(Qm): full 128-bit register copy.
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // vmov is encoded as vorr with both sources equal.
  vorr(dst, src, src);
}
3922 
void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) {
  // Qd = vdup.<size>(Rt): duplicate a core register value into every lane
  // of a 128-bit NEON register.
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-886.
  // The B:E bit pair encodes the element size: B=1 -> 8-bit, E=1 -> 16-bit,
  // both zero -> 32-bit.
  int B = 0, E = 0;
  switch (size) {
    case Neon8:
      B = 1;
      break;
    case Neon16:
      E = 1;
      break;
    case Neon32:
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);

  emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
       0xB * B8 | d * B7 | E * B5 | B4);
}
3945 
// Whether a NEON operand is a D (64-bit) or a Q (128-bit) register.
enum NeonRegType { NEON_D, NEON_Q };
3947 
// Splits a register code into its Vm field (*vm) and high bit (*m) for either
// register class; for Q registers it also sets the Q bit (B6) in *encoding.
void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
  if (type == NEON_D) {
    DwVfpRegister::split_code(code, vm, m);
  } else {
    DCHECK_EQ(type, NEON_Q);
    QwNeonRegister::split_code(code, vm, m);
    *encoding |= B6;
  }
}
3957 
// Encodes vdup (scalar): duplicate lane 'index' of D-register 'src' into
// every lane of the destination (D or Q per reg_type). See ARM DDI 0406C.b,
// A8-884.
static Instr EncodeNeonDupOp(NeonSize size, NeonRegType reg_type, int dst_code,
                             DwVfpRegister src, int index) {
  DCHECK_NE(Neon64, size);
  int sz = static_cast<int>(size);
  DCHECK_LE(0, index);
  DCHECK_GT(kSimd128Size / (1 << sz), index);
  // imm4 packs a one-hot size marker in the low bits with the lane index
  // above it.
  int imm4 = (1 << sz) | ((index << (sz + 1)) & 0xF);
  int qbit = 0;  // Set to B6 by NeonSplitCode for Q destinations.
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &qbit);
  int vm, m;
  src.split_code(&vm, &m);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 |
         0x18 * B7 | qbit | m * B5 | vm;
}
3974 
void Assembler::vdup(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int index) {
  // Dd = vdup.<size>(Dm[index]): duplicate one lane into all lanes of a
  // 64-bit register.
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  emit(EncodeNeonDupOp(size, NEON_D, dst.code(), src, index));
}
3981 
void Assembler::vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src,
                     int index) {
  // Qd = vdup.<size>(Dm[index]): duplicate one lane into all lanes of a
  // 128-bit register.
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonDupOp(size, NEON_Q, dst.code(), src, index));
}
3988 
3989 // Encode NEON vcvt.src_type.dst_type instruction.
// Encode NEON vcvt.src_type.dst_type instruction. Exactly one side of the
// conversion must be F32; the other is S32 or U32.
static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst,
                            VFPType src_type, QwNeonRegister src) {
  DCHECK(src_type != dst_type);
  DCHECK(src_type == F32 || dst_type == F32);
  // Instruction details available in ARM DDI 0406C.b, A8.8.868.
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  // op: 0 = s32->f32, 1 = u32->f32, 2 = f32->s32, 3 = f32->u32.
  int op = 0;
  if (src_type == F32) {
    DCHECK(dst_type == S32 || dst_type == U32);
    op = dst_type == U32 ? 3 : 2;
  } else {
    DCHECK(src_type == S32 || src_type == U32);
    op = src_type == U32 ? 1 : 0;
  }

  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
         B6 | m * B5 | vm;
}
4012 
void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.f32.s32(Qm): per-lane signed-int-to-float conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, S32, src));
}
4019 
void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.f32.u32(Qm): per-lane unsigned-int-to-float conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, U32, src));
}
4026 
void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.s32.f32(Qm): per-lane float-to-signed-int conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(S32, dst, F32, src));
}
4033 
void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.u32.f32(Qm): per-lane float-to-unsigned-int conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(U32, dst, F32, src));
}
4040 
// Two-register NEON "misc" operations, dispatched through
// EncodeNeonUnaryOp below.
enum UnaryOp {
  VMVN,
  VSWP,
  VABS,
  VABSF,
  VNEG,
  VNEGF,
  VRINTM,
  VRINTN,
  VRINTP,
  VRINTZ,
  VZIP,
  VUZP,
  VREV16,
  VREV32,
  VREV64,
  VTRN,
  VRECPE,
  VRSQRTE,
  VPADAL_S,
  VPADAL_U,
  VPADDL_S,
  VPADDL_U,
  VCEQ0,
  VCLT0,
  VCNT
};
4068 
4069 // Encoding helper for "Advanced SIMD two registers misc" decode group. See ARM
4070 // DDI 0487F.b, F4-4228.
// Encoding helper for "Advanced SIMD two registers misc" decode group. See ARM
// DDI 0487F.b, F4-4228. Each case sets the op-specific opc1/opc2 bits; the
// shared skeleton (size, register fields, Q bit) is assembled at the end.
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
                               int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VMVN:
      DCHECK_EQ(Neon8, size);  // size == 0 for vmvn
      op_encoding = B10 | 0x3 * B7;
      break;
    case VSWP:
      DCHECK_EQ(Neon8, size);  // size == 0 for vswp
      op_encoding = B17;
      break;
    case VABS:
      op_encoding = B16 | 0x6 * B7;
      break;
    case VABSF:
      DCHECK_EQ(Neon32, size);  // Floating-point variant is f32 only.
      op_encoding = B16 | B10 | 0x6 * B7;
      break;
    case VNEG:
      op_encoding = B16 | 0x7 * B7;
      break;
    case VNEGF:
      DCHECK_EQ(Neon32, size);  // Floating-point variant is f32 only.
      op_encoding = B16 | B10 | 0x7 * B7;
      break;
    case VRINTM:
      op_encoding = B17 | 0xD * B7;
      break;
    case VRINTN:
      op_encoding = B17 | 0x8 * B7;
      break;
    case VRINTP:
      op_encoding = B17 | 0xF * B7;
      break;
    case VRINTZ:
      op_encoding = B17 | 0xB * B7;
      break;
    case VZIP:
      op_encoding = 0x2 * B16 | 0x3 * B7;
      break;
    case VUZP:
      op_encoding = 0x2 * B16 | 0x2 * B7;
      break;
    case VREV16:
      op_encoding = 0x2 * B7;
      break;
    case VREV32:
      op_encoding = 0x1 * B7;
      break;
    case VREV64:
      // op_encoding is 0;
      break;
    case VTRN:
      op_encoding = 0x2 * B16 | B7;
      break;
    case VRECPE:
      // Only support floating point.
      op_encoding = 0x3 * B16 | 0xA * B7;
      break;
    case VRSQRTE:
      // Only support floating point.
      op_encoding = 0x3 * B16 | 0xB * B7;
      break;
    case VPADAL_S:
      op_encoding = 0xC * B7;
      break;
    case VPADAL_U:
      op_encoding = 0xD * B7;
      break;
    case VPADDL_S:
      op_encoding = 0x4 * B7;
      break;
    case VPADDL_U:
      op_encoding = 0x5 * B7;
      break;
    case VCEQ0:
      // Only support integers.
      op_encoding = 0x1 * B16 | 0x2 * B7;
      break;
    case VCLT0:
      // Only support signed integers.
      op_encoding = 0x1 * B16 | 0x4 * B7;
      break;
    case VCNT:
      op_encoding = 0xA * B7;
      break;
  }
  // NeonSplitCode also ORs the Q bit into op_encoding for Q registers.
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}
4167 
void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vmvn(Qn, Qm) SIMD bitwise negate (ones' complement).
  // Instruction details available in ARM DDI 0406C.b, A8-966.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code()));
}
4174 
vswp(DwVfpRegister dst,DwVfpRegister src)4175 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
4176   DCHECK(IsEnabled(NEON));
4177   // Dd = vswp(Dn, Dm) SIMD d-register swap.
4178   // Instruction details available in ARM DDI 0406C.b, A8.8.418.
4179   DCHECK(IsEnabled(NEON));
4180   emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code()));
4181 }
4182 
void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vswp(Qn, Qm) SIMD q-register swap.
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code()));
}
4189 
void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code()));
}
4196 
void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code()));
}
4203 
void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.f<size>(Qn, Qm) SIMD floating point negate.
  // (Previous comment mistakenly said "vabs".)
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code()));
}
4210 
void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.s<size>(Qn, Qm) SIMD integer negate.
  // (Previous comment mistakenly said "vabs".)
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code()));
}
4217 
// Three-register NEON bitwise operations sharing one encoding skeleton.
enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };
4219 
// Encodes the three-register NEON bitwise ops (vand/vbic/vbsl/veor/vorr/...).
// Each case sets the U (B24) and size (bits 21-20) selector bits that
// distinguish the ops within the shared skeleton.
static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
                                       int dst_code, int src_code1,
                                       int src_code2) {
  int op_encoding = 0;
  switch (op) {
    case VBIC:
      op_encoding = 0x1 * B20;
      break;
    case VBIF:
      op_encoding = B24 | 0x3 * B20;
      break;
    case VBIT:
      op_encoding = B24 | 0x2 * B20;
      break;
    case VBSL:
      op_encoding = B24 | 0x1 * B20;
      break;
    case VEOR:
      op_encoding = B24;
      break;
    case VORR:
      op_encoding = 0x2 * B20;
      break;
    case VORN:
      op_encoding = 0x3 * B20;
      break;
    case VAND:
      // op_encoding is 0.
      break;
    default:
      UNREACHABLE();
  }
  // NeonSplitCode also ORs the Q bit into op_encoding for Q registers.
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);

  return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
         n * B7 | m * B5 | B4 | vm;
}
4262 
void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vand(Qn, Qm) SIMD bitwise AND.
  // Instruction details available in ARM DDI 0406C.b, A8.8.836.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
4271 
void Assembler::vbic(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vbic(Qn, Qm) SIMD bitwise bit clear: Qd = Qn AND NOT Qm.
  // (Previous comment mistakenly said "SIMD AND".)
  // Instruction details available in ARM DDI 0406C.b, A8-840.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VBIC, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
4280 
void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vbsl(Qn, Qm) SIMD bitwise select: dst bits choose between src1/src2.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
4289 
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
                     DwVfpRegister src2) {
  // Dd = veor(Dn, Dm) SIMD exclusive OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(),
                                 src2.code()));
}
4298 
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = veor(Qn, Qm) SIMD exclusive OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
4307 
void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vorr(Qn, Qm) SIMD bitwise OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.976.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
4316 
void Assembler::vorn(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vorn(Qn, Qm) SIMD bitwise OR NOT: Qd = Qn OR NOT Qm.
  // Instruction details available in ARM DDI 0406C.d, A8.8.359.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VORN, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
4325 
// Floating-point three-register-same-length NEON operations; encoded by
// EncodeNeonBinOp(FPBinOp, ...). The F suffix distinguishes these from the
// integer variants in IntegerBinOp below.
enum FPBinOp {
  VADDF,
  VSUBF,
  VMULF,
  VMINF,
  VMAXF,
  VRECPS,
  VRSQRTS,
  VCEQF,
  VCGEF,
  VCGTF
};
4338 
// Builds an Advanced SIMD "three registers, same length" floating-point
// instruction: Qd = op(Qn, Qm). op_encoding carries the opcode-specific bits
// (the opc field at bits 11:8 plus, as required, B24, B21 and B4); the final
// expression adds the fixed 0x1E4 << 23 template, the split register fields
// and Q = 1 (B6) to select the quadword form.
static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADDF:
      op_encoding = 0xD * B8;
      break;
    case VSUBF:
      op_encoding = B21 | 0xD * B8;
      break;
    case VMULF:
      op_encoding = B24 | 0xD * B8 | B4;
      break;
    case VMINF:
      op_encoding = B21 | 0xF * B8;
      break;
    case VMAXF:
      op_encoding = 0xF * B8;
      break;
    case VRECPS:
      op_encoding = 0xF * B8 | B4;
      break;
    case VRSQRTS:
      op_encoding = B21 | 0xF * B8 | B4;
      break;
    case VCEQF:
      op_encoding = 0xE * B8;
      break;
    case VCGEF:
      op_encoding = B24 | 0xE * B8;
      break;
    case VCGTF:
      op_encoding = B24 | B21 | 0xE * B8;
      break;
    default:
      UNREACHABLE();
  }
  // Each NEON register code splits into a 4-bit field (Vd/Vn/Vm) plus a top
  // bit (D/N/M) stored elsewhere in the instruction word.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  return 0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | n * B7 | B6 | m * B5 |
         vm | op_encoding;
}
4385 
// Integer three-register-same-length NEON operations; encoded by
// EncodeNeonBinOp(IntegerBinOp, ...), which adds element size and signedness
// from a NeonDataType/NeonSize.
enum IntegerBinOp {
  VADD,
  VQADD,
  VSUB,
  VQSUB,
  VMUL,
  VMIN,
  VMAX,
  VTST,
  VCEQ,
  VCGE,
  VCGT,
  VRHADD,
  VQRDMULH
};
4401 
// Builds an Advanced SIMD "three registers, same length" integer instruction:
// Qd = op(Qn, Qm). op_encoding carries the opcode-specific bits (opc field at
// bits 11:8 plus B24/B4 as required); the element-size field (bits 21:20)
// comes from NeonSz(dt) and the U bit (B24) from NeonU(dt). Q = 1 (B6)
// selects the quadword form.
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
                             QwNeonRegister dst, QwNeonRegister src1,
                             QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADD:
      op_encoding = 0x8 * B8;
      break;
    case VQADD:
      op_encoding = B4;
      break;
    case VSUB:
      op_encoding = B24 | 0x8 * B8;
      break;
    case VQSUB:
      op_encoding = 0x2 * B8 | B4;
      break;
    case VMUL:
      op_encoding = 0x9 * B8 | B4;
      break;
    case VMIN:
      op_encoding = 0x6 * B8 | B4;
      break;
    case VMAX:
      op_encoding = 0x6 * B8;
      break;
    case VTST:
      op_encoding = 0x8 * B8 | B4;
      break;
    case VCEQ:
      op_encoding = B24 | 0x8 * B8 | B4;
      break;
    case VCGE:
      op_encoding = 0x3 * B8 | B4;
      break;
    case VCGT:
      op_encoding = 0x3 * B8;
      break;
    case VRHADD:
      op_encoding = B8;
      break;
    case VQRDMULH:
      op_encoding = B24 | 0xB * B8;
      break;
    default:
      UNREACHABLE();
  }
  // Split register codes into their 4-bit fields and D/N/M top bits.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | B6 | m * B5 | vm | op_encoding;
}
4460 
EncodeNeonBinOp(IntegerBinOp op,NeonSize size,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4461 static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,
4462                              QwNeonRegister src1, QwNeonRegister src2) {
4463   // Map NeonSize values to the signed values in NeonDataType, so the U bit
4464   // will be 0.
4465   return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
4466 }
4467 
vadd(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4468 void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,
4469                      QwNeonRegister src2) {
4470   DCHECK(IsEnabled(NEON));
4471   // Qd = vadd(Qn, Qm) SIMD floating point addition.
4472   // Instruction details available in ARM DDI 0406C.b, A8-830.
4473   emit(EncodeNeonBinOp(VADDF, dst, src1, src2));
4474 }
4475 
vadd(NeonSize size,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4476 void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
4477                      QwNeonRegister src2) {
4478   DCHECK(IsEnabled(NEON));
4479   // Qd = vadd(Qn, Qm) SIMD integer addition.
4480   // Instruction details available in ARM DDI 0406C.b, A8-828.
4481   emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
4482 }
4483 
vqadd(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4484 void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4485                       QwNeonRegister src2) {
4486   DCHECK(IsEnabled(NEON));
4487   // Qd = vqadd(Qn, Qm) SIMD integer saturating addition.
4488   // Instruction details available in ARM DDI 0406C.b, A8-996.
4489   emit(EncodeNeonBinOp(VQADD, dt, dst, src1, src2));
4490 }
4491 
vsub(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4492 void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
4493                      QwNeonRegister src2) {
4494   DCHECK(IsEnabled(NEON));
4495   // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
4496   // Instruction details available in ARM DDI 0406C.b, A8-1086.
4497   emit(EncodeNeonBinOp(VSUBF, dst, src1, src2));
4498 }
4499 
vsub(NeonSize size,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4500 void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
4501                      QwNeonRegister src2) {
4502   DCHECK(IsEnabled(NEON));
4503   // Qd = vsub(Qn, Qm) SIMD integer subtraction.
4504   // Instruction details available in ARM DDI 0406C.b, A8-1084.
4505   emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
4506 }
4507 
vqsub(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4508 void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4509                       QwNeonRegister src2) {
4510   DCHECK(IsEnabled(NEON));
4511   // Qd = vqsub(Qn, Qm) SIMD integer saturating subtraction.
4512   // Instruction details available in ARM DDI 0406C.b, A8-1020.
4513   emit(EncodeNeonBinOp(VQSUB, dt, dst, src1, src2));
4514 }
4515 
void Assembler::vmlal(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmlal(Dn, Dm) Vector Multiply Accumulate Long (integer)
  // Instruction details available in ARM DDI 0406C.b, A8-931.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);  // Element-size field (bits 21:20).
  int u = NeonU(dt);      // Signedness bit (U, bit 24).
  // Only the unsigned variant is emitted here; the signed form has not been
  // needed so far.
  if (!u) UNIMPLEMENTED();
  // size == 3 selects a different encoding group.
  DCHECK_NE(size, 3);  // SEE "Related encodings"
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0x8 * B8 | n * B7 | m * B5 | vm);
}
4534 
void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD floating point multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-958.
  emit(EncodeNeonBinOp(VMULF, dst, src1, src2));
}

void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD integer multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2));
}
4550 
void Assembler::vmull(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmull(Dn, Dm) Vector Multiply Long (integer).
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);  // Element-size field (bits 21:20).
  int u = NeonU(dt);      // Signedness bit (U, bit 24).
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0xC * B8 | n * B7 | m * B5 | vm);
}
4567 
vmin(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4568 void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1,
4569                      QwNeonRegister src2) {
4570   DCHECK(IsEnabled(NEON));
4571   // Qd = vmin(Qn, Qm) SIMD floating point MIN.
4572   // Instruction details available in ARM DDI 0406C.b, A8-928.
4573   emit(EncodeNeonBinOp(VMINF, dst, src1, src2));
4574 }
4575 
vmin(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4576 void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4577                      QwNeonRegister src2) {
4578   DCHECK(IsEnabled(NEON));
4579   // Qd = vmin(Qn, Qm) SIMD integer MIN.
4580   // Instruction details available in ARM DDI 0406C.b, A8-926.
4581   emit(EncodeNeonBinOp(VMIN, dt, dst, src1, src2));
4582 }
4583 
vmax(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4584 void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
4585                      QwNeonRegister src2) {
4586   DCHECK(IsEnabled(NEON));
4587   // Qd = vmax(Qn, Qm) SIMD floating point MAX.
4588   // Instruction details available in ARM DDI 0406C.b, A8-928.
4589   emit(EncodeNeonBinOp(VMAXF, dst, src1, src2));
4590 }
4591 
vmax(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4592 void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4593                      QwNeonRegister src2) {
4594   DCHECK(IsEnabled(NEON));
4595   // Qd = vmax(Qn, Qm) SIMD integer MAX.
4596   // Instruction details available in ARM DDI 0406C.b, A8-926.
4597   emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
4598 }
4599 
// NEON shift operations: VSHL by register is encoded by
// EncodeNeonShiftRegisterOp; the immediate-shift forms by EncodeNeonShiftOp.
enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI, VSRA };
4601 
// Builds a shift-by-register instruction. Only VSHL is supported (asserted
// below). Note the operand placement: the register holding the shift amounts
// goes into the Vn slot, the data register into Vm. Signedness (U, bit 24)
// and element size (bits 21:20) come from dt; NeonSplitCode also folds the
// D/Q form selection into op_encoding.
static Instr EncodeNeonShiftRegisterOp(NeonShiftOp op, NeonDataType dt,
                                       NeonRegType reg_type, int dst_code,
                                       int src_code, int shift_code) {
  DCHECK_EQ(op, VSHL);
  int op_encoding = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, shift_code, &vn, &n, &op_encoding);
  int size = NeonSz(dt);
  int u = NeonU(dt);

  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         0x4 * B8 | n * B7 | m * B5 | vm | op_encoding;
}
4619 
// Builds a shift-by-immediate instruction. The shift amount and element size
// are jointly encoded in the imm6 field (plus L, bit 7): left shifts (VSHL,
// VSLI) store size_in_bits + shift, right shifts (VSHR, VSRI, VSRA) store
// 2 * size_in_bits - shift, so the decoder can recover both values.
// op_encoding carries the opcode bits plus D/Q selection via NeonSplitCode.
static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
                               NeonRegType reg_type, int dst_code, int src_code,
                               int shift) {
  // 8 << size: lane width in bits (8/16/32/64).
  int size_in_bits = kBitsPerByte << static_cast<int>(size);
  int op_encoding = 0, imm6 = 0, L = 0;
  switch (op) {
    case VSHL: {
      // Left shift of 0..size-1 bits.
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = 0x5 * B8;
      break;
    }
    case VSHR: {
      // Right shift of 1..size bits.
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    case VSLI: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = B24 | 0x5 * B8;
      break;
    }
    case VSRI: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      op_encoding = B24 | 0x4 * B8;
      break;
    }
    case VSRA: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      op_encoding = B8;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    default:
      UNREACHABLE();
  }

  // The immediate is 7 bits wide: bit 6 is stored as L (instruction bit 7),
  // the low 6 bits as imm6 (bits 21:16).
  L = imm6 >> 6;
  imm6 &= 0x3F;

  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | L * B7 | m * B5 | B4 |
         vm | op_encoding;
}
4672 
vshl(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src,int shift)4673 void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
4674                      int shift) {
4675   DCHECK(IsEnabled(NEON));
4676   // Qd = vshl(Qm, bits) SIMD shift left immediate.
4677   // Instruction details available in ARM DDI 0406C.b, A8-1046.
4678   emit(EncodeNeonShiftOp(VSHL, NeonDataTypeToSize(dt), false, NEON_Q,
4679                          dst.code(), src.code(), shift));
4680 }
4681 
vshl(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src,QwNeonRegister shift)4682 void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
4683                      QwNeonRegister shift) {
4684   DCHECK(IsEnabled(NEON));
4685   // Qd = vshl(Qm, Qn) SIMD shift left Register.
4686   // Instruction details available in ARM DDI 0487A.a, F8-3340..
4687   emit(EncodeNeonShiftRegisterOp(VSHL, dt, NEON_Q, dst.code(), src.code(),
4688                                  shift.code()));
4689 }
4690 
vshr(NeonDataType dt,DwVfpRegister dst,DwVfpRegister src,int shift)4691 void Assembler::vshr(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
4692                      int shift) {
4693   DCHECK(IsEnabled(NEON));
4694   // Dd = vshr(Dm, bits) SIMD shift right immediate.
4695   // Instruction details available in ARM DDI 0406C.b, A8-1052.
4696   emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
4697                          dst.code(), src.code(), shift));
4698 }
4699 
vshr(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src,int shift)4700 void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
4701                      int shift) {
4702   DCHECK(IsEnabled(NEON));
4703   // Qd = vshr(Qm, bits) SIMD shift right immediate.
4704   // Instruction details available in ARM DDI 0406C.b, A8-1052.
4705   emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
4706                          dst.code(), src.code(), shift));
4707 }
4708 
vsli(NeonSize size,DwVfpRegister dst,DwVfpRegister src,int shift)4709 void Assembler::vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
4710                      int shift) {
4711   DCHECK(IsEnabled(NEON));
4712   // Dd = vsli(Dm, bits) SIMD shift left and insert.
4713   // Instruction details available in ARM DDI 0406C.b, A8-1056.
4714   emit(EncodeNeonShiftOp(VSLI, size, false, NEON_D, dst.code(), src.code(),
4715                          shift));
4716 }
4717 
vsri(NeonSize size,DwVfpRegister dst,DwVfpRegister src,int shift)4718 void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
4719                      int shift) {
4720   DCHECK(IsEnabled(NEON));
4721   // Dd = vsri(Dm, bits) SIMD shift right and insert.
4722   // Instruction details available in ARM DDI 0406C.b, A8-1062.
4723   emit(EncodeNeonShiftOp(VSRI, size, false, NEON_D, dst.code(), src.code(),
4724                          shift));
4725 }
4726 
vsra(NeonDataType dt,DwVfpRegister dst,DwVfpRegister src,int imm)4727 void Assembler::vsra(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src,
4728                      int imm) {
4729   DCHECK(IsEnabled(NEON));
4730   // Dd = vsra(Dm, imm) SIMD shift right and accumulate.
4731   // Instruction details available in ARM DDI 0487F.b, F6-5569.
4732   emit(EncodeNeonShiftOp(VSRA, NeonDataTypeToSize(dt), NeonU(dt), NEON_D,
4733                          dst.code(), src.code(), imm));
4734 }
4735 
vrecpe(QwNeonRegister dst,QwNeonRegister src)4736 void Assembler::vrecpe(QwNeonRegister dst, QwNeonRegister src) {
4737   DCHECK(IsEnabled(NEON));
4738   // Qd = vrecpe(Qm) SIMD reciprocal estimate.
4739   // Instruction details available in ARM DDI 0406C.b, A8-1024.
4740   emit(EncodeNeonUnaryOp(VRECPE, NEON_Q, Neon32, dst.code(), src.code()));
4741 }
4742 
vrsqrte(QwNeonRegister dst,QwNeonRegister src)4743 void Assembler::vrsqrte(QwNeonRegister dst, QwNeonRegister src) {
4744   DCHECK(IsEnabled(NEON));
4745   // Qd = vrsqrte(Qm) SIMD reciprocal square root estimate.
4746   // Instruction details available in ARM DDI 0406C.b, A8-1038.
4747   emit(EncodeNeonUnaryOp(VRSQRTE, NEON_Q, Neon32, dst.code(), src.code()));
4748 }
4749 
vrecps(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4750 void Assembler::vrecps(QwNeonRegister dst, QwNeonRegister src1,
4751                        QwNeonRegister src2) {
4752   DCHECK(IsEnabled(NEON));
4753   // Qd = vrecps(Qn, Qm) SIMD reciprocal refinement step.
4754   // Instruction details available in ARM DDI 0406C.b, A8-1026.
4755   emit(EncodeNeonBinOp(VRECPS, dst, src1, src2));
4756 }
4757 
vrsqrts(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4758 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
4759                         QwNeonRegister src2) {
4760   DCHECK(IsEnabled(NEON));
4761   // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.
4762   // Instruction details available in ARM DDI 0406C.b, A8-1040.
4763   emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
4764 }
4765 
// Pairwise NEON operations on D registers; encoded by EncodeNeonPairwiseOp.
enum NeonPairwiseOp { VPADD, VPMIN, VPMAX };
4767 
// Builds an integer pairwise instruction on doubleword registers:
// Dd = op(Dn, Dm). op_encoding selects the opcode bits (opc field at bits
// 11:8 plus B4); element size (bits 21:20) and signedness (U, bit 24) come
// from dt. Unlike the three-register-same-length encoders above, B6 (Q) is
// not set: pairwise ops only exist in the D form.
static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
                                  DwVfpRegister dst, DwVfpRegister src1,
                                  DwVfpRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VPADD:
      op_encoding = 0xB * B8 | B4;
      break;
    case VPMIN:
      op_encoding = 0xA * B8 | B4;
      break;
    case VPMAX:
      op_encoding = 0xA * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | m * B5 | vm | op_encoding;
}
4796 
void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD floating point pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-982.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  // The float form uses its own fixed template (0x1E6 << 23) rather than
  // EncodeNeonPairwiseOp, which handles the integer variants.
  emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
       m * B5 | vm);
}
4812 
vpadd(NeonSize size,DwVfpRegister dst,DwVfpRegister src1,DwVfpRegister src2)4813 void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
4814                       DwVfpRegister src2) {
4815   DCHECK(IsEnabled(NEON));
4816   // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD.
4817   // Instruction details available in ARM DDI 0406C.b, A8-980.
4818   emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDataType(size), dst, src1, src2));
4819 }
4820 
vpmin(NeonDataType dt,DwVfpRegister dst,DwVfpRegister src1,DwVfpRegister src2)4821 void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
4822                       DwVfpRegister src2) {
4823   DCHECK(IsEnabled(NEON));
4824   // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN.
4825   // Instruction details available in ARM DDI 0406C.b, A8-986.
4826   emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));
4827 }
4828 
vpmax(NeonDataType dt,DwVfpRegister dst,DwVfpRegister src1,DwVfpRegister src2)4829 void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
4830                       DwVfpRegister src2) {
4831   DCHECK(IsEnabled(NEON));
4832   // Dd = vpmax(Dn, Dm) SIMD integer pairwise MAX.
4833   // Instruction details available in ARM DDI 0406C.b, A8-986.
4834   emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2));
4835 }
4836 
vrintm(NeonDataType dt,const QwNeonRegister dst,const QwNeonRegister src)4837 void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
4838                        const QwNeonRegister src) {
4839   // SIMD vector round floating-point to integer towards -Infinity.
4840   // See ARM DDI 0487F.b, F6-5493.
4841   DCHECK(IsEnabled(ARMv8));
4842   emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, NeonSize(dt), dst.code(), src.code()));
4843 }
4844 
vrintn(NeonDataType dt,const QwNeonRegister dst,const QwNeonRegister src)4845 void Assembler::vrintn(NeonDataType dt, const QwNeonRegister dst,
4846                        const QwNeonRegister src) {
4847   // SIMD vector round floating-point to integer to Nearest.
4848   // See ARM DDI 0487F.b, F6-5497.
4849   DCHECK(IsEnabled(ARMv8));
4850   emit(EncodeNeonUnaryOp(VRINTN, NEON_Q, NeonSize(dt), dst.code(), src.code()));
4851 }
4852 
vrintp(NeonDataType dt,const QwNeonRegister dst,const QwNeonRegister src)4853 void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
4854                        const QwNeonRegister src) {
4855   // SIMD vector round floating-point to integer towards +Infinity.
4856   // See ARM DDI 0487F.b, F6-5501.
4857   DCHECK(IsEnabled(ARMv8));
4858   emit(EncodeNeonUnaryOp(VRINTP, NEON_Q, NeonSize(dt), dst.code(), src.code()));
4859 }
4860 
vrintz(NeonDataType dt,const QwNeonRegister dst,const QwNeonRegister src)4861 void Assembler::vrintz(NeonDataType dt, const QwNeonRegister dst,
4862                        const QwNeonRegister src) {
4863   // SIMD vector round floating-point to integer towards Zero.
4864   // See ARM DDI 0487F.b, F6-5511.
4865   DCHECK(IsEnabled(ARMv8));
4866   emit(EncodeNeonUnaryOp(VRINTZ, NEON_Q, NeonSize(dt), dst.code(), src.code()));
4867 }
4868 
vtst(NeonSize size,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4869 void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
4870                      QwNeonRegister src2) {
4871   DCHECK(IsEnabled(NEON));
4872   // Qd = vtst(Qn, Qm) SIMD test integer operands.
4873   // Instruction details available in ARM DDI 0406C.b, A8-1098.
4874   emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));
4875 }
4876 
vceq(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4877 void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,
4878                      QwNeonRegister src2) {
4879   DCHECK(IsEnabled(NEON));
4880   // Qd = vceq(Qn, Qm) SIMD floating point compare equal.
4881   // Instruction details available in ARM DDI 0406C.b, A8-844.
4882   emit(EncodeNeonBinOp(VCEQF, dst, src1, src2));
4883 }
4884 
vceq(NeonSize size,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4885 void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
4886                      QwNeonRegister src2) {
4887   DCHECK(IsEnabled(NEON));
4888   // Qd = vceq(Qn, Qm) SIMD integer compare equal.
4889   // Instruction details available in ARM DDI 0406C.b, A8-844.
4890   emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
4891 }
4892 
vceq(NeonSize size,QwNeonRegister dst,QwNeonRegister src1,int value)4893 void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
4894                      int value) {
4895   DCHECK(IsEnabled(NEON));
4896   DCHECK_EQ(0, value);
4897   // Qd = vceq(Qn, Qm, #0) Vector Compare Equal to Zero.
4898   // Instruction details available in ARM DDI 0406C.d, A8-847.
4899   emit(EncodeNeonUnaryOp(VCEQ0, NEON_Q, size, dst.code(), src1.code()));
4900 }
4901 
vcge(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4902 void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
4903                      QwNeonRegister src2) {
4904   DCHECK(IsEnabled(NEON));
4905   // Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
4906   // Instruction details available in ARM DDI 0406C.b, A8-848.
4907   emit(EncodeNeonBinOp(VCGEF, dst, src1, src2));
4908 }
4909 
vcge(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4910 void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4911                      QwNeonRegister src2) {
4912   DCHECK(IsEnabled(NEON));
4913   // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
4914   // Instruction details available in ARM DDI 0406C.b, A8-848.
4915   emit(EncodeNeonBinOp(VCGE, dt, dst, src1, src2));
4916 }
4917 
vcgt(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4918 void Assembler::vcgt(QwNeonRegister dst, QwNeonRegister src1,
4919                      QwNeonRegister src2) {
4920   DCHECK(IsEnabled(NEON));
4921   // Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
4922   // Instruction details available in ARM DDI 0406C.b, A8-852.
4923   emit(EncodeNeonBinOp(VCGTF, dst, src1, src2));
4924 }
4925 
vcgt(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4926 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4927                      QwNeonRegister src2) {
4928   DCHECK(IsEnabled(NEON));
4929   // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
4930   // Instruction details available in ARM DDI 0406C.b, A8-852.
4931   emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2));
4932 }
4933 
vclt(NeonSize size,QwNeonRegister dst,QwNeonRegister src,int value)4934 void Assembler::vclt(NeonSize size, QwNeonRegister dst, QwNeonRegister src,
4935                      int value) {
4936   DCHECK(IsEnabled(NEON));
4937   DCHECK_EQ(0, value);
4938   // vclt.<size>(Qn, Qm, #0) SIMD Vector Compare Less Than Zero.
4939   // Instruction details available in ARM DDI 0487F.b, F6-5072.
4940   emit(EncodeNeonUnaryOp(VCLT0, NEON_Q, size, dst.code(), src.code()));
4941 }
4942 
vrhadd(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4943 void Assembler::vrhadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4944                        QwNeonRegister src2) {
4945   DCHECK(IsEnabled(NEON));
4946   // Qd = vrhadd(Qn, Qm) SIMD integer rounding halving add.
4947   // Instruction details available in ARM DDI 0406C.b, A8-1030.
4948   emit(EncodeNeonBinOp(VRHADD, dt, dst, src1, src2));
4949 }
4950 
void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2, int bytes) {
  DCHECK(IsEnabled(NEON));
  // Qd = vext(Qn, Qm) SIMD byte extract.
  // Instruction details available in ARM DDI 0406C.b, A8-890.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  // The byte offset occupies a 4-bit immediate field, so it must be < 16.
  DCHECK_GT(16, bytes);
  emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
       n * B7 | B6 | m * B5 | vm);
}
4966 
vzip(NeonSize size,DwVfpRegister src1,DwVfpRegister src2)4967 void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
4968   if (size == Neon32) {  // vzip.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
4969     vtrn(size, src1, src2);
4970   } else {
4971     DCHECK(IsEnabled(NEON));
4972     // vzip.<size>(Dn, Dm) SIMD zip (interleave).
4973     // Instruction details available in ARM DDI 0406C.b, A8-1102.
4974     emit(EncodeNeonUnaryOp(VZIP, NEON_D, size, src1.code(), src2.code()));
4975   }
4976 }
4977 
vzip(NeonSize size,QwNeonRegister src1,QwNeonRegister src2)4978 void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
4979   DCHECK(IsEnabled(NEON));
4980   // vzip.<size>(Qn, Qm) SIMD zip (interleave).
4981   // Instruction details available in ARM DDI 0406C.b, A8-1102.
4982   emit(EncodeNeonUnaryOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
4983 }
4984 
vuzp(NeonSize size,DwVfpRegister src1,DwVfpRegister src2)4985 void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
4986   if (size == Neon32) {  // vuzp.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
4987     vtrn(size, src1, src2);
4988   } else {
4989     DCHECK(IsEnabled(NEON));
4990     // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave).
4991     // Instruction details available in ARM DDI 0406C.b, A8-1100.
4992     emit(EncodeNeonUnaryOp(VUZP, NEON_D, size, src1.code(), src2.code()));
4993   }
4994 }
4995 
vuzp(NeonSize size,QwNeonRegister src1,QwNeonRegister src2)4996 void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
4997   DCHECK(IsEnabled(NEON));
4998   // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
4999   // Instruction details available in ARM DDI 0406C.b, A8-1100.
5000   emit(EncodeNeonUnaryOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
5001 }
5002 
vrev16(NeonSize size,QwNeonRegister dst,QwNeonRegister src)5003 void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
5004   DCHECK(IsEnabled(NEON));
5005   // Qd = vrev16.<size>(Qm) SIMD element reverse.
5006   // Instruction details available in ARM DDI 0406C.b, A8-1028.
5007   emit(EncodeNeonUnaryOp(VREV16, NEON_Q, size, dst.code(), src.code()));
5008 }
5009 
vrev32(NeonSize size,QwNeonRegister dst,QwNeonRegister src)5010 void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
5011   DCHECK(IsEnabled(NEON));
5012   // Qd = vrev32.<size>(Qm) SIMD element reverse.
5013   // Instruction details available in ARM DDI 0406C.b, A8-1028.
5014   emit(EncodeNeonUnaryOp(VREV32, NEON_Q, size, dst.code(), src.code()));
5015 }
5016 
vrev64(NeonSize size,QwNeonRegister dst,QwNeonRegister src)5017 void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
5018   DCHECK(IsEnabled(NEON));
5019   // Qd = vrev64.<size>(Qm) SIMD element reverse.
5020   // Instruction details available in ARM DDI 0406C.b, A8-1028.
5021   emit(EncodeNeonUnaryOp(VREV64, NEON_Q, size, dst.code(), src.code()));
5022 }
5023 
vtrn(NeonSize size,DwVfpRegister src1,DwVfpRegister src2)5024 void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
5025   DCHECK(IsEnabled(NEON));
5026   // vtrn.<size>(Dn, Dm) SIMD element transpose.
5027   // Instruction details available in ARM DDI 0406C.b, A8-1096.
5028   emit(EncodeNeonUnaryOp(VTRN, NEON_D, size, src1.code(), src2.code()));
5029 }
5030 
vtrn(NeonSize size,QwNeonRegister src1,QwNeonRegister src2)5031 void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
5032   DCHECK(IsEnabled(NEON));
5033   // vtrn.<size>(Qn, Qm) SIMD element transpose.
5034   // Instruction details available in ARM DDI 0406C.b, A8-1096.
5035   emit(EncodeNeonUnaryOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
5036 }
5037 
vpadal(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src)5038 void Assembler::vpadal(NeonDataType dt, QwNeonRegister dst,
5039                        QwNeonRegister src) {
5040   DCHECK(IsEnabled(NEON));
5041   // vpadal.<dt>(Qd, Qm) SIMD Vector Pairwise Add and Accumulate Long
5042   emit(EncodeNeonUnaryOp(NeonU(dt) ? VPADAL_U : VPADAL_S, NEON_Q,
5043                          NeonDataTypeToSize(dt), dst.code(), src.code()));
5044 }
5045 
vpaddl(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src)5046 void Assembler::vpaddl(NeonDataType dt, QwNeonRegister dst,
5047                        QwNeonRegister src) {
5048   DCHECK(IsEnabled(NEON));
5049   // vpaddl.<dt>(Qd, Qm) SIMD Vector Pairwise Add Long.
5050   emit(EncodeNeonUnaryOp(NeonU(dt) ? VPADDL_U : VPADDL_S, NEON_Q,
5051                          NeonDataTypeToSize(dt), dst.code(), src.code()));
5052 }
5053 
vqrdmulh(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)5054 void Assembler::vqrdmulh(NeonDataType dt, QwNeonRegister dst,
5055                          QwNeonRegister src1, QwNeonRegister src2) {
5056   DCHECK(IsEnabled(NEON));
5057   DCHECK(dt == NeonS16 || dt == NeonS32);
5058   emit(EncodeNeonBinOp(VQRDMULH, dt, dst, src1, src2));
5059 }
5060 
vcnt(QwNeonRegister dst,QwNeonRegister src)5061 void Assembler::vcnt(QwNeonRegister dst, QwNeonRegister src) {
5062   // Qd = vcnt(Qm) SIMD Vector Count Set Bits.
5063   // Instruction details available at ARM DDI 0487F.b, F6-5094.
5064   DCHECK(IsEnabled(NEON));
5065   emit(EncodeNeonUnaryOp(VCNT, NEON_Q, Neon8, dst.code(), src.code()));
5066 }
5067 
// Encode NEON vtbl / vtbx instruction.
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
                           DwVfpRegister index, bool vtbx) {
  // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  // Split each register code into its 4-bit field and the high "D/N/M" bit
  // that lives elsewhere in the encoding.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  list.base().split_code(&vn, &n);
  int vm, m;
  index.split_code(&vm, &m);
  int op = vtbx ? 1 : 0;  // vtbl = 0, vtbx = 1.
  // Assemble the fixed opcode bits with the register, list-length and op
  // fields at their architectural positions.
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
         list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;
}
5085 
vtbl(DwVfpRegister dst,const NeonListOperand & list,DwVfpRegister index)5086 void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list,
5087                      DwVfpRegister index) {
5088   DCHECK(IsEnabled(NEON));
5089   emit(EncodeNeonVTB(dst, list, index, false));
5090 }
5091 
vtbx(DwVfpRegister dst,const NeonListOperand & list,DwVfpRegister index)5092 void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list,
5093                      DwVfpRegister index) {
5094   DCHECK(IsEnabled(NEON));
5095   emit(EncodeNeonVTB(dst, list, index, true));
5096 }
5097 
5098 // Pseudo instructions.
nop(int type)5099 void Assembler::nop(int type) {
5100   // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
5101   // some of the CPU's pipeline and has to issue. Older ARM chips simply used
5102   // MOV Rx, Rx as NOP and it performs better even in newer CPUs.
5103   // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
5104   // a type.
5105   DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
5106   emit(al | 13 * B21 | type * B12 | type);
5107 }
5108 
pop()5109 void Assembler::pop() { add(sp, sp, Operand(kPointerSize)); }
5110 
IsMovT(Instr instr)5111 bool Assembler::IsMovT(Instr instr) {
5112   instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
5113              ((kNumRegisters - 1) * B12) |        // mask out register
5114              EncodeMovwImmediate(0xFFFF));        // mask out immediate value
5115   return instr == kMovtPattern;
5116 }
5117 
IsMovW(Instr instr)5118 bool Assembler::IsMovW(Instr instr) {
5119   instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
5120              ((kNumRegisters - 1) * B12) |        // mask out destination
5121              EncodeMovwImmediate(0xFFFF));        // mask out immediate value
5122   return instr == kMovwPattern;
5123 }
5124 
GetMovTPattern()5125 Instr Assembler::GetMovTPattern() { return kMovtPattern; }
5126 
GetMovWPattern()5127 Instr Assembler::GetMovWPattern() { return kMovwPattern; }
5128 
EncodeMovwImmediate(uint32_t immediate)5129 Instr Assembler::EncodeMovwImmediate(uint32_t immediate) {
5130   DCHECK_LT(immediate, 0x10000);
5131   return ((immediate & 0xF000) << 4) | (immediate & 0xFFF);
5132 }
5133 
PatchMovwImmediate(Instr instruction,uint32_t immediate)5134 Instr Assembler::PatchMovwImmediate(Instr instruction, uint32_t immediate) {
5135   instruction &= ~EncodeMovwImmediate(0xFFFF);
5136   return instruction | EncodeMovwImmediate(immediate);
5137 }
5138 
DecodeShiftImm(Instr instr)5139 int Assembler::DecodeShiftImm(Instr instr) {
5140   int rotate = Instruction::RotateValue(instr) * 2;
5141   int immed8 = Instruction::Immed8Value(instr);
5142   return base::bits::RotateRight32(immed8, rotate);
5143 }
5144 
PatchShiftImm(Instr instr,int immed)5145 Instr Assembler::PatchShiftImm(Instr instr, int immed) {
5146   uint32_t rotate_imm = 0;
5147   uint32_t immed_8 = 0;
5148   bool immed_fits = FitsShifter(immed, &rotate_imm, &immed_8, nullptr);
5149   DCHECK(immed_fits);
5150   USE(immed_fits);
5151   return (instr & ~kOff12Mask) | (rotate_imm << 8) | immed_8;
5152 }
5153 
IsNop(Instr instr,int type)5154 bool Assembler::IsNop(Instr instr, int type) {
5155   DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
5156   // Check for mov rx, rx where x = type.
5157   return instr == (al | 13 * B21 | type * B12 | type);
5158 }
5159 
// True if |instr| matches the mov-with-immediate pattern after masking off
// its variable fields.
bool Assembler::IsMovImmed(Instr instr) {
  return (instr & kMovImmedMask) == kMovImmedPattern;
}
5163 
// True if |instr| matches the orr-with-immediate pattern after masking off
// its variable fields.
bool Assembler::IsOrrImmed(Instr instr) {
  return (instr & kOrrImmedMask) == kOrrImmedPattern;
}
5167 
5168 // static
ImmediateFitsAddrMode1Instruction(int32_t imm32)5169 bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
5170   uint32_t dummy1;
5171   uint32_t dummy2;
5172   return FitsShifter(imm32, &dummy1, &dummy2, nullptr);
5173 }
5174 
ImmediateFitsAddrMode2Instruction(int32_t imm32)5175 bool Assembler::ImmediateFitsAddrMode2Instruction(int32_t imm32) {
5176   return is_uint12(abs(imm32));
5177 }
5178 
5179 // Debugging.
RecordConstPool(int size)5180 void Assembler::RecordConstPool(int size) {
5181   // We only need this for debugger support, to correctly compute offsets in the
5182   // code.
5183   RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
5184 }
5185 
// Grows the code buffer, relocating the already-emitted code (which lives at
// the start of the buffer) and the reloc info (which grows downward from the
// end), then repositions pc_ and the reloc writer accordingly.
void Assembler::GrowBuffer() {
  DCHECK_EQ(buffer_start_, buffer_->start());

  // Compute new buffer size.
  int old_size = buffer_->size();
  int new_size = std::min(2 * old_size, old_size + 1 * MB);

  // Some internal data structures overflow for very large buffers,
  // they must ensure that kMaximalBufferSize is not too large.
  if (new_size > kMaximalBufferSize) {
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
  }

  // Set up new buffer.
  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
  DCHECK_EQ(new_size, new_buffer->size());
  byte* new_start = new_buffer->start();

  // Copy the data.
  // pc_delta: shift applied to code addresses; rc_delta: shift applied to
  // reloc-info addresses (reloc info is anchored to the buffer end).
  int pc_delta = new_start - buffer_start_;
  int rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
  MemMove(new_start, buffer_start_, pc_offset());
  byte* new_reloc_start = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.pos()) + rc_delta);
  MemMove(new_reloc_start, reloc_info_writer.pos(), reloc_size);

  // Switch buffers.
  buffer_ = std::move(new_buffer);
  buffer_start_ = new_start;
  pc_ = reinterpret_cast<byte*>(reinterpret_cast<Address>(pc_) + pc_delta);
  byte* new_last_pc = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.last_pc()) + pc_delta);
  reloc_info_writer.Reposition(new_reloc_start, new_last_pc);

  // None of our relocation types are pc relative pointing outside the code
  // buffer nor pc absolute pointing inside the code buffer, so there is no need
  // to relocate any emitted relocation entries.
}
5225 
db(uint8_t data)5226 void Assembler::db(uint8_t data) {
5227   // db is used to write raw data. The constant pool should be emitted or
5228   // blocked before using db.
5229   DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
5230   CheckBuffer();
5231   *reinterpret_cast<uint8_t*>(pc_) = data;
5232   pc_ += sizeof(uint8_t);
5233 }
5234 
dd(uint32_t data,RelocInfo::Mode rmode)5235 void Assembler::dd(uint32_t data, RelocInfo::Mode rmode) {
5236   // dd is used to write raw data. The constant pool should be emitted or
5237   // blocked before using dd.
5238   DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
5239   CheckBuffer();
5240   if (!RelocInfo::IsNoInfo(rmode)) {
5241     DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
5242            RelocInfo::IsLiteralConstant(rmode));
5243     RecordRelocInfo(rmode);
5244   }
5245   base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), data);
5246   pc_ += sizeof(uint32_t);
5247 }
5248 
dq(uint64_t value,RelocInfo::Mode rmode)5249 void Assembler::dq(uint64_t value, RelocInfo::Mode rmode) {
5250   // dq is used to write raw data. The constant pool should be emitted or
5251   // blocked before using dq.
5252   DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
5253   CheckBuffer();
5254   if (!RelocInfo::IsNoInfo(rmode)) {
5255     DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
5256            RelocInfo::IsLiteralConstant(rmode));
5257     RecordRelocInfo(rmode);
5258   }
5259   base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), value);
5260   pc_ += sizeof(uint64_t);
5261 }
5262 
RecordRelocInfo(RelocInfo::Mode rmode,intptr_t data)5263 void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
5264   if (!ShouldRecordRelocInfo(rmode)) return;
5265   DCHECK_GE(buffer_space(), kMaxRelocSize);  // too late to grow buffer here
5266   RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
5267   reloc_info_writer.Write(&rinfo);
5268 }
5269 
// Queues a 32-bit constant for the next constant pool emission. |position| is
// the pc offset of the instruction that will load the constant; duplicate
// shareable constants are merged onto an earlier entry.
void Assembler::ConstantPoolAddEntry(int position, RelocInfo::Mode rmode,
                                     intptr_t value) {
  DCHECK(rmode != RelocInfo::CONST_POOL);
  // We can share CODE_TARGETs and embedded objects, but we must make sure we
  // only emit one reloc info for them (thus delta patching will apply the delta
  // only once). At the moment, we do not deduplicate heap object request which
  // are indicated by value == 0.
  bool sharing_ok = RelocInfo::IsShareableRelocMode(rmode) ||
                    (rmode == RelocInfo::CODE_TARGET && value != 0) ||
                    (RelocInfo::IsEmbeddedObjectMode(rmode) && value != 0);
  DCHECK_LT(pending_32_bit_constants_.size(), kMaxNumPending32Constants);
  if (first_const_pool_32_use_ < 0) {
    // First pending entry: start tracking the use site and arm the deadline.
    DCHECK(pending_32_bit_constants_.empty());
    DCHECK_EQ(constant_pool_deadline_, kMaxInt);
    first_const_pool_32_use_ = position;
    constant_pool_deadline_ = position + kCheckPoolDeadline;
  } else {
    DCHECK(!pending_32_bit_constants_.empty());
  }
  ConstantPoolEntry entry(position, value, sharing_ok, rmode);

  bool shared = false;
  if (sharing_ok) {
    // Merge the constant, if possible.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& current_entry = pending_32_bit_constants_[i];
      if (!current_entry.sharing_ok()) continue;
      if (entry.value() == current_entry.value() &&
          entry.rmode() == current_entry.rmode()) {
        entry.set_merged_index(i);
        shared = true;
        break;
      }
    }
  }

  pending_32_bit_constants_.emplace_back(entry);

  // Make sure the constant pool is not emitted in place of the next
  // instruction for which we just recorded relocation info.
  BlockConstPoolFor(1);

  // Emit relocation info.
  // Merged entries reuse the reloc info of the entry they merged into.
  if (MustOutputRelocInfo(rmode, this) && !shared) {
    RecordRelocInfo(rmode);
  }
}
5317 
BlockConstPoolFor(int instructions)5318 void Assembler::BlockConstPoolFor(int instructions) {
5319   int pc_limit = pc_offset() + instructions * kInstrSize;
5320   if (no_const_pool_before_ < pc_limit) {
5321     no_const_pool_before_ = pc_limit;
5322   }
5323 
5324   // If we're due a const pool check before the block finishes, move it to just
5325   // after the block.
5326   if (constant_pool_deadline_ < no_const_pool_before_) {
5327     // Make sure that the new deadline isn't too late (including a jump and the
5328     // constant pool marker).
5329     DCHECK_LE(no_const_pool_before_,
5330               first_const_pool_32_use_ + kMaxDistToIntPool);
5331     constant_pool_deadline_ = no_const_pool_before_;
5332   }
5333 }
5334 
// Emits the pending 32-bit constant pool if forced, or if the oldest pending
// use is getting close to the pc-relative load range limit. |require_jump|
// means the pool would interrupt live code, so a branch over it is emitted.
void Assembler::CheckConstPool(bool force_emit, bool require_jump) {
  // Some short sequence of instruction mustn't be broken up by constant pool
  // emission, such sequences are protected by calls to BlockConstPoolFor and
  // BlockConstPoolScope.
  if (is_const_pool_blocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK(!force_emit);
    return;
  }

  // There is nothing to do if there are no pending constant pool entries.
  if (pending_32_bit_constants_.empty()) {
    // We should only fall into this case if we're either trying to forcing
    // emission or opportunistically checking after a jump.
    DCHECK(force_emit || !require_jump);
    return;
  }

  // We emit a constant pool when:
  //  * requested to do so by parameter force_emit (e.g. after each function).
  //  * the distance from the first instruction accessing the constant pool to
  //    the first constant pool entry will exceed its limit the next time the
  //    pool is checked.
  //  * the instruction doesn't require a jump after itself to jump over the
  //    constant pool, and we're getting close to running out of range.
  if (!force_emit) {
    DCHECK_NE(first_const_pool_32_use_, -1);
    int dist32 = pc_offset() - first_const_pool_32_use_;
    if (require_jump) {
      // We should only be on this path if we've exceeded our deadline.
      DCHECK_GE(dist32, kCheckPoolDeadline);
    } else if (dist32 < kCheckPoolDeadline / 2) {
      // Opportunistic check, and we are still far from the deadline: wait.
      return;
    }
  }

  int size_after_marker = pending_32_bit_constants_.size() * kPointerSize;

  // Deduplicate constants.
  // Merged entries occupy no pool slot of their own.
  for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_32_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kPointerSize;
  }

  // Check that the code buffer is large enough before emitting the constant
  // pool (include the jump over the pool and the constant pool marker and
  // the gap to the relocation information).
  int jump_instr = require_jump ? kInstrSize : 0;
  int size_up_to_marker = jump_instr + kInstrSize;
  int size = size_up_to_marker + size_after_marker;
  int needed_space = size + kGap;
  while (buffer_space() <= needed_space) GrowBuffer();

  {
    ASM_CODE_COMMENT_STRING(this, "Constant Pool");
    // Block recursive calls to CheckConstPool.
    BlockConstPoolScope block_const_pool(this);
    RecordConstPool(size);

    Label size_check;
    bind(&size_check);

    // Emit jump over constant pool if necessary.
    Label after_pool;
    if (require_jump) {
      b(&after_pool);
    }

    // Put down constant pool marker "Undefined instruction".
    // The data size helps disassembly know what to print.
    emit(kConstantPoolMarker |
         EncodeConstantPoolLength(size_after_marker / kPointerSize));

    // The first entry in the constant pool should also be the first use of
    // the pool, and it cannot itself have been merged into another entry.
    CHECK_EQ(first_const_pool_32_use_, pending_32_bit_constants_[0].position());
    CHECK(!pending_32_bit_constants_[0].is_merged());

    // Make sure we're not emitting the constant too late.
    CHECK_LE(pc_offset(),
             first_const_pool_32_use_ + kMaxDistToPcRelativeConstant);

    // Check that the code buffer is large enough before emitting the constant
    // pool (this includes the gap to the relocation information).
    int needed_space = pending_32_bit_constants_.size() * kPointerSize + kGap;
    while (buffer_space() <= needed_space) {
      GrowBuffer();
    }

    // Emit 32-bit constant pool entries.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_32_bit_constants_[i];
      Instr instr = instr_at(entry.position());

      // 64-bit loads shouldn't get here.
      DCHECK(!IsVldrDPcImmediateOffset(instr));
      DCHECK(!IsMovW(instr));
      DCHECK(IsLdrPcImmediateOffset(instr) &&
             GetLdrRegisterImmediateOffset(instr) == 0);

      // Offset from the load instruction's effective pc to this pool slot.
      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint12(delta));
      // 0 is the smallest delta:
      //   ldr rd, [pc, #0]
      //   constant pool marker
      //   data

      if (entry.is_merged()) {
        // Redirect this load to the slot of the entry it was merged with.
        DCHECK(entry.sharing_ok());
        ConstantPoolEntry& merged =
            pending_32_bit_constants_[entry.merged_index()];
        DCHECK(entry.value() == merged.value());
        DCHECK_LT(merged.position(), entry.position());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsLdrPcImmediateOffset(merged_instr));
        delta = GetLdrRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      // Patch the ldr with the final pc-relative offset.
      instr_at_put(entry.position(),
                   SetLdrRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        emit(entry.value());
      }
    }

    pending_32_bit_constants_.clear();

    first_const_pool_32_use_ = -1;

    DCHECK_EQ(size, SizeOfCodeGeneratedSince(&size_check));

    if (after_pool.is_linked()) {
      bind(&after_pool);
    }
  }

  // Since a constant pool was just emitted, we don't need another check until
  // the next constant pool entry is added.
  constant_pool_deadline_ = kMaxInt;
}
5474 
// An assembler over an external buffer of exactly |instructions| slots, used
// to overwrite existing code in place.
PatchingAssembler::PatchingAssembler(const AssemblerOptions& options,
                                     byte* address, int instructions)
    : Assembler(options, ExternalAssemblerBuffer(
                             address, instructions * kInstrSize + kGap)) {
  // A fresh buffer has no reloc info, so the writer sits at the buffer end.
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}
5481 
PatchingAssembler::~PatchingAssembler() {
  // Check that we don't have any pending constant pools.
  DCHECK(pending_32_bit_constants_.empty());

  // Check that the code was patched as expected: exactly the requested number
  // of instructions was written and no reloc info was produced.
  DCHECK_EQ(pc_, buffer_start_ + buffer_->size() - kGap);
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}
5490 
Emit(Address addr)5491 void PatchingAssembler::Emit(Address addr) { emit(static_cast<Instr>(addr)); }
5492 
PadWithNops()5493 void PatchingAssembler::PadWithNops() {
5494   DCHECK_LE(pc_, buffer_start_ + buffer_->size() - kGap);
5495   while (pc_ < buffer_start_ + buffer_->size() - kGap) {
5496     nop();
5497   }
5498 }
5499 
// Snapshots the assembler's scratch register lists so the destructor can
// restore them, releasing any registers acquired inside the scope.
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
    : assembler_(assembler),
      old_available_(*assembler->GetScratchRegisterList()),
      old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}
5504 
~UseScratchRegisterScope()5505 UseScratchRegisterScope::~UseScratchRegisterScope() {
5506   *assembler_->GetScratchRegisterList() = old_available_;
5507   *assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
5508 }
5509 
Acquire()5510 Register UseScratchRegisterScope::Acquire() {
5511   RegList* available = assembler_->GetScratchRegisterList();
5512   DCHECK_NOT_NULL(available);
5513   return available->PopFirst();
5514 }
5515 
LoadStoreLaneParams(MachineRepresentation rep,uint8_t laneidx)5516 LoadStoreLaneParams::LoadStoreLaneParams(MachineRepresentation rep,
5517                                          uint8_t laneidx) {
5518   if (rep == MachineRepresentation::kWord8) {
5519     *this = LoadStoreLaneParams(laneidx, Neon8, 8);
5520   } else if (rep == MachineRepresentation::kWord16) {
5521     *this = LoadStoreLaneParams(laneidx, Neon16, 4);
5522   } else if (rep == MachineRepresentation::kWord32) {
5523     *this = LoadStoreLaneParams(laneidx, Neon32, 2);
5524   } else if (rep == MachineRepresentation::kWord64) {
5525     *this = LoadStoreLaneParams(laneidx, Neon64, 1);
5526   } else {
5527     UNREACHABLE();
5528   }
5529 }
5530 
5531 }  // namespace internal
5532 }  // namespace v8
5533 
5534 #endif  // V8_TARGET_ARCH_ARM
5535