• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions
6 // are met:
7 //
8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // - Redistribution in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the
14 // distribution.
15 //
16 // - Neither the name of Sun Microsystems or the names of contributors may
17 // be used to endorse or promote products derived from this software without
18 // specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31 // OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 // The original source code covered by the above license above has been
34 // modified significantly by Google Inc.
35 // Copyright 2012 the V8 project authors. All rights reserved.
36 
37 #include "src/codegen/arm/assembler-arm.h"
38 
39 #if V8_TARGET_ARCH_ARM
40 
41 #include "src/base/bits.h"
42 #include "src/base/cpu.h"
43 #include "src/base/overflowing-math.h"
44 #include "src/codegen/arm/assembler-arm-inl.h"
45 #include "src/codegen/assembler-inl.h"
46 #include "src/codegen/macro-assembler.h"
47 #include "src/codegen/string-constants.h"
48 #include "src/deoptimizer/deoptimizer.h"
49 #include "src/objects/objects-inl.h"
50 
51 namespace v8 {
52 namespace internal {
53 
// Feature bitmasks for the supported ARM architecture configurations.
// Each level is a strict superset of the previous one, mirroring the
// --arm-arch command-line values ("armv6" .. "armv8").
static const unsigned kArmv6 = 0u;
static const unsigned kArmv7 = kArmv6 | (1u << ARMv7);
static const unsigned kArmv7WithSudiv = kArmv7 | (1u << ARMv7_SUDIV);
static const unsigned kArmv8 = kArmv7WithSudiv | (1u << ARMv8);
58 
// Map the --arm-arch flag (and, for legacy bots, the deprecated
// --enable-* flags) onto one of the feature bitmasks kArmv6..kArmv8.
// An unrecognised --arm-arch value is a fatal error.
static unsigned CpuFeaturesFromCommandLine() {
  unsigned result;
  if (strcmp(FLAG_arm_arch, "armv8") == 0) {
    result = kArmv8;
  } else if (strcmp(FLAG_arm_arch, "armv7+sudiv") == 0) {
    result = kArmv7WithSudiv;
  } else if (strcmp(FLAG_arm_arch, "armv7") == 0) {
    result = kArmv7;
  } else if (strcmp(FLAG_arm_arch, "armv6") == 0) {
    result = kArmv6;
  } else {
    fprintf(stderr, "Error: unrecognised value for --arm-arch ('%s').\n",
            FLAG_arm_arch);
    fprintf(stderr,
            "Supported values are:  armv8\n"
            "                       armv7+sudiv\n"
            "                       armv7\n"
            "                       armv6\n");
    FATAL("arm-arch");
  }

  // If any of the old (deprecated) flags are specified, print a warning, but
  // otherwise try to respect them for now.
  // TODO(jbramley): When all the old bots have been updated, remove this.
  if (FLAG_enable_armv7.has_value || FLAG_enable_vfp3.has_value ||
      FLAG_enable_32dregs.has_value || FLAG_enable_neon.has_value ||
      FLAG_enable_sudiv.has_value || FLAG_enable_armv8.has_value) {
    // As an approximation of the old behaviour, set the default values from the
    // arm_arch setting, then apply the flags over the top.
    // Note: vfp3, 32dregs and neon all key off the ARMv7 bit because the
    // supported configurations bundle them with ARMv7 (see the table above
    // CpuFeaturesFromCompiler).
    bool enable_armv7 = (result & (1u << ARMv7)) != 0;
    bool enable_vfp3 = (result & (1u << ARMv7)) != 0;
    bool enable_32dregs = (result & (1u << ARMv7)) != 0;
    bool enable_neon = (result & (1u << ARMv7)) != 0;
    bool enable_sudiv = (result & (1u << ARMv7_SUDIV)) != 0;
    bool enable_armv8 = (result & (1u << ARMv8)) != 0;
    if (FLAG_enable_armv7.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv7 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv7 = FLAG_enable_armv7.value;
    }
    if (FLAG_enable_vfp3.has_value) {
      fprintf(stderr,
              "Warning: --enable_vfp3 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_vfp3 = FLAG_enable_vfp3.value;
    }
    if (FLAG_enable_32dregs.has_value) {
      fprintf(stderr,
              "Warning: --enable_32dregs is deprecated. "
              "Use --arm_arch instead.\n");
      enable_32dregs = FLAG_enable_32dregs.value;
    }
    if (FLAG_enable_neon.has_value) {
      fprintf(stderr,
              "Warning: --enable_neon is deprecated. "
              "Use --arm_arch instead.\n");
      enable_neon = FLAG_enable_neon.value;
    }
    if (FLAG_enable_sudiv.has_value) {
      fprintf(stderr,
              "Warning: --enable_sudiv is deprecated. "
              "Use --arm_arch instead.\n");
      enable_sudiv = FLAG_enable_sudiv.value;
    }
    if (FLAG_enable_armv8.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv8 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv8 = FLAG_enable_armv8.value;
    }
    // Emulate the old implications.
    if (enable_armv8) {
      enable_vfp3 = true;
      enable_neon = true;
      enable_32dregs = true;
      enable_sudiv = true;
    }
    // Select the best available configuration that the flag combination
    // still permits; anything short of full ARMv7 support degrades to ARMv6.
    if (enable_armv7 && enable_vfp3 && enable_32dregs && enable_neon) {
      if (enable_sudiv) {
        if (enable_armv8) {
          result = kArmv8;
        } else {
          result = kArmv7WithSudiv;
        }
      } else {
        result = kArmv7;
      }
    } else {
      result = kArmv6;
    }
  }
  return result;
}
154 
// Get the CPU features enabled by the build.
// For cross compilation the preprocessor symbols such as
// CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP3_INSTRUCTIONS can be used to
// enable ARMv7 and VFPv3 instructions when building the snapshot. However,
// these flags should be consistent with a supported ARM configuration:
//  "armv6":       ARMv6 + VFPv2
//  "armv7":       ARMv7 + VFPv3-D32 + NEON
//  "armv7+sudiv": ARMv7 + VFPv4-D32 + NEON + SUDIV
//  "armv8":       ARMv8 (+ all of the above)
// Returns one of kArmv6..kArmv8; inconsistent symbol combinations fall
// through to a lower level (or fail the #error checks below).
static constexpr unsigned CpuFeaturesFromCompiler() {
// TODO(jbramley): Once the build flags are simplified, these tests should
// also be simplified.

// Check *architectural* implications.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_SUDIV)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_SUDIV"
#endif
#if defined(CAN_USE_ARMV7_INSTRUCTIONS) != defined(CAN_USE_VFP3_INSTRUCTIONS)
// V8 requires VFP, and all ARMv7 devices with VFP have VFPv3. Similarly,
// VFPv3 isn't available before ARMv7.
#error "CAN_USE_ARMV7_INSTRUCTIONS should match CAN_USE_VFP3_INSTRUCTIONS"
#endif
#if defined(CAN_USE_NEON) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_NEON should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif

// Find compiler-implied features.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) &&                           \
    defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv8;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7WithSudiv;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_NEON) && \
    defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7;
#else
  return kArmv6;
#endif
}
199 
// Compute the supported_ feature mask and the data-cache line size.
// Cross-compilation (snapshot) uses only statically known features;
// simulator builds trust the command line; hardware builds additionally
// intersect with features detected at runtime.
void CpuFeatures::ProbeImpl(bool cross_compile) {
  dcache_line_size_ = 64;

  unsigned command_line = CpuFeaturesFromCommandLine();
  // Only use statically determined features for cross compile (snapshot).
  if (cross_compile) {
    supported_ |= command_line & CpuFeaturesFromCompiler();
    return;
  }

#ifndef __arm__
  // For the simulator build, use whatever the flags specify.
  supported_ |= command_line;

#else  // __arm__
  // Probe for additional features at runtime.
  base::CPU cpu;
  // Runtime detection is slightly fuzzy, and some inferences are necessary.
  unsigned runtime = kArmv6;
  // NEON and VFPv3 imply at least ARMv7-A.
  if (cpu.has_neon() && cpu.has_vfp3_d32()) {
    DCHECK(cpu.has_vfp3());
    runtime |= kArmv7;
    if (cpu.has_idiva()) {
      runtime |= kArmv7WithSudiv;
      if (cpu.architecture() >= 8) {
        runtime |= kArmv8;
      }
    }
  }

  // Use the best of the features found by CPU detection and those inferred from
  // the build system. In both cases, restrict available features using the
  // command-line. Note that the command-line flags are very permissive (kArmv8)
  // by default.
  supported_ |= command_line & CpuFeaturesFromCompiler();
  supported_ |= command_line & runtime;

  // Additional tuning options.

  // ARM Cortex-A9 and Cortex-A5 have 32 byte cachelines.
  if (cpu.implementer() == base::CPU::ARM &&
      (cpu.part() == base::CPU::ARM_CORTEX_A5 ||
       cpu.part() == base::CPU::ARM_CORTEX_A9)) {
    dcache_line_size_ = 32;
  }
#endif

  // The feature bits must form the implication chain ARMv8 => SUDIV => ARMv7.
  DCHECK_IMPLIES(IsSupported(ARMv7_SUDIV), IsSupported(ARMv7));
  DCHECK_IMPLIES(IsSupported(ARMv8), IsSupported(ARMv7_SUDIV));
}
251 
PrintTarget()252 void CpuFeatures::PrintTarget() {
253   const char* arm_arch = nullptr;
254   const char* arm_target_type = "";
255   const char* arm_no_probe = "";
256   const char* arm_fpu = "";
257   const char* arm_thumb = "";
258   const char* arm_float_abi = nullptr;
259 
260 #if !defined __arm__
261   arm_target_type = " simulator";
262 #endif
263 
264 #if defined ARM_TEST_NO_FEATURE_PROBE
265   arm_no_probe = " noprobe";
266 #endif
267 
268 #if defined CAN_USE_ARMV8_INSTRUCTIONS
269   arm_arch = "arm v8";
270 #elif defined CAN_USE_ARMV7_INSTRUCTIONS
271   arm_arch = "arm v7";
272 #else
273   arm_arch = "arm v6";
274 #endif
275 
276 #if defined CAN_USE_NEON
277   arm_fpu = " neon";
278 #elif defined CAN_USE_VFP3_INSTRUCTIONS
279 #if defined CAN_USE_VFP32DREGS
280   arm_fpu = " vfp3";
281 #else
282   arm_fpu = " vfp3-d16";
283 #endif
284 #else
285   arm_fpu = " vfp2";
286 #endif
287 
288 #ifdef __arm__
289   arm_float_abi = base::OS::ArmUsingHardFloat() ? "hard" : "softfp";
290 #elif USE_EABI_HARDFLOAT
291   arm_float_abi = "hard";
292 #else
293   arm_float_abi = "softfp";
294 #endif
295 
296 #if defined __arm__ && (defined __thumb__) || (defined __thumb2__)
297   arm_thumb = " thumb";
298 #endif
299 
300   printf("target%s%s %s%s%s %s\n", arm_target_type, arm_no_probe, arm_arch,
301          arm_fpu, arm_thumb, arm_float_abi);
302 }
303 
// Print the selected CPU feature bits and the effective float ABI to stdout.
void CpuFeatures::PrintFeatures() {
  printf("ARMv8=%d ARMv7=%d VFPv3=%d VFP32DREGS=%d NEON=%d SUDIV=%d",
         CpuFeatures::IsSupported(ARMv8), CpuFeatures::IsSupported(ARMv7),
         CpuFeatures::IsSupported(VFPv3), CpuFeatures::IsSupported(VFP32DREGS),
         CpuFeatures::IsSupported(NEON), CpuFeatures::IsSupported(SUDIV));
#ifdef __arm__
  // On hardware, ask the OS which float ABI is actually in use.
  bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
#elif USE_EABI_HARDFLOAT
  bool eabi_hardfloat = true;
#else
  bool eabi_hardfloat = false;
#endif
  printf(" USE_EABI_HARDFLOAT=%d\n", eabi_hardfloat);
}
318 
319 // -----------------------------------------------------------------------------
320 // Implementation of RelocInfo
321 
// static
// Relocation modes that must be fixed up when generated code moves; on ARM
// only pc-relative code targets need adjusting.
const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);
325 
// Whether the target pointer is encoded directly in the instruction stream
// (as opposed to being loaded from the constant pool).
bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded.  Being
  // specially coded on ARM means that it is a movw/movt instruction. We don't
  // generate those for relocatable pointers.
  return false;
}
332 
// True when the instruction at pc_ loads its target from the constant pool.
bool RelocInfo::IsInConstantPool() {
  return Assembler::is_constant_pool_load(pc_);
}
336 
// Return the call-site target address, truncated to 32 bits, for a Wasm
// call or stub call relocation.
uint32_t RelocInfo::wasm_call_tag() const {
  DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
  return static_cast<uint32_t>(
      Assembler::target_address_at(pc_, constant_pool_));
}
342 
343 // -----------------------------------------------------------------------------
344 // Implementation of Operand and MemOperand
345 // See assembler-arm-inl.h for inlined constructors
346 
// Build an immediate operand holding a heap object handle; relocated as a
// full embedded object pointer.
Operand::Operand(Handle<HeapObject> handle) {
  rm_ = no_reg;
  value_.immediate = static_cast<intptr_t>(handle.address());
  rmode_ = RelocInfo::FULL_EMBEDDED_OBJECT;
}
352 
Operand(Register rm,ShiftOp shift_op,int shift_imm)353 Operand::Operand(Register rm, ShiftOp shift_op, int shift_imm) {
354   DCHECK(is_uint5(shift_imm));
355 
356   rm_ = rm;
357   rs_ = no_reg;
358   shift_op_ = shift_op;
359   shift_imm_ = shift_imm & 31;
360 
361   if ((shift_op == ROR) && (shift_imm == 0)) {
362     // ROR #0 is functionally equivalent to LSL #0 and this allow us to encode
363     // RRX as ROR #0 (See below).
364     shift_op = LSL;
365   } else if (shift_op == RRX) {
366     // encoded as ROR with shift_imm == 0
367     DCHECK_EQ(shift_imm, 0);
368     shift_op_ = ROR;
369     shift_imm_ = 0;
370   }
371 }
372 
Operand(Register rm,ShiftOp shift_op,Register rs)373 Operand::Operand(Register rm, ShiftOp shift_op, Register rs) {
374   DCHECK(shift_op != RRX);
375   rm_ = rm;
376   rs_ = no_reg;
377   shift_op_ = shift_op;
378   rs_ = rs;
379 }
380 
// Build an operand for a numeric constant: a Smi immediate when the value
// fits, otherwise a request for a HeapNumber to be allocated later (see
// Assembler::AllocateAndInstallRequestedHeapObjects).
Operand Operand::EmbeddedNumber(double value) {
  int32_t smi;
  if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(value);
  return result;
}

// Build an operand for a string constant whose backing object is allocated
// when the code is finalized.
Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(str);
  return result;
}
396 
// Immediate-offset addressing: [rn, #+/-offset].
MemOperand::MemOperand(Register rn, int32_t offset, AddrMode am)
    : rn_(rn), rm_(no_reg), offset_(offset), am_(am) {
  // Accesses below the stack pointer are not safe, and are prohibited by the
  // ABI. We can check obvious violations here.
  if (rn == sp) {
    if (am == Offset) DCHECK_LE(0, offset);
    if (am == NegOffset) DCHECK_GE(0, offset);
  }
}

// Register-offset addressing: [rn, +/-rm].
MemOperand::MemOperand(Register rn, Register rm, AddrMode am)
    : rn_(rn), rm_(rm), shift_op_(LSL), shift_imm_(0), am_(am) {}

// Scaled-register-offset addressing: [rn, +/-rm, <shift_op> #shift_imm].
MemOperand::MemOperand(Register rn, Register rm, ShiftOp shift_op,
                       int shift_imm, AddrMode am)
    : rn_(rn),
      rm_(rm),
      shift_op_(shift_op),
      shift_imm_(shift_imm & 31),
      am_(am) {
  DCHECK(is_uint5(shift_imm));
}
419 
// NEON load/store address [rn:align], with optional post-increment.
// rm_ doubles as the addressing-mode sentinel: pc selects plain Offset,
// sp selects PostIndex by the access size.
NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align)
    : rn_(rn), rm_(am == Offset ? pc : sp) {
  DCHECK((am == Offset) || (am == PostIndex));
  SetAlignment(align);
}

// NEON load/store address [rn:align], rm — post-increment by register rm.
NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align)
    : rn_(rn), rm_(rm) {
  SetAlignment(align);
}

// Translate an alignment in bits (0, 64, 128 or 256) into the 2-bit field
// encoding; any other value is a programming error.
void NeonMemOperand::SetAlignment(int align) {
  switch (align) {
    case 0:
      align_ = 0;
      break;
    case 64:
      align_ = 1;
      break;
    case 128:
      align_ = 2;
      break;
    case 256:
      align_ = 3;
      break;
    default:
      UNREACHABLE();
  }
}
449 
// For each recorded heap-object request, allocate the object now (in old
// space) and write its address into the constant pool slot that the
// instruction at the recorded offset loads from.
void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
  for (auto& request : heap_object_requests_) {
    Handle<HeapObject> object;
    switch (request.kind()) {
      case HeapObjectRequest::kHeapNumber:
        object = isolate->factory()->NewHeapNumber<AllocationType::kOld>(
            request.heap_number());
        break;
      case HeapObjectRequest::kStringConstant: {
        const StringConstantBase* str = request.string();
        CHECK_NOT_NULL(str);
        object = str->AllocateStringConstant(isolate);
        break;
      }
    }
    // Patch the constant pool entry referenced by the instruction at `pc`.
    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
    Memory<Address>(constant_pool_entry_address(pc, 0 /* unused */)) =
        object.address();
  }
}
471 
472 // -----------------------------------------------------------------------------
473 // Specific instructions, constants, and masks.
474 
// str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
// register r is not encoded.
const Instr kPushRegPattern = al | B26 | 4 | NegPreIndex | sp.code() * B16;
// ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r))
// register r is not encoded.
const Instr kPopRegPattern = al | B26 | L | 4 | PostIndex | sp.code() * B16;
// ldr rd, [pc, #offset]
const Instr kLdrPCImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
const Instr kLdrPCImmedPattern = 5 * B24 | L | pc.code() * B16;
// Pc-relative call or jump to a signed imm24 offset.
// bl pc + #offset
// b  pc + #offset
const Instr kBOrBlPCImmedMask = 0xE * B24;
const Instr kBOrBlPCImmedPattern = 0xA * B24;
// vldr dd, [pc, #offset]
const Instr kVldrDPCMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
const Instr kVldrDPCPattern = 13 * B24 | L | pc.code() * B16 | 11 * B8;
// blxcc rm
const Instr kBlxRegMask =
    15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
const Instr kBlxRegPattern = B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX;
const Instr kBlxIp = al | kBlxRegPattern | ip.code();
// Masks and patterns used to recognize and flip mov/mvn, movw/movt,
// cmp/cmn, add/sub and and/bic instruction pairs when fitting immediates
// (see the instruction-rewriting helpers below).
const Instr kMovMvnMask = 0x6D * B21 | 0xF * B16;
const Instr kMovMvnPattern = 0xD * B21;
const Instr kMovMvnFlip = B22;
const Instr kMovLeaveCCMask = 0xDFF * B16;
const Instr kMovLeaveCCPattern = 0x1A0 * B16;
const Instr kMovwPattern = 0x30 * B20;
const Instr kMovtPattern = 0x34 * B20;
const Instr kMovwLeaveCCFlip = 0x5 * B21;
const Instr kMovImmedMask = 0x7F * B21;
const Instr kMovImmedPattern = 0x1D * B21;
const Instr kOrrImmedMask = 0x7F * B21;
const Instr kOrrImmedPattern = 0x1C * B21;
const Instr kCmpCmnMask = 0xDD * B20 | 0xF * B12;
const Instr kCmpCmnPattern = 0x15 * B20;
const Instr kCmpCmnFlip = B21;
const Instr kAddSubFlip = 0x6 * B21;
const Instr kAndBicFlip = 0xE * B21;

// Patterns for ldr/str with an immediate offset (positive or negative)
// from the frame pointer; the Rd field is left unset.
const Instr kLdrRegFpOffsetPattern = al | B26 | L | Offset | fp.code() * B16;
const Instr kStrRegFpOffsetPattern = al | B26 | Offset | fp.code() * B16;
const Instr kLdrRegFpNegOffsetPattern =
    al | B26 | L | NegOffset | fp.code() * B16;
const Instr kStrRegFpNegOffsetPattern = al | B26 | NegOffset | fp.code() * B16;
const Instr kLdrStrInstrTypeMask = 0xFFFF0000;
522 
// Construct an assembler writing into `buffer`, with ip reserved as the
// scratch core register and one or two D registers reserved as VFP
// scratches depending on VFP32DREGS support.
Assembler::Assembler(const AssemblerOptions& options,
                     std::unique_ptr<AssemblerBuffer> buffer)
    : AssemblerBase(options, std::move(buffer)),
      pending_32_bit_constants_(),
      scratch_register_list_(ip.bit()) {
  pending_32_bit_constants_.reserve(kMinNumPendingConstants);
  // Relocation info grows downward from the end of the buffer.
  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
  next_buffer_check_ = 0;
  const_pool_blocked_nesting_ = 0;
  no_const_pool_before_ = 0;
  first_const_pool_32_use_ = -1;
  last_bound_pos_ = 0;
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    // Register objects tend to be abstracted and survive between scopes, so
    // it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
    // its use consistent with other features, we always enable it if we can.
    EnableCpuFeature(VFP32DREGS);
    // Make sure we pick two D registers which alias a Q register. This way, we
    // can use a Q as a scratch if NEON is supported.
    scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
  } else {
    // When VFP32DREGS is not supported, d15 become allocatable. Therefore we
    // cannot use it as a scratch.
    scratch_vfp_register_list_ = d14.ToVfpRegList();
  }
}
549 
// All constant-pool-blocking scopes must have been closed by destruction.
Assembler::~Assembler() { DCHECK_EQ(const_pool_blocked_nesting_, 0); }
551 
// Finalize the generated code: flush the constant pool, emit code comments,
// resolve pending heap-object requests and fill in `desc` with the layout
// (instructions, safepoint/handler tables, constant pool, comments, reloc
// info). The offsets are computed back-to-front from pc_offset().
void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                        SafepointTableBuilder* safepoint_table_builder,
                        int handler_table_offset) {
  // As a crutch to avoid having to add manual Align calls wherever we use a
  // raw workflow to create Code objects (mostly in tests), add another Align
  // call here. It does no harm - the end of the Code object is aligned to the
  // (larger) kCodeAlignment anyways.
  // TODO(jgruber): Consider moving responsibility for proper alignment to
  // metadata table builders (safepoint, handler, constant pool, code
  // comments).
  DataAlign(Code::kMetadataAlignment);

  // Emit constant pool if necessary.
  CheckConstPool(true, false);
  DCHECK(pending_32_bit_constants_.empty());

  int code_comments_size = WriteCodeComments();

  AllocateAndInstallRequestedHeapObjects(isolate);

  // Set up code descriptor.
  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
  // this point to make CodeDesc initialization less fiddly.

  // On ARM the constant pool is interleaved with the code, so the trailing
  // out-of-line pool has size zero.
  static constexpr int kConstantPoolSize = 0;
  const int instruction_size = pc_offset();
  const int code_comments_offset = instruction_size - code_comments_size;
  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
                                        ? constant_pool_offset
                                        : handler_table_offset;
  const int safepoint_table_offset =
      (safepoint_table_builder == kNoSafepointTable)
          ? handler_table_offset2
          : safepoint_table_builder->GetCodeOffset();
  const int reloc_info_offset =
      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
  CodeDesc::Initialize(desc, this, safepoint_table_offset,
                       handler_table_offset2, constant_pool_offset,
                       code_comments_offset, reloc_info_offset);
}
593 
// Pad with nops until pc_offset() is a multiple of m (m must be a power of
// two, at least one instruction wide).
void Assembler::Align(int m) {
  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
  DCHECK_EQ(pc_offset() & (kInstrSize - 1), 0);
  while ((pc_offset() & (m - 1)) != 0) {
    nop();
  }
}

void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}

// Extract the condition field of an instruction.
Condition Assembler::GetCondition(Instr instr) {
  return Instruction::ConditionField(instr);
}
610 
// Check for ldr <Rd>, [<Rn>, #imm] (word load, immediate offset).
bool Assembler::IsLdrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == (B26 | B20);
}

// Check for vldr <Dd>, [<Rn>, #imm].
bool Assembler::IsVldrDRegisterImmediate(Instr instr) {
  return (instr & (15 * B24 | 3 * B20 | 15 * B8)) == (13 * B24 | B20 | 11 * B8);
}
618 
// Extract the signed immediate offset of an ldr instruction.
int Assembler::GetLdrRegisterImmediateOffset(Instr instr) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;  // U bit: add vs. subtract offset.
  int offset = instr & kOff12Mask;  // Zero extended offset.
  return positive ? offset : -offset;
}

// Extract the signed byte offset of a vldr instruction; the encoded 8-bit
// field is a word count, so it is scaled by 4.
int Assembler::GetVldrDRegisterImmediateOffset(Instr instr) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;
  int offset = instr & kOff8Mask;  // Zero extended offset.
  offset <<= 2;
  return positive ? offset : -offset;
}
633 
// Return `instr` (an ldr) rewritten with the given signed immediate offset.
Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}

// Return `instr` (a vldr) rewritten with the given signed byte offset,
// which must be a multiple of 4 and fit the scaled 8-bit field.
Instr Assembler::SetVldrDRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  DCHECK((offset & ~3) == offset);  // Must be 64-bit aligned.
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint10(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset. Its bottom 2 bits are zero.
  return (instr & ~kOff8Mask) | (offset >> 2);
}
656 
// Check for str <Rd>, [<Rn>, #imm] (word store, immediate offset).
bool Assembler::IsStrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == B26;
}

// Return `instr` (a str) rewritten with the given signed immediate offset.
Instr Assembler::SetStrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsStrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}
671 
// Check for add <Rd>, <Rn>, #imm.
bool Assembler::IsAddRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B24 | B23 | B22 | B21)) == (B25 | B23);
}

// Return `instr` (an add-immediate) rewritten with the given non-negative
// 12-bit offset.
Instr Assembler::SetAddRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsAddRegisterImmediate(instr));
  DCHECK_GE(offset, 0);
  DCHECK(is_uint12(offset));
  // Set the offset.
  return (instr & ~kOff12Mask) | offset;
}
683 
GetRd(Instr instr)684 Register Assembler::GetRd(Instr instr) {
685   return Register::from_code(Instruction::RdValue(instr));
686 }
687 
GetRn(Instr instr)688 Register Assembler::GetRn(Instr instr) {
689   return Register::from_code(Instruction::RnValue(instr));
690 }
691 
GetRm(Instr instr)692 Register Assembler::GetRm(Instr instr) {
693   return Register::from_code(Instruction::RmValue(instr));
694 }
695 
IsPush(Instr instr)696 bool Assembler::IsPush(Instr instr) {
697   return ((instr & ~kRdMask) == kPushRegPattern);
698 }
699 
IsPop(Instr instr)700 bool Assembler::IsPop(Instr instr) {
701   return ((instr & ~kRdMask) == kPopRegPattern);
702 }
703 
IsStrRegFpOffset(Instr instr)704 bool Assembler::IsStrRegFpOffset(Instr instr) {
705   return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern);
706 }
707 
IsLdrRegFpOffset(Instr instr)708 bool Assembler::IsLdrRegFpOffset(Instr instr) {
709   return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern);
710 }
711 
IsStrRegFpNegOffset(Instr instr)712 bool Assembler::IsStrRegFpNegOffset(Instr instr) {
713   return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern);
714 }
715 
IsLdrRegFpNegOffset(Instr instr)716 bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
717   return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern);
718 }
719 
bool Assembler::IsLdrPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // ldr<cond> <Rd>, [pc +/- offset_12].
  return (instr & kLdrPCImmedMask) == kLdrPCImmedPattern;
}

// Check for a pc-relative b or bl with a signed imm24 offset.
bool Assembler::IsBOrBlPcImmediateOffset(Instr instr) {
  return (instr & kBOrBlPCImmedMask) == kBOrBlPCImmedPattern;
}

bool Assembler::IsVldrDPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // vldr<cond> <Dd>, [pc +/- offset_10].
  return (instr & kVldrDPCMask) == kVldrDPCPattern;
}

bool Assembler::IsBlxReg(Instr instr) {
  // Check the instruction is indeed a
  // blxcc <Rm>
  return (instr & kBlxRegMask) == kBlxRegPattern;
}

bool Assembler::IsBlxIp(Instr instr) {
  // Check the instruction is indeed a
  // blx ip
  return instr == kBlxIp;
}
747 
// Check for tst <Rn>, #imm.
bool Assembler::IsTstImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | TST | S);
}

// Check for cmp <Rn>, <Rm> (register operand form).
bool Assembler::IsCmpRegister(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask | B4)) ==
         (CMP | S);
}

// Check for cmp <Rn>, #imm.
bool Assembler::IsCmpImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) == (I | CMP | S);
}

// Extract the Rn register being compared in a cmp-immediate instruction.
Register Assembler::GetCmpImmediateRegister(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return GetRn(instr);
}

// Extract the raw (unrotated) 12-bit immediate of a cmp-immediate
// instruction.
int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return instr & kOff12Mask;
}
770 
771 // Labels refer to positions in the (to be) generated code.
772 // There are bound, linked, and unused labels.
773 //
774 // Bound labels refer to known positions in the already
775 // generated code. pos() is the position the label refers to.
776 //
777 // Linked labels refer to unknown positions in the code
778 // to be generated; pos() is the position of the last
779 // instruction using the label.
780 //
781 // The linked labels form a link chain by making the branch offset
// in the instruction stream to point to the previous branch
783 // instruction using the same label.
784 //
785 // The link chain is terminated by a branch offset pointing to the
786 // same position.
787 
// Returns the position referenced by the label-chain entry at |pos|.
// A raw 24-bit word is an emitted label link (see mov_label_offset) and is
// returned as-is; otherwise the entry must be a b/bl/blx instruction whose
// signed 24-bit immediate encodes the offset to the previous link.
int Assembler::target_at(int pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    // Emitted link to a label, not part of a branch.
    return instr;
  }
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  // Left-shift by 8 then arithmetic right-shift by 6: sign-extends the
  // 24-bit immediate and scales it by 4 (to a byte offset) in one step.
  int imm26 = ((instr & kImm24Mask) << 8) >> 6;
  if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
      ((instr & B24) != 0)) {
    // blx uses bit 24 to encode bit 2 of imm26
    imm26 += 2;
  }
  // Branch offsets are relative to pc, which is 8 bytes (kPcLoadDelta)
  // ahead of the instruction's own position.
  return pos + Instruction::kPcLoadDelta + imm26;
}
803 
// Resolves the label-chain entry at |pos| so that it refers to |target_pos|.
// Label-link entries (raw 24-bit words emitted by mov_label_offset) are
// patched into a mov/movw-movt/orr sequence that loads the target offset;
// branch entries get their signed 24-bit immediate rewritten in place.
void Assembler::target_at_put(int pos, int target_pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    DCHECK(target_pos == pos || target_pos >= 0);
    // Emitted link to a label, not part of a branch.
    // Load the position of the label relative to the generated code object
    // pointer in a register.

    // The existing code must be a single 24-bit label chain link, followed by
    // nops encoding the destination register. See mov_label_offset.

    // Extract the destination register from the first nop instructions.
    Register dst =
        Register::from_code(Instruction::RmValue(instr_at(pos + kInstrSize)));
    // In addition to the 24-bit label chain link, we expect to find one nop for
    // ARMv7 and above, or two nops for ARMv6. See mov_label_offset.
    DCHECK(IsNop(instr_at(pos + kInstrSize), dst.code()));
    if (!CpuFeatures::IsSupported(ARMv7)) {
      DCHECK(IsNop(instr_at(pos + 2 * kInstrSize), dst.code()));
    }

    // Here are the instructions we need to emit:
    //   For ARMv7: target24 => target16_1:target16_0
    //      movw dst, #target16_0
    //      movt dst, #target16_1
    //   For ARMv6: target24 => target8_2:target8_1:target8_0
    //      mov dst, #target8_0
    //      orr dst, dst, #target8_1 << 8
    //      orr dst, dst, #target8_2 << 16

    // Offset is relative to the (tagged) start of the Code object's body.
    uint32_t target24 = target_pos + (Code::kHeaderSize - kHeapObjectTag);
    DCHECK(is_uint24(target24));
    if (is_uint8(target24)) {
      // If the target fits in a byte then only patch with a mov
      // instruction.
      PatchingAssembler patcher(
          options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
      patcher.mov(dst, Operand(target24));
    } else {
      uint16_t target16_0 = target24 & kImm16Mask;
      uint16_t target16_1 = target24 >> 16;
      if (CpuFeatures::IsSupported(ARMv7)) {
        // Patch with movw/movt.
        if (target16_1 == 0) {
          // High half is zero: a single movw suffices.
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 1);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
          patcher.movt(dst, target16_1);
        }
      } else {
        // Patch with a sequence of mov/orr/orr instructions.
        uint8_t target8_0 = target16_0 & kImm8Mask;
        uint8_t target8_1 = target16_0 >> 8;
        uint8_t target8_2 = target16_1 & kImm8Mask;
        if (target8_2 == 0) {
          // Top byte is zero: two instructions are enough.
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 2);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
        } else {
          PatchingAssembler patcher(
              options(), reinterpret_cast<byte*>(buffer_start_ + pos), 3);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
          patcher.orr(dst, dst, Operand(target8_2 << 16));
        }
      }
    }
    return;
  }
  // Branch entry: rewrite the pc-relative immediate to reach target_pos.
  int imm26 = target_pos - (pos + Instruction::kPcLoadDelta);
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  if (Instruction::ConditionField(instr) == kSpecialCondition) {
    // blx uses bit 24 to encode bit 2 of imm26
    DCHECK_EQ(0, imm26 & 1);
    instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1) * B24;
  } else {
    DCHECK_EQ(0, imm26 & 3);
    instr &= ~kImm24Mask;
  }
  int imm24 = imm26 >> 2;
  DCHECK(is_int24(imm24));
  instr_at_put(pos, instr | (imm24 & kImm24Mask));
}
894 
// Debug aid: prints the state of label |L|. For a linked label, walks the
// link chain and disassembles each chain entry (branch mnemonic + condition,
// or "value" for raw label links).
void Assembler::print(const Label* L) {
  if (L->is_unused()) {
    PrintF("unused label\n");
  } else if (L->is_bound()) {
    PrintF("bound label to %d\n", L->pos());
  } else if (L->is_linked()) {
    // Walk a private copy of the chain so |L| itself is not consumed.
    Label l;
    l.link_to(L->pos());
    PrintF("unbound label");
    while (l.is_linked()) {
      PrintF("@ %d ", l.pos());
      Instr instr = instr_at(l.pos());
      if ((instr & ~kImm24Mask) == 0) {
        // Raw 24-bit label link emitted by mov_label_offset.
        PrintF("value\n");
      } else {
        DCHECK_EQ(instr & 7 * B25, 5 * B25);  // b, bl, or blx
        Condition cond = Instruction::ConditionField(instr);
        const char* b;
        const char* c;
        if (cond == kSpecialCondition) {
          b = "blx";
          c = "";
        } else {
          // B24 distinguishes bl (link) from b.
          if ((instr & B24) != 0)
            b = "bl";
          else
            b = "b";

          switch (cond) {
            case eq:
              c = "eq";
              break;
            case ne:
              c = "ne";
              break;
            case hs:
              c = "hs";
              break;
            case lo:
              c = "lo";
              break;
            case mi:
              c = "mi";
              break;
            case pl:
              c = "pl";
              break;
            case vs:
              c = "vs";
              break;
            case vc:
              c = "vc";
              break;
            case hi:
              c = "hi";
              break;
            case ls:
              c = "ls";
              break;
            case ge:
              c = "ge";
              break;
            case lt:
              c = "lt";
              break;
            case gt:
              c = "gt";
              break;
            case le:
              c = "le";
              break;
            case al:
              c = "";
              break;
            default:
              c = "";
              UNREACHABLE();
          }
        }
        PrintF("%s%s\n", b, c);
      }
      next(&l);
    }
  } else {
    PrintF("label in inconsistent state (pos = %d)\n", L->pos_);
  }
}
982 
// Binds label |L| to code position |pos|, resolving every branch / label
// link in its chain to point at the now-known target.
void Assembler::bind_to(Label* L, int pos) {
  DCHECK(0 <= pos && pos <= pc_offset());  // must have a valid binding position
  while (L->is_linked()) {
    int fixup_pos = L->pos();
    next(L);  // call next before overwriting link with target at fixup_pos
    target_at_put(fixup_pos, pos);
  }
  L->bind_to(pos);

  // Keep track of the last bound label so we don't eliminate any instructions
  // before a bound label.
  if (pos > last_bound_pos_) last_bound_pos_ = pos;
}
996 
// Binds label |L| to the current pc offset.
void Assembler::bind(Label* L) {
  DCHECK(!L->is_bound());  // label can only be bound once
  bind_to(L, pc_offset());
}
1001 
// Advances |L| to the next entry in its link chain, or marks it unused when
// the end of the chain (a self-referencing link) is reached.
void Assembler::next(Label* L) {
  DCHECK(L->is_linked());
  int link = target_at(L->pos());
  if (link == L->pos()) {
    // Branch target points to the same instruction. This is the end of the link
    // chain.
    L->Unuse();
  } else {
    DCHECK_GE(link, 0);
    L->link_to(link);
  }
}
1014 
1015 namespace {
1016 
1017 // Low-level code emission routines depending on the addressing mode.
1018 // If this returns true then you have to use the rotate_imm and immed_8
1019 // that it returns, because it may have already changed the instruction
1020 // to match them!
// Tries to encode |imm32| as an ARM "shifter operand": an 8-bit value
// rotated right by an even amount. On success, writes the rotation and
// 8-bit immediate through the out-parameters. If |instr| is non-null and
// the raw value does not fit, the opcode may be flipped to its complement
// (mov<->mvn, cmp<->cmn, add<->sub, and<->bic, or mov->movw on ARMv7) so
// the transformed immediate fits instead — in which case *instr is changed.
bool FitsShifter(uint32_t imm32, uint32_t* rotate_imm, uint32_t* immed_8,
                 Instr* instr) {
  // imm32 must be unsigned.
  // Try all 16 even rotations; RotateLeft by 2*rot undoes a rotate-right
  // encoding of 2*rot.
  for (int rot = 0; rot < 16; rot++) {
    uint32_t imm8 = base::bits::RotateLeft32(imm32, 2 * rot);
    if ((imm8 <= 0xFF)) {
      *rotate_imm = rot;
      *immed_8 = imm8;
      return true;
    }
  }
  // If the opcode is one with a complementary version and the complementary
  // immediate fits, change the opcode.
  if (instr != nullptr) {
    if ((*instr & kMovMvnMask) == kMovMvnPattern) {
      if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
        *instr ^= kMovMvnFlip;
        return true;
      } else if ((*instr & kMovLeaveCCMask) == kMovLeaveCCPattern) {
        if (CpuFeatures::IsSupported(ARMv7)) {
          if (imm32 < 0x10000) {
            // mov without flag-setting can become movw with a 16-bit
            // immediate on ARMv7.
            *instr ^= kMovwLeaveCCFlip;
            *instr |= Assembler::EncodeMovwImmediate(imm32);
            *rotate_imm = *immed_8 = 0;  // Not used for movw.
            return true;
          }
        }
      }
    } else if ((*instr & kCmpCmnMask) == kCmpCmnPattern) {
      if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8, nullptr)) {
        *instr ^= kCmpCmnFlip;
        return true;
      }
    } else {
      Instr alu_insn = (*instr & kALUMask);
      if (alu_insn == ADD || alu_insn == SUB) {
        if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8,
                        nullptr)) {
          *instr ^= kAddSubFlip;
          return true;
        }
      } else if (alu_insn == AND || alu_insn == BIC) {
        if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
          *instr ^= kAndBicFlip;
          return true;
        }
      }
    }
  }
  return false;
}
1072 
1073 // We have to use the temporary register for things that can be relocated even
1074 // if they can be encoded in the ARM's 12 bits of immediate-offset instruction
1075 // space.  There is no guarantee that the relocated location can be similarly
1076 // encoded.
MustOutputRelocInfo(RelocInfo::Mode rmode,const Assembler * assembler)1077 bool MustOutputRelocInfo(RelocInfo::Mode rmode, const Assembler* assembler) {
1078   if (RelocInfo::IsOnlyForSerializer(rmode)) {
1079     if (assembler->predictable_code_size()) return true;
1080     return assembler->options().record_reloc_info_for_serialization;
1081   } else if (RelocInfo::IsNone(rmode)) {
1082     return false;
1083   }
1084   return true;
1085 }
1086 
UseMovImmediateLoad(const Operand & x,const Assembler * assembler)1087 bool UseMovImmediateLoad(const Operand& x, const Assembler* assembler) {
1088   DCHECK_NOT_NULL(assembler);
1089   if (x.MustOutputRelocInfo(assembler)) {
1090     // Prefer constant pool if data is likely to be patched.
1091     return false;
1092   } else {
1093     // Otherwise, use immediate load if movw / movt is available.
1094     return CpuFeatures::IsSupported(ARMv7);
1095   }
1096 }
1097 
1098 }  // namespace
1099 
// Member-function wrapper over the file-local MustOutputRelocInfo, using
// this operand's relocation mode.
bool Operand::MustOutputRelocInfo(const Assembler* assembler) const {
  return v8::internal::MustOutputRelocInfo(rmode_, assembler);
}
1103 
// Returns how many instructions are needed to emit |instr| with this
// operand: 1 when the operand is a register or an encodable immediate,
// otherwise the cost of a movw/movt pair or constant pool load plus,
// for non-mov instructions, the instruction itself.
int Operand::InstructionsRequired(const Assembler* assembler,
                                  Instr instr) const {
  DCHECK_NOT_NULL(assembler);
  // Register operands are always encodable in the instruction itself.
  if (rm_.is_valid()) return 1;
  uint32_t dummy1, dummy2;
  if (MustOutputRelocInfo(assembler) ||
      !FitsShifter(immediate(), &dummy1, &dummy2, &instr)) {
    // The immediate operand cannot be encoded as a shifter operand, or use of
    // constant pool is required.  First account for the instructions required
    // for the constant pool or immediate load
    int instructions;
    if (UseMovImmediateLoad(*this, assembler)) {
      DCHECK(CpuFeatures::IsSupported(ARMv7));
      // A movw / movt immediate load.
      instructions = 2;
    } else {
      // A small constant pool load.
      instructions = 1;
    }
    if ((instr & ~kCondMask) != 13 * B21) {  // mov, S not set
      // For a mov or mvn instruction which doesn't set the condition
      // code, the constant pool or immediate load is enough, otherwise we need
      // to account for the actual instruction being requested.
      instructions += 1;
    }
    return instructions;
  } else {
    // No use of constant pool and the immediate operand can be encoded as a
    // shifter operand.
    return 1;
  }
}
1136 
// Loads the full 32-bit immediate |x| into |rd|, either with a movw/movt
// pair (ARMv7, value not subject to patching) or via a pc-relative load
// from the constant pool.
void Assembler::Move32BitImmediate(Register rd, const Operand& x,
                                   Condition cond) {
  if (UseMovImmediateLoad(x, this)) {
    CpuFeatureScope scope(this, ARMv7);
    // UseMovImmediateLoad should return false when we need to output
    // relocation info, since we prefer the constant pool for values that
    // can be patched.
    DCHECK(!x.MustOutputRelocInfo(this));
    UseScratchRegisterScope temps(this);
    // Re-use the destination register as a scratch if possible.
    Register target = rd != pc && rd != sp ? rd : temps.Acquire();
    uint32_t imm32 = static_cast<uint32_t>(x.immediate());
    movw(target, imm32 & 0xFFFF, cond);
    movt(target, imm32 >> 16, cond);
    if (target.code() != rd.code()) {
      // rd was pc or sp; move the assembled value into it now.
      mov(rd, target, LeaveCC, cond);
    }
  } else {
    int32_t immediate;
    if (x.IsHeapObjectRequest()) {
      // The actual object is created later; record the request and emit a
      // zero placeholder to be patched.
      RequestHeapObject(x.heap_object_request());
      immediate = 0;
    } else {
      immediate = x.immediate();
    }
    ConstantPoolAddEntry(pc_offset(), x.rmode_, immediate);
    ldr_pcrel(rd, 0, cond);
  }
}
1166 
// Emits a data-processing (addressing mode 1) instruction. When operand |x|
// cannot be encoded directly, falls back to an immediate-load into a scratch
// register, a Move32BitImmediate for plain mov, or — with no scratch
// available — a sequence of add instructions.
void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
                          const Operand& x) {
  CheckBuffer();
  uint32_t opcode = instr & kOpCodeMask;
  bool set_flags = (instr & S) != 0;
  DCHECK((opcode == ADC) || (opcode == ADD) || (opcode == AND) ||
         (opcode == BIC) || (opcode == EOR) || (opcode == ORR) ||
         (opcode == RSB) || (opcode == RSC) || (opcode == SBC) ||
         (opcode == SUB) || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST) || (opcode == MOV) ||
         (opcode == MVN));
  // For comparison instructions, rd is not defined.
  DCHECK(rd.is_valid() || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST));
  // For move instructions, rn is not defined.
  DCHECK(rn.is_valid() || (opcode == MOV) || (opcode == MVN));
  DCHECK(rd.is_valid() || rn.is_valid());
  DCHECK_EQ(instr & ~(kCondMask | kOpCodeMask | S), 0);
  if (!AddrMode1TryEncodeOperand(&instr, x)) {
    DCHECK(x.IsImmediate());
    // Upon failure to encode, the opcode should not have changed.
    DCHECK(opcode == (instr & kOpCodeMask));
    UseScratchRegisterScope temps(this);
    Condition cond = Instruction::ConditionField(instr);
    if ((opcode == MOV) && !set_flags) {
      // Generate a sequence of mov instructions or a load from the constant
      // pool only for a MOV instruction which does not set the flags.
      DCHECK(!rn.is_valid());
      Move32BitImmediate(rd, x, cond);
    } else if ((opcode == ADD) && !set_flags && (rd == rn) &&
               !temps.CanAcquire()) {
      // Split the operation into a sequence of additions if we cannot use a
      // scratch register. In this case, we cannot re-use rn and the assembler
      // does not have any scratch registers to spare.
      uint32_t imm = x.immediate();
      do {
        // The immediate encoding format is composed of 8 bits of data and 4
        // bits encoding a rotation. Each of the 16 possible rotations accounts
        // for a rotation by an even number.
        //   4 bits -> 16 rotations possible
        //          -> 16 rotations of 2 bits each fits in a 32-bit value.
        // This means that finding the even number of trailing zeroes of the
        // immediate allows us to more efficiently split it:
        int trailing_zeroes = base::bits::CountTrailingZeros(imm) & ~1u;
        uint32_t mask = (0xFF << trailing_zeroes);
        add(rd, rd, Operand(imm & mask), LeaveCC, cond);
        imm = imm & ~mask;
      } while (!ImmediateFitsAddrMode1Instruction(imm));
      add(rd, rd, Operand(imm), LeaveCC, cond);
    } else {
      // The immediate operand cannot be encoded as a shifter operand, so load
      // it first to a scratch register and change the original instruction to
      // use it.
      // Re-use the destination register if possible.
      Register scratch = (rd.is_valid() && rd != rn && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, x, LeaveCC, cond);
      AddrMode1(instr, rd, rn, Operand(scratch));
    }
    return;
  }
  if (!rd.is_valid()) {
    // Emit a comparison instruction.
    emit(instr | rn.code() * B16);
  } else if (!rn.is_valid()) {
    // Emit a move instruction. If the operand is a register-shifted register,
    // then prevent the destination from being PC as this is unpredictable.
    DCHECK(!x.IsRegisterShiftedRegister() || rd != pc);
    emit(instr | rd.code() * B12);
  } else {
    emit(instr | rn.code() * B16 | rd.code() * B12);
  }
  if (rn == pc || x.rm_ == pc) {
    // Block constant pool emission for one instruction after reading pc.
    BlockConstPoolFor(1);
  }
}
1245 
// Tries to fold operand |x| into the addressing-mode-1 instruction *instr.
// Returns false (leaving multi-instruction handling to the caller) only for
// immediates that need reloc info or don't fit a shifter operand; note that
// FitsShifter may rewrite the opcode in *instr to make the immediate fit.
bool Assembler::AddrMode1TryEncodeOperand(Instr* instr, const Operand& x) {
  if (x.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (x.MustOutputRelocInfo(this) ||
        !FitsShifter(x.immediate(), &rotate_imm, &immed_8, instr)) {
      // Let the caller handle generating multiple instructions.
      return false;
    }
    *instr |= I | rotate_imm * B8 | immed_8;
  } else if (x.IsImmediateShiftedRegister()) {
    *instr |= x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  } else {
    DCHECK(x.IsRegisterShiftedRegister());
    // It is unpredictable to use the PC in this case.
    DCHECK(x.rm_ != pc && x.rs_ != pc);
    // B4 marks the register-shifted-register form.
    *instr |= x.rs_.code() * B8 | x.shift_op_ | B4 | x.rm_.code();
  }

  return true;
}
1268 
// Emits a word/byte load or store (addressing mode 2). Immediate offsets
// outside the 12-bit range are first moved into a scratch register and the
// instruction is re-emitted in register-offset form.
void Assembler::AddrMode2(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | B | L)) == B26);
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_12 = x.offset_;
    if (offset_12 < 0) {
      // Encode the magnitude and flip the up/down (U) bit.
      offset_12 = -offset_12;
      am ^= U;
    }
    if (!is_uint12(offset_12)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      bool is_load = (instr & L) == L;
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode2(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_12, 0);  // no masking needed
    instr |= offset_12;
  } else {
    // Register offset (shift_imm_ and shift_op_ are 0) or scaled
    // register offset the constructors make sure than both shift_imm_
    // and shift_op_ are initialized.
    DCHECK(x.rm_ != pc);
    instr |= B25 | x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}
1308 
// Emits a halfword / signed-byte / doubleword load or store (addressing
// mode 3). Immediate offsets beyond 8 bits and scaled register offsets —
// which this mode cannot encode — are computed into a scratch register
// first, then the instruction is re-emitted in register-offset form.
void Assembler::AddrMode3(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
  DCHECK(x.rn_.is_valid());
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  bool is_load = (instr & L) == L;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_8 = x.offset_;
    if (offset_8 < 0) {
      // Encode the magnitude and flip the up/down (U) bit.
      offset_8 = -offset_8;
      am ^= U;
    }
    if (!is_uint8(offset_8)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      Register scratch = (is_load && rd != x.rn_ && rd != pc && rd != sp)
                             ? rd
                             : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_8, 0);  // no masking needed
    // The 8-bit offset is split across two 4-bit fields (high nibble at B8).
    instr |= B | (offset_8 >> 4) * B8 | (offset_8 & 0xF);
  } else if (x.shift_imm_ != 0) {
    // Scaled register offsets are not supported, compute the offset separately
    // to a scratch register.
    UseScratchRegisterScope temps(this);
    // Allow re-using rd for load instructions if possible.
    Register scratch =
        (is_load && rd != x.rn_ && rd != pc && rd != sp) ? rd : temps.Acquire();
    mov(scratch, Operand(x.rm_, x.shift_op_, x.shift_imm_), LeaveCC,
        Instruction::ConditionField(instr));
    AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
    return;
  } else {
    // Register offset.
    DCHECK((am & (P | W)) == P || x.rm_ != pc);  // no pc index with writeback
    instr |= x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code() * B16 | rd.code() * B12);
}
1358 
// Emits a block data transfer (ldm/stm, addressing mode 4) with base
// register |rn| and register list |rl|.
void Assembler::AddrMode4(Instr instr, Register rn, RegList rl) {
  DCHECK((instr & ~(kCondMask | P | U | W | L)) == B27);
  DCHECK_NE(rl, 0);  // register list must not be empty
  DCHECK(rn != pc);
  emit(instr | rn.code() * B16 | rl);
}
1365 
// Emits a coprocessor load/store (addressing mode 5). The byte offset must
// be word-aligned and, after scaling to words, fit in 8 bits.
void Assembler::AddrMode5(Instr instr, CRegister crd, const MemOperand& x) {
  // Unindexed addressing is not encoded by this function.
  DCHECK_EQ((B27 | B26),
            (instr & ~(kCondMask | kCoprocessorMask | P | U | N | W | L)));
  DCHECK(x.rn_.is_valid() && !x.rm_.is_valid());
  int am = x.am_;
  int offset_8 = x.offset_;
  DCHECK_EQ(offset_8 & 3, 0);  // offset must be an aligned word offset
  offset_8 >>= 2;  // scale bytes to words
  if (offset_8 < 0) {
    // Encode the magnitude and flip the up/down (U) bit.
    offset_8 = -offset_8;
    am ^= U;
  }
  DCHECK(is_uint8(offset_8));  // unsigned word offset must fit in a byte
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback

  // Post-indexed addressing requires W == 1; different than in AddrMode2/3.
  if ((am & P) == 0) am |= W;

  DCHECK_GE(offset_8, 0);  // no masking needed
  emit(instr | am | x.rn_.code() * B16 | crd.code() * B12 | offset_8);
}
1388 
// Computes the pc-relative offset for a branch to label |L| and links the
// current position into L's chain if it is not yet bound.
int Assembler::branch_offset(Label* L) {
  int target_pos;
  if (L->is_bound()) {
    target_pos = L->pos();
  } else {
    if (L->is_linked()) {
      // Point to previous instruction that uses the link.
      target_pos = L->pos();
    } else {
      // First entry of the link chain points to itself.
      target_pos = pc_offset();
    }
    L->link_to(pc_offset());
  }

  // Block the emission of the constant pool, since the branch instruction must
  // be emitted at the pc offset recorded by the label.
  if (!is_const_pool_blocked()) BlockConstPoolFor(1);

  // Offset is relative to pc, which reads 8 bytes (kPcLoadDelta) ahead.
  return target_pos - (pc_offset() + Instruction::kPcLoadDelta);
}
1410 
1411 // Branch instructions.
// Emits a conditional branch (b<cond>) with the given byte offset, which
// must be word-aligned and representable as a signed 24-bit word offset.
void Assembler::b(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool b_imm_check = is_int24(imm24);
  CHECK(b_imm_check);
  emit(cond | B27 | B25 | (imm24 & kImm24Mask));

  if (cond == al) {
    // Dead code is a good location to emit the constant pool.
    CheckConstPool(false, false);
  }
}
1425 
// Emits a branch-with-link (bl<cond>); same encoding as b plus the link
// bit (B24).
void Assembler::bl(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool bl_imm_check = is_int24(imm24);
  CHECK(bl_imm_check);
  emit(cond | B27 | B25 | B24 | (imm24 & kImm24Mask));
}
1434 
// Emits an immediate blx (branch with link and exchange to Thumb). The
// offset only needs halfword alignment; bit 1 of the offset is carried in
// the H bit (B24) of the encoding.
void Assembler::blx(int branch_offset) {
  DCHECK_EQ(branch_offset & 1, 0);
  int h = ((branch_offset & 2) >> 1) * B24;
  int imm24 = branch_offset >> 2;
  const bool blx_imm_check = is_int24(imm24);
  CHECK(blx_imm_check);
  emit(kSpecialCondition | B27 | B25 | h | (imm24 & kImm24Mask));
}
1443 
// Emits a register-indirect branch with link and exchange: blx<cond> target.
void Assembler::blx(Register target, Condition cond) {
  DCHECK(target != pc);
  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX | target.code());
}
1448 
// Emits a register-indirect branch and exchange: bx<cond> target.
void Assembler::bx(Register target, Condition cond) {
  DCHECK(target != pc);  // use of pc is actually allowed, but discouraged
  emit(cond | B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BX | target.code());
}
1453 
// Emits a conditional branch to label |L|.
void Assembler::b(Label* L, Condition cond) {
  CheckBuffer();
  b(branch_offset(L), cond);
}
1458 
// Emits a conditional branch-with-link to label |L|.
void Assembler::bl(Label* L, Condition cond) {
  CheckBuffer();
  bl(branch_offset(L), cond);
}
1463 
// Emits an immediate blx to label |L|.
void Assembler::blx(Label* L) {
  CheckBuffer();
  blx(branch_offset(L));
}
1468 
1469 // Data-processing instructions.
1470 
// and: dst = src1 & src2, optionally setting flags (s).
void Assembler::and_(Register dst, Register src1, const Operand& src2, SBit s,
                     Condition cond) {
  AddrMode1(cond | AND | s, dst, src1, src2);
}
1475 
// Register-register overload; delegates to the Operand form.
void Assembler::and_(Register dst, Register src1, Register src2, SBit s,
                     Condition cond) {
  and_(dst, src1, Operand(src2), s, cond);
}
1480 
// eor: dst = src1 ^ src2, optionally setting flags (s).
void Assembler::eor(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | EOR | s, dst, src1, src2);
}
1485 
eor(Register dst,Register src1,Register src2,SBit s,Condition cond)1486 void Assembler::eor(Register dst, Register src1, Register src2, SBit s,
1487                     Condition cond) {
1488   AddrMode1(cond | EOR | s, dst, src1, Operand(src2));
1489 }
1490 
// sub: dst = src1 - src2, optionally setting flags (s).
void Assembler::sub(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | SUB | s, dst, src1, src2);
}
1495 
// Register-register overload; delegates to the Operand form.
void Assembler::sub(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  sub(dst, src1, Operand(src2), s, cond);
}
1500 
// rsb (reverse subtract): dst = src2 - src1, optionally setting flags (s).
void Assembler::rsb(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | RSB | s, dst, src1, src2);
}
1505 
// add: dst = src1 + src2, optionally setting flags (s).
void Assembler::add(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | ADD | s, dst, src1, src2);
}
1510 
// Register-register overload; delegates to the Operand form.
void Assembler::add(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  add(dst, src1, Operand(src2), s, cond);
}
1515 
// adc (add with carry): dst = src1 + src2 + C, optionally setting flags (s).
void Assembler::adc(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | ADC | s, dst, src1, src2);
}
1520 
// sbc (subtract with carry): dst = src1 - src2 - !C, optionally setting
// flags (s).
void Assembler::sbc(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | SBC | s, dst, src1, src2);
}
1525 
// rsc (reverse subtract with carry): dst = src2 - src1 - !C, optionally
// setting flags (s).
void Assembler::rsc(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | RSC | s, dst, src1, src2);
}
1530 
// tst: sets flags from src1 & src2; no destination register.
void Assembler::tst(Register src1, const Operand& src2, Condition cond) {
  AddrMode1(cond | TST | S, no_reg, src1, src2);
}
1534 
// Register-register overload; delegates to the Operand form.
void Assembler::tst(Register src1, Register src2, Condition cond) {
  tst(src1, Operand(src2), cond);
}
1538 
teq(Register src1,const Operand & src2,Condition cond)1539 void Assembler::teq(Register src1, const Operand& src2, Condition cond) {
1540   AddrMode1(cond | TEQ | S, no_reg, src1, src2);
1541 }
1542 
cmp(Register src1,const Operand & src2,Condition cond)1543 void Assembler::cmp(Register src1, const Operand& src2, Condition cond) {
1544   AddrMode1(cond | CMP | S, no_reg, src1, src2);
1545 }
1546 
cmp(Register src1,Register src2,Condition cond)1547 void Assembler::cmp(Register src1, Register src2, Condition cond) {
1548   cmp(src1, Operand(src2), cond);
1549 }
1550 
cmp_raw_immediate(Register src,int raw_immediate,Condition cond)1551 void Assembler::cmp_raw_immediate(Register src, int raw_immediate,
1552                                   Condition cond) {
1553   DCHECK(is_uint12(raw_immediate));
1554   emit(cond | I | CMP | S | src.code() << 16 | raw_immediate);
1555 }
1556 
cmn(Register src1,const Operand & src2,Condition cond)1557 void Assembler::cmn(Register src1, const Operand& src2, Condition cond) {
1558   AddrMode1(cond | CMN | S, no_reg, src1, src2);
1559 }
1560 
void Assembler::orr(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | ORR | s, dst, src1, src2);
}

// Register-operand convenience overload; wraps src2 in an Operand.
void Assembler::orr(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  orr(dst, src1, Operand(src2), s, cond);
}

void Assembler::mov(Register dst, const Operand& src, SBit s, Condition cond) {
  // Don't allow nop instructions in the form mov rn, rn to be generated using
  // the mov instruction. They must be generated using nop(int/NopMarkerTypes).
  DCHECK(!(src.IsRegister() && src.rm() == dst && s == LeaveCC && cond == al));
  AddrMode1(cond | MOV | s, dst, no_reg, src);
}

// Register-operand convenience overload; wraps src in an Operand.
void Assembler::mov(Register dst, Register src, SBit s, Condition cond) {
  mov(dst, Operand(src), s, cond);
}
1581 
// Loads dst with the offset of |label| from the start of the code object
// (including the Code header). For a bound label this is an immediate mov;
// for an unbound label a patchable placeholder sequence is emitted and the
// label's link chain is extended.
void Assembler::mov_label_offset(Register dst, Label* label) {
  if (label->is_bound()) {
    mov(dst, Operand(label->pos() + (Code::kHeaderSize - kHeapObjectTag)));
  } else {
    // Emit the link to the label in the code stream followed by extra nop
    // instructions.
    // If the label is not linked, then start a new link chain by linking it to
    // itself, emitting pc_offset().
    int link = label->is_linked() ? label->pos() : pc_offset();
    label->link_to(pc_offset());

    // When the label is bound, these instructions will be patched with a
    // sequence of movw/movt or mov/orr/orr instructions. They will load the
    // destination register with the position of the label from the beginning
    // of the code.
    //
    // The link will be extracted from the first instruction and the destination
    // register from the second.
    //   For ARMv7:
    //      link
    //      mov dst, dst
    //   For ARMv6:
    //      link
    //      mov dst, dst
    //      mov dst, dst
    //
    // When the label gets bound: target_at extracts the link and target_at_put
    // patches the instructions.
    CHECK(is_uint24(link));
    BlockConstPoolScope block_const_pool(this);
    emit(link);
    nop(dst.code());
    if (!CpuFeatures::IsSupported(ARMv7)) {
      nop(dst.code());
    }
  }
}
1619 
// movw: load a 16-bit immediate into the low half of reg (ARMv7 only).
void Assembler::movw(Register reg, uint32_t immediate, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  emit(cond | 0x30 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate));
}

// movt: load a 16-bit immediate into the high half of reg (ARMv7 only).
void Assembler::movt(Register reg, uint32_t immediate, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  emit(cond | 0x34 * B20 | reg.code() * B12 | EncodeMovwImmediate(immediate));
}

// Bit clear: dst = src1 & ~src2.
void Assembler::bic(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  AddrMode1(cond | BIC | s, dst, src1, src2);
}

// Move negated: dst = ~src.
void Assembler::mvn(Register dst, const Operand& src, SBit s, Condition cond) {
  AddrMode1(cond | MVN | s, dst, no_reg, src);
}
1638 
asr(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1639 void Assembler::asr(Register dst, Register src1, const Operand& src2, SBit s,
1640                     Condition cond) {
1641   if (src2.IsRegister()) {
1642     mov(dst, Operand(src1, ASR, src2.rm()), s, cond);
1643   } else {
1644     mov(dst, Operand(src1, ASR, src2.immediate()), s, cond);
1645   }
1646 }
1647 
lsl(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1648 void Assembler::lsl(Register dst, Register src1, const Operand& src2, SBit s,
1649                     Condition cond) {
1650   if (src2.IsRegister()) {
1651     mov(dst, Operand(src1, LSL, src2.rm()), s, cond);
1652   } else {
1653     mov(dst, Operand(src1, LSL, src2.immediate()), s, cond);
1654   }
1655 }
1656 
lsr(Register dst,Register src1,const Operand & src2,SBit s,Condition cond)1657 void Assembler::lsr(Register dst, Register src1, const Operand& src2, SBit s,
1658                     Condition cond) {
1659   if (src2.IsRegister()) {
1660     mov(dst, Operand(src1, LSR, src2.rm()), s, cond);
1661   } else {
1662     mov(dst, Operand(src1, LSR, src2.immediate()), s, cond);
1663   }
1664 }
1665 
1666 // Multiply instructions.
// Multiply-accumulate: dst = src1 * src2 + srcA.
void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
                    SBit s, Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  emit(cond | A | s | dst.code() * B16 | srcA.code() * B12 | src2.code() * B8 |
       B7 | B4 | src1.code());
}

// Multiply-subtract: dst = srcA - src1 * src2 (ARMv7 only).
void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
                    Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  DCHECK(IsEnabled(ARMv7));
  emit(cond | B22 | B21 | dst.code() * B16 | srcA.code() * B12 |
       src2.code() * B8 | B7 | B4 | src1.code());
}

// Signed integer divide (requires the SUDIV feature).
void Assembler::sdiv(Register dst, Register src1, Register src2,
                     Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  DCHECK(IsEnabled(SUDIV));
  emit(cond | B26 | B25 | B24 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}

// Unsigned integer divide (requires the SUDIV feature).
void Assembler::udiv(Register dst, Register src1, Register src2,
                     Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  DCHECK(IsEnabled(SUDIV));
  emit(cond | B26 | B25 | B24 | B21 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}

void Assembler::mul(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  // dst goes in bits 16-19 for this instruction!
  emit(cond | s | dst.code() * B16 | src2.code() * B8 | B7 | B4 | src1.code());
}
1704 
// Signed most-significant-word multiply-accumulate (SMMLA): dst gets the high
// 32 bits of src1 * src2, plus srcA.
void Assembler::smmla(Register dst, Register src1, Register src2, Register srcA,
                      Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 |
       srcA.code() * B12 | src2.code() * B8 | B4 | src1.code());
}

// Signed most-significant-word multiply (SMMUL): dst gets the high 32 bits of
// src1 * src2 (the accumulate register field is 0xF, i.e. none).
void Assembler::smmul(Register dst, Register src1, Register src2,
                      Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}
1718 
// 64-bit multiply(-accumulate) family: the result's low word goes to dstL and
// the high word to dstH, which must be distinct registers.

// Signed multiply-accumulate long.
void Assembler::smlal(Register dstL, Register dstH, Register src1,
                      Register src2, SBit s, Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | B22 | A | s | dstH.code() * B16 | dstL.code() * B12 |
       src2.code() * B8 | B7 | B4 | src1.code());
}

// Signed multiply long.
void Assembler::smull(Register dstL, Register dstH, Register src1,
                      Register src2, SBit s, Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | B22 | s | dstH.code() * B16 | dstL.code() * B12 |
       src2.code() * B8 | B7 | B4 | src1.code());
}

// Unsigned multiply-accumulate long.
void Assembler::umlal(Register dstL, Register dstH, Register src1,
                      Register src2, SBit s, Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | A | s | dstH.code() * B16 | dstL.code() * B12 |
       src2.code() * B8 | B7 | B4 | src1.code());
}

// Unsigned multiply long.
void Assembler::umull(Register dstL, Register dstH, Register src1,
                      Register src2, SBit s, Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | s | dstH.code() * B16 | dstL.code() * B12 |
       src2.code() * B8 | B7 | B4 | src1.code());
}
1750 
1751 // Miscellaneous arithmetic instructions.
// Count leading zeros: dst = number of leading zero bits in src.
void Assembler::clz(Register dst, Register src, Condition cond) {
  DCHECK(dst != pc && src != pc);
  emit(cond | B24 | B22 | B21 | 15 * B16 | dst.code() * B12 | 15 * B8 | CLZ |
       src.code());
}
1757 
1758 // Saturating instructions.
1759 
1760 // Unsigned saturate.
usat(Register dst,int satpos,const Operand & src,Condition cond)1761 void Assembler::usat(Register dst, int satpos, const Operand& src,
1762                      Condition cond) {
1763   DCHECK(dst != pc && src.rm_ != pc);
1764   DCHECK((satpos >= 0) && (satpos <= 31));
1765   DCHECK(src.IsImmediateShiftedRegister());
1766   DCHECK((src.shift_op_ == ASR) || (src.shift_op_ == LSL));
1767 
1768   int sh = 0;
1769   if (src.shift_op_ == ASR) {
1770     sh = 1;
1771   }
1772 
1773   emit(cond | 0x6 * B24 | 0xE * B20 | satpos * B16 | dst.code() * B12 |
1774        src.shift_imm_ * B7 | sh * B6 | 0x1 * B4 | src.rm_.code());
1775 }
1776 
1777 // Bitfield manipulation instructions.
1778 
1779 // Unsigned bit field extract.
1780 // Extracts #width adjacent bits from position #lsb in a register, and
1781 // writes them to the low bits of a destination register.
1782 //   ubfx dst, src, #lsb, #width
// Extracts |width| bits starting at |lsb| from src into the low bits of dst,
// zero-extended (ARMv7 only). The width is encoded as width - 1.
void Assembler::ubfx(Register dst, Register src, int lsb, int width,
                     Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  emit(cond | 0xF * B23 | B22 | B21 | (width - 1) * B16 | dst.code() * B12 |
       lsb * B7 | B6 | B4 | src.code());
}

// Signed bit field extract.
// Extracts #width adjacent bits from position #lsb in a register, and
// writes them to the low bits of a destination register. The extracted
// value is sign extended to fill the destination register.
//   sbfx dst, src, #lsb, #width
void Assembler::sbfx(Register dst, Register src, int lsb, int width,
                     Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  emit(cond | 0xF * B23 | B21 | (width - 1) * B16 | dst.code() * B12 |
       lsb * B7 | B6 | B4 | src.code());
}
1807 
1808 // Bit field clear.
1809 // Sets #width adjacent bits at position #lsb in the destination register
1810 // to zero, preserving the value of the other bits.
1811 //   bfc dst, #lsb, #width
// Clears |width| bits of dst starting at |lsb| (ARMv7 only). Encoded as BFI
// with the all-ones source register field (0xF).
void Assembler::bfc(Register dst, int lsb, int width, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  int msb = lsb + width - 1;
  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 | 0xF);
}

// Bit field insert.
// Inserts #width adjacent bits from the low bits of the source register
// into position #lsb of the destination register.
//   bfi dst, src, #lsb, #width
void Assembler::bfi(Register dst, Register src, int lsb, int width,
                    Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  int msb = lsb + width - 1;
  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 |
       src.code());
}
1835 
// Pack halfword bottom-top (PKHBT); src2 must be an LSL-shifted register.
void Assembler::pkhbt(Register dst, Register src1, const Operand& src2,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
  // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2.IsImmediateShiftedRegister());
  DCHECK(src2.rm() != pc);
  DCHECK((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
  DCHECK(src2.shift_op() == LSL);
  emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 |
       src2.shift_imm_ * B7 | B4 | src2.rm().code());
}

// Pack halfword top-bottom (PKHTB); src2 must be an ASR-shifted register.
void Assembler::pkhtb(Register dst, Register src1, const Operand& src2,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
  // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2.IsImmediateShiftedRegister());
  DCHECK(src2.rm() != pc);
  DCHECK((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
  DCHECK(src2.shift_op() == ASR);
  // ASR #32 is encoded as a shift amount of 0 in the imm5 field.
  int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_;
  emit(cond | 0x68 * B20 | src1.code() * B16 | dst.code() * B12 | asr * B7 |
       B6 | B4 | src2.rm().code());
}
1866 
// Sign-extend byte; src may be rotated right by 0/8/16/24 bits first
// (the rotation is encoded in bits 11:10).
void Assembler::sxtb(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
  // cond(31-28) | 01101010(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6A * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}

// Sign-extend byte and add: like sxtb but with src1 in the Rn field.
void Assembler::sxtab(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
  // cond(31-28) | 01101010(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6A * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}

// Sign-extend halfword, with optional 0/8/16/24-bit rotation of src.
void Assembler::sxth(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
  // cond(31-28) | 01101011(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6B * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}

// Sign-extend halfword and add: like sxth but with src1 in the Rn field.
void Assembler::sxtah(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
  // cond(31-28) | 01101011(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6B * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}
1914 
// Zero-extend byte; src may be rotated right by 0/8/16/24 bits first
// (the rotation is encoded in bits 11:10).
void Assembler::uxtb(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.274.
  // cond(31-28) | 01101110(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6E * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}

// Zero-extend byte and add: like uxtb but with src1 in the Rn field.
void Assembler::uxtab(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.271.
  // cond(31-28) | 01101110(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6E * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}

// Zero-extend two bytes (bits 7:0 and 23:16) to two halfwords.
void Assembler::uxtb16(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.275.
  // cond(31-28) | 01101100(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6C * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}

// Zero-extend halfword, with optional 0/8/16/24-bit rotation of src.
void Assembler::uxth(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.276.
  // cond(31-28) | 01101111(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6F * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}

// Zero-extend halfword and add: like uxth but with src1 in the Rn field.
void Assembler::uxtah(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.273.
  // cond(31-28) | 01101111(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6F * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}
1973 
// Reverse the bit order of src into dst (ARMv7 only).
void Assembler::rbit(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.144.
  // cond(31-28) | 011011111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | 0x6FF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
}

// Reverse the byte order of src into dst.
void Assembler::rev(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.144.
  // cond(31-28) | 011010111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | 0x6BF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
}
1990 
1991 // Status register access instructions.
// Move status register (CPSR or SPSR, selected by |s|) into dst.
void Assembler::mrs(Register dst, SRegister s, Condition cond) {
  DCHECK(dst != pc);
  emit(cond | B24 | s | 15 * B16 | dst.code() * B12);
}
1996 
// Move to status register: writes src into the CPSR/SPSR fields selected by
// |fields|. Immediates that cannot be encoded as a rotated 8-bit value are
// first materialized into a scratch register.
void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
                    Condition cond) {
  DCHECK_NE(fields & 0x000F0000, 0);  // At least one field must be set.
  DCHECK(((fields & 0xFFF0FFFF) == CPSR) || ((fields & 0xFFF0FFFF) == SPSR));
  Instr instr;
  if (src.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (src.MustOutputRelocInfo(this) ||
        !FitsShifter(src.immediate(), &rotate_imm, &immed_8, nullptr)) {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      // Immediate operand cannot be encoded, load it first to a scratch
      // register.
      Move32BitImmediate(scratch, src);
      msr(fields, Operand(scratch), cond);
      return;
    }
    instr = I | rotate_imm * B8 | immed_8;
  } else {
    DCHECK(src.IsRegister());  // Only rm is allowed.
    instr = src.rm_.code();
  }
  emit(cond | instr | B24 | B21 | fields | 15 * B12);
}
2023 
2024 // Load/Store instructions.
// Word load/store go through AddrMode2; halfword and signed-byte variants use
// the AddrMode3 (miscellaneous load/store) encoding.

void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
  AddrMode2(cond | B26 | L, dst, src);
}

void Assembler::str(Register src, const MemOperand& dst, Condition cond) {
  AddrMode2(cond | B26, src, dst);
}

void Assembler::ldrb(Register dst, const MemOperand& src, Condition cond) {
  AddrMode2(cond | B26 | B | L, dst, src);
}

void Assembler::strb(Register src, const MemOperand& dst, Condition cond) {
  AddrMode2(cond | B26 | B, src, dst);
}

void Assembler::ldrh(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | H | B4, dst, src);
}

void Assembler::strh(Register src, const MemOperand& dst, Condition cond) {
  AddrMode3(cond | B7 | H | B4, src, dst);
}

void Assembler::ldrsb(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | S6 | B4, dst, src);
}

void Assembler::ldrsh(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | S6 | H | B4, dst, src);
}
2056 
// Doubleword load: dst1/dst2 must be an even/odd consecutive register pair;
// only dst1 is encoded, dst2 is implied by the instruction.
void Assembler::ldrd(Register dst1, Register dst2, const MemOperand& src,
                     Condition cond) {
  DCHECK(src.rm() == no_reg);
  DCHECK(dst1 != lr);  // r14.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  AddrMode3(cond | B7 | B6 | B4, dst1, src);
}

// Doubleword store: src1/src2 must be an even/odd consecutive register pair;
// only src1 is encoded, src2 is implied by the instruction.
void Assembler::strd(Register src1, Register src2, const MemOperand& dst,
                     Condition cond) {
  DCHECK(dst.rm() == no_reg);
  DCHECK(src1 != lr);  // r14.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  AddrMode3(cond | B7 | B6 | B5 | B4, src1, dst);
}
2074 
ldr_pcrel(Register dst,int imm12,Condition cond)2075 void Assembler::ldr_pcrel(Register dst, int imm12, Condition cond) {
2076   AddrMode am = Offset;
2077   if (imm12 < 0) {
2078     imm12 = -imm12;
2079     am = NegOffset;
2080   }
2081   DCHECK(is_uint12(imm12));
2082   emit(cond | B26 | am | L | pc.code() * B16 | dst.code() * B12 | imm12);
2083 }
2084 
2085 // Load/Store exclusive instructions.
// Exclusive loads mark [src] for exclusive access; the matching strex* writes
// a status result (0 = success) into its first register argument.

void Assembler::ldrex(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.75.
  // cond(31-28) | 00011001(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B20 | src.code() * B16 | dst.code() * B12 | 0xF9F);
}

// Store exclusive: src1 is the status register (Rd), src2 the value (Rt).
void Assembler::strex(Register src1, Register src2, Register dst,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.212.
  // cond(31-28) | 00011000(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | dst.code() * B16 | src1.code() * B12 | 0xF9 * B4 |
       src2.code());
}

void Assembler::ldrexb(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.76.
  // cond(31-28) | 00011101(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B20 | src.code() * B16 | dst.code() * B12 |
       0xF9F);
}

// Byte store exclusive: src1 is the status register (Rd), src2 the value (Rt).
void Assembler::strexb(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.213.
  // cond(31-28) | 00011100(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}

void Assembler::ldrexh(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.78.
  // cond(31-28) | 00011111(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B21 | B20 | src.code() * B16 |
       dst.code() * B12 | 0xF9F);
}

// Halfword store exclusive: src1 is the status register (Rd), src2 the value.
void Assembler::strexh(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.215.
  // cond(31-28) | 00011110(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | B21 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}
2153 
// Doubleword exclusive load; only dst1 is encoded, dst2 is implied.
void Assembler::ldrexd(Register dst1, Register dst2, Register src,
                       Condition cond) {
  // cond(31-28) | 00011011(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst1 != lr);  // r14.
  // The pair of destination registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  emit(cond | B24 | B23 | B21 | B20 | src.code() * B16 | dst1.code() * B12 |
       0xF9F);
}

// Doubleword exclusive store; res receives the status, only src1 is encoded.
void Assembler::strexd(Register res, Register src1, Register src2, Register dst,
                       Condition cond) {
  // cond(31-28) | 00011010(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(src1 != lr);  // r14.
  // The pair of source registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  emit(cond | B24 | B23 | B21 | dst.code() * B16 | res.code() * B12 |
       0xF9 * B4 | src1.code());
}
2177 
2178 // Preload instructions.
// Preload-data hint for the cache line at `address` (immediate offset only).
void Assembler::pld(const MemOperand& address) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.128.
  // 1111(31-28) | 0101(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) |
  // 1111(15-12) | imm5(11-07) | type(6-5) | 0(4)| Rm(3-0) |
  // Only immediate-offset addressing is supported here.
  DCHECK(address.rm() == no_reg);
  DCHECK(address.am() == Offset);
  // U selects add (1) vs subtract (0) of the offset from Rn.
  int U = B23;
  int offset = address.offset();
  if (offset < 0) {
    offset = -offset;
    U = 0;
  }
  DCHECK_LT(offset, 4096);  // imm12 field.
  emit(kSpecialCondition | B26 | B24 | U | B22 | B20 |
       address.rn().code() * B16 | 0xF * B12 | offset);
}
2195 
2196 // Load/Store multiple instructions.
// Load multiple registers in `dst` from memory at `base` (LDM<am>).
void Assembler::ldm(BlockAddrMode am, Register base, RegList dst,
                    Condition cond) {
  // ABI stack constraint: ldmxx base, {..sp..}  base != sp  is not restartable.
  DCHECK(base == sp || (dst & sp.bit()) == 0);

  AddrMode4(cond | B27 | am | L, base, dst);

  // Emit the constant pool after a function return implemented by ldm ..{..pc}.
  if (cond == al && (dst & pc.bit()) != 0) {
    // There is a slight chance that the ldm instruction was actually a call,
    // in which case it would be wrong to return into the constant pool; we
    // recognize this case by checking if the emission of the pool was blocked
    // at the pc of the ldm instruction by a mov lr, pc instruction; if this is
    // the case, we emit a jump over the pool.
    CheckConstPool(true, no_const_pool_before_ == pc_offset() - kInstrSize);
  }
}
2214 
// Store multiple registers in `src` to memory at `base` (STM<am>).
void Assembler::stm(BlockAddrMode am, Register base, RegList src,
                    Condition cond) {
  AddrMode4(cond | B27 | am, base, src);
}
2219 
2220 // Exception-generating instructions and debugging support.
2221 // Stops with a non-negative code less than kNumOfWatchedStops support
2222 // enabling/disabling and a counter feature. See simulator-arm.h .
// Emits a stop. In simulator (non-__arm__) builds this is an svc carrying the
// stop code for the simulator to decode; on real hardware it is a bkpt, with
// an explicit branch around it for conditional stops since bkpt itself
// executes unconditionally.
void Assembler::stop(Condition cond, int32_t code) {
#ifndef __arm__
  DCHECK_GE(code, kDefaultStopCode);
  {
    // Keep the constant pool from being emitted inside this sequence.
    BlockConstPoolScope block_const_pool(this);
    if (code >= 0) {
      svc(kStopCode + code, cond);
    } else {
      // Negative codes are mapped to the maximum watched stop code.
      svc(kStopCode + kMaxStopCode, cond);
    }
  }
#else   // def __arm__
  if (cond != al) {
    // bkpt has no condition field; branch over it when cond fails.
    Label skip;
    b(&skip, NegateCondition(cond));
    bkpt(0);
    bind(&skip);
  } else {
    bkpt(0);
  }
#endif  // def __arm__
}
2245 
// Breakpoint instruction; always unconditional (al). The 16-bit immediate is
// split across the two immediate fields of the encoding.
void Assembler::bkpt(uint32_t imm16) {
  DCHECK(is_uint16(imm16));
  emit(al | B24 | B21 | (imm16 >> 4) * B8 | BKPT | (imm16 & 0xF));
}
2250 
// Supervisor call with a 24-bit immediate payload.
void Assembler::svc(uint32_t imm24, Condition cond) {
  DCHECK(is_uint24(imm24));
  emit(cond | 15 * B24 | imm24);
}
2255 
// Data memory barrier.
void Assembler::dmb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Native DMB instruction.
    // Details available in ARM DDI 0406C.b, A8-378.
    emit(kSpecialCondition | 0x57FF * B12 | 5 * B4 | option);
  } else {
    // Pre-ARMv7: equivalent CP15 coprocessor operation.
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DMB: CRn=c7, opc1=0, CRm=c10, opc2=5, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 5);
  }
}
2266 
// Data synchronization barrier.
void Assembler::dsb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Native DSB instruction.
    // Details available in ARM DDI 0406C.b, A8-380.
    emit(kSpecialCondition | 0x57FF * B12 | 4 * B4 | option);
  } else {
    // Pre-ARMv7: equivalent CP15 coprocessor operation.
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DSB: CRn=c7, opc1=0, CRm=c10, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 4);
  }
}
2277 
// Instruction synchronization barrier.
void Assembler::isb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Native ISB instruction.
    // Details available in ARM DDI 0406C.b, A8-389.
    emit(kSpecialCondition | 0x57FF * B12 | 6 * B4 | option);
  } else {
    // Pre-ARMv7: equivalent CP15 coprocessor operation.
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15ISB: CRn=c7, opc1=0, CRm=c5, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr5, 4);
  }
}
2288 
// Consumption of speculative data barrier; emitted as a fixed (unconditional)
// encoding.
void Assembler::csdb() {
  // Details available in Arm Cache Speculation Side-channels white paper,
  // version 1.1, page 4.
  emit(0xE320F014);
}
2294 
2295 // Coprocessor instructions.
// Coprocessor data processing (CDP).
void Assembler::cdp(Coprocessor coproc, int opcode_1, CRegister crd,
                    CRegister crn, CRegister crm, int opcode_2,
                    Condition cond) {
  DCHECK(is_uint4(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 15) * B20 | crn.code() * B16 |
       crd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | crm.code());
}
2303 
// CDP2: the unconditional (0b1111 condition field) variant of cdp.
void Assembler::cdp2(Coprocessor coproc, int opcode_1, CRegister crd,
                     CRegister crn, CRegister crm, int opcode_2) {
  cdp(coproc, opcode_1, crd, crn, crm, opcode_2, kSpecialCondition);
}
2308 
// Move from ARM core register to coprocessor (MCR).
void Assembler::mcr(Coprocessor coproc, int opcode_1, Register rd,
                    CRegister crn, CRegister crm, int opcode_2,
                    Condition cond) {
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | crn.code() * B16 |
       rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code());
}
2316 
// MCR2: the unconditional (0b1111 condition field) variant of mcr.
void Assembler::mcr2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  mcr(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}
2321 
// Move from coprocessor to ARM core register (MRC); differs from mcr only by
// the L bit.
void Assembler::mrc(Coprocessor coproc, int opcode_1, Register rd,
                    CRegister crn, CRegister crm, int opcode_2,
                    Condition cond) {
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7) * B21 | L | crn.code() * B16 |
       rd.code() * B12 | coproc * B8 | (opcode_2 & 7) * B5 | B4 | crm.code());
}
2329 
// MRC2: the unconditional (0b1111 condition field) variant of mrc.
void Assembler::mrc2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  mrc(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}
2334 
// Load coprocessor register from memory (LDC), offset/index addressing.
void Assembler::ldc(Coprocessor coproc, CRegister crd, const MemOperand& src,
                    LFlag l, Condition cond) {
  AddrMode5(cond | B27 | B26 | l | L | coproc * B8, crd, src);
}
2339 
// Load coprocessor register from memory (LDC), unindexed addressing; the
// 8-bit option field is passed through to the coprocessor.
void Assembler::ldc(Coprocessor coproc, CRegister crd, Register rn, int option,
                    LFlag l, Condition cond) {
  // Unindexed addressing.
  DCHECK(is_uint8(option));
  emit(cond | B27 | B26 | U | l | L | rn.code() * B16 | crd.code() * B12 |
       coproc * B8 | (option & 255));
}
2347 
// LDC2: the unconditional (0b1111 condition field) variant of ldc.
void Assembler::ldc2(Coprocessor coproc, CRegister crd, const MemOperand& src,
                     LFlag l) {
  ldc(coproc, crd, src, l, kSpecialCondition);
}
2352 
// LDC2, unindexed addressing variant.
void Assembler::ldc2(Coprocessor coproc, CRegister crd, Register rn, int option,
                     LFlag l) {
  ldc(coproc, crd, rn, option, l, kSpecialCondition);
}
2357 
2358 // Support for VFP.
2359 
void Assembler::vldr(const DwVfpRegister dst, const Register base, int offset,
                     const Condition cond) {
  // Ddst = MEM(Rbase + offset).
  // Instruction details available in ARM DDI 0406C.b, A8-924.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 01(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | offset
  DCHECK(VfpRegisterIsAvailable(dst));
  int u = 1;  // U bit: add (1) vs subtract (0) the offset.
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);  // -kMinInt would overflow.
    offset = -offset;
    u = 0;
  }
  int vd, d;
  dst.split_code(&vd, &d);

  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // The offset fits in the word-scaled 8-bit immediate field.
    emit(cond | 0xD * B24 | u * B23 | d * B22 | B20 | base.code() * B16 |
         vd * B12 | 0xB * B8 | ((offset / 4) & 255));
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | 0xD * B24 | d * B22 | B20 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}
2395 
// vldr for a general MemOperand: vldr has no register-offset form, so a
// register offset is first materialized into a scratch base register.
void Assembler::vldr(const DwVfpRegister dst, const MemOperand& operand,
                     const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vldr(dst, scratch, 0, cond);
  } else {
    vldr(dst, operand.rn(), operand.offset(), cond);
  }
}
2410 
vldr(const SwVfpRegister dst,const Register base,int offset,const Condition cond)2411 void Assembler::vldr(const SwVfpRegister dst, const Register base, int offset,
2412                      const Condition cond) {
2413   // Sdst = MEM(Rbase + offset).
2414   // Instruction details available in ARM DDI 0406A, A8-628.
2415   // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) |
2416   // Vdst(15-12) | 1010(11-8) | offset
2417   int u = 1;
2418   if (offset < 0) {
2419     offset = -offset;
2420     u = 0;
2421   }
2422   int sd, d;
2423   dst.split_code(&sd, &d);
2424   DCHECK_GE(offset, 0);
2425 
2426   if ((offset % 4) == 0 && (offset / 4) < 256) {
2427     emit(cond | u * B23 | d * B22 | 0xD1 * B20 | base.code() * B16 | sd * B12 |
2428          0xA * B8 | ((offset / 4) & 255));
2429   } else {
2430     // Larger offsets must be handled by computing the correct address in a
2431     // scratch register.
2432     UseScratchRegisterScope temps(this);
2433     Register scratch = temps.Acquire();
2434     DCHECK(base != scratch);
2435     if (u == 1) {
2436       add(scratch, base, Operand(offset));
2437     } else {
2438       sub(scratch, base, Operand(offset));
2439     }
2440     emit(cond | d * B22 | 0xD1 * B20 | scratch.code() * B16 | sd * B12 |
2441          0xA * B8);
2442   }
2443 }
2444 
// vldr (single-precision) for a general MemOperand: a register offset is
// first materialized into a scratch base register.
void Assembler::vldr(const SwVfpRegister dst, const MemOperand& operand,
                     const Condition cond) {
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vldr(dst, scratch, 0, cond);
  } else {
    vldr(dst, operand.rn(), operand.offset(), cond);
  }
}
2458 
void Assembler::vstr(const DwVfpRegister src, const Register base, int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = Dsrc.
  // Instruction details available in ARM DDI 0406C.b, A8-1082.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 00(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | (offset/4)
  DCHECK(VfpRegisterIsAvailable(src));
  int u = 1;  // U bit: add (1) vs subtract (0) the offset.
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);  // -kMinInt would overflow.
    offset = -offset;
    u = 0;
  }
  DCHECK_GE(offset, 0);
  int vd, d;
  src.split_code(&vd, &d);

  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // The offset fits in the word-scaled 8-bit immediate field.
    emit(cond | 0xD * B24 | u * B23 | d * B22 | base.code() * B16 | vd * B12 |
         0xB * B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | 0xD * B24 | d * B22 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}
2494 
// vstr for a general MemOperand: a register offset is first materialized into
// a scratch base register.
void Assembler::vstr(const DwVfpRegister src, const MemOperand& operand,
                     const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}
2509 
void Assembler::vstr(const SwVfpRegister src, const Register base, int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = SSrc.
  // Instruction details available in ARM DDI 0406A, A8-786.
  // cond(31-28) | 1101(27-24)| U000(23-20) | Rbase(19-16) |
  // Vdst(15-12) | 1010(11-8) | (offset/4)
  int u = 1;  // U bit: add (1) vs subtract (0) the offset.
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);  // -kMinInt would overflow.
    offset = -offset;
    u = 0;
  }
  int sd, d;
  src.split_code(&sd, &d);
  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // The offset fits in the word-scaled 8-bit immediate field.
    emit(cond | u * B23 | d * B22 | 0xD0 * B20 | base.code() * B16 | sd * B12 |
         0xA * B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | d * B22 | 0xD0 * B20 | scratch.code() * B16 | sd * B12 |
         0xA * B8);
  }
}
2543 
// vstr (single-precision) for a general MemOperand: a register offset is
// first materialized into a scratch base register.
void Assembler::vstr(const SwVfpRegister src, const MemOperand& operand,
                     const Condition cond) {
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}
2557 
// Load the contiguous double registers first..last (inclusive) from memory at
// `base` (VLDM).
void Assembler::vldm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-922.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  // The immediate field counts words, hence count * 2 for doubles.
  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
       0xB * B8 | count * 2);
}
2575 
// Store the contiguous double registers first..last (inclusive) to memory at
// `base` (VSTM).
void Assembler::vstm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1080.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  // The immediate field counts words, hence count * 2 for doubles.
  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
       0xB * B8 | count * 2);
}
2593 
// Load the contiguous single registers first..last (inclusive) from memory at
// `base` (VLDM, single-precision).
void Assembler::vldm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-626.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | count
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d * B22 | B20 | base.code() * B16 | sd * B12 |
       0xA * B8 | count);
}
2609 
// Store the contiguous single registers first..last (inclusive) to memory at
// `base` (VSTM, single-precision).
void Assembler::vstm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-784.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | count
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d * B22 | base.code() * B16 | sd * B12 |
       0xA * B8 | count);
}
2625 
DoubleAsTwoUInt32(Double d,uint32_t * lo,uint32_t * hi)2626 static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) {
2627   uint64_t i = d.AsUint64();
2628 
2629   *lo = i & 0xFFFFFFFF;
2630   *hi = i >> 32;
2631 }
2632 
// Scatters the 8-bit immediate abcdefgh into the a / bcd / efgh fields of the
// NEON vmov (immediate) encoding.
static void WriteVmovIntImmEncoding(uint8_t imm, uint32_t* encoding) {
  // Integer promotion from uint8_t to int makes these all okay.
  const uint32_t a = (imm & 0x80) << 17;    // bit 7  -> bit 24
  const uint32_t bcd = (imm & 0x70) << 12;  // bits 6:4 -> bits 18:16
  const uint32_t efgh = imm & 0x0f;         // bits 3:0 -> bits 3:0
  *encoding = a | bcd | efgh;
}
2639 
2640 // This checks if imm can be encoded into an immediate for vmov.
2641 // See Table A7-15 in ARM DDI 0406C.d.
2642 // Currently only supports the first row and op=0 && cmode=1110.
FitsVmovIntImm(uint64_t imm,uint32_t * encoding,uint8_t * cmode)2643 static bool FitsVmovIntImm(uint64_t imm, uint32_t* encoding, uint8_t* cmode) {
2644   uint32_t lo = imm & 0xFFFFFFFF;
2645   uint32_t hi = imm >> 32;
2646   if ((lo == hi && ((lo & 0xffffff00) == 0))) {
2647     WriteVmovIntImmEncoding(imm & 0xff, encoding);
2648     *cmode = 0;
2649     return true;
2650   } else if ((lo == hi) && ((lo & 0xffff) == (lo >> 16)) &&
2651              ((lo & 0xff) == (lo >> 24))) {
2652     // Check that all bytes in imm are the same.
2653     WriteVmovIntImmEncoding(imm & 0xff, encoding);
2654     *cmode = 0xe;
2655     return true;
2656   }
2657 
2658   return false;
2659 }
2660 
// Move an integer immediate into a double register via NEON vmov (immediate).
// Only immediates accepted by FitsVmovIntImm are supported; anything else
// (or missing NEON support) hits UNIMPLEMENTED.
void Assembler::vmov(const DwVfpRegister dst, uint64_t imm) {
  uint32_t enc;
  uint8_t cmode;
  uint8_t op = 0;
  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
    CpuFeatureScope scope(this, NEON);
    // Instruction details available in ARM DDI 0406C.b, A8-937.
    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
    // | 0(7) | 0(6) | op(5) | 4(1) | imm4(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
         op * B5 | 0x1 * B4 | enc);
  } else {
    UNIMPLEMENTED();
  }
}
2678 
// Move an integer immediate into a quad register via NEON vmov (immediate);
// same as the double variant but with the Q bit (bit 6) set.
void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
  uint32_t enc;
  uint8_t cmode;
  uint8_t op = 0;
  if (CpuFeatures::IsSupported(NEON) && FitsVmovIntImm(imm, &enc, &cmode)) {
    CpuFeatureScope scope(this, NEON);
    // Instruction details available in ARM DDI 0406C.b, A8-937.
    // 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
    // | 0(7) | Q(6) | op(5) | 4(1) | imm4(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | cmode * B8 |
         0x1 * B6 | op * B5 | 0x1 * B4 | enc);
  } else {
    UNIMPLEMENTED();
  }
}
2696 
2697 // Only works for little endian floating point formats.
2698 // We don't support VFP on the mixed endian floating point platform.
// Checks whether the double d can be encoded as a VFP vmov floating-point
// immediate; on success writes the 8-bit immediate (in its instruction field
// positions) to *encoding.
static bool FitsVmovFPImmediate(Double d, uint32_t* encoding) {
  // VMOV can accept an immediate of the form:
  //
  //  +/- m * 2^(-n) where 16 <= m <= 31 and 0 <= n <= 7
  //
  // The immediate is encoded using an 8-bit quantity, comprised of two
  // 4-bit fields. For an 8-bit immediate of the form:
  //
  //  [abcdefgh]
  //
  // where a is the MSB and h is the LSB, an immediate 64-bit double can be
  // created of the form:
  //
  //  [aBbbbbbb,bbcdefgh,00000000,00000000,
  //      00000000,00000000,00000000,00000000]
  //
  // where B = ~b.
  //

  uint32_t lo, hi;
  DoubleAsTwoUInt32(d, &lo, &hi);

  // The most obvious constraint is the long block of zeroes.
  if ((lo != 0) || ((hi & 0xFFFF) != 0)) {
    return false;
  }

  // Bits 61:54 must be all clear or all set.
  if (((hi & 0x3FC00000) != 0) && ((hi & 0x3FC00000) != 0x3FC00000)) {
    return false;
  }

  // Bit 62 must be NOT bit 61.
  if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
    return false;
  }

  // Create the encoded immediate in the form:
  //  [00000000,0000abcd,00000000,0000efgh]
  *encoding = (hi >> 16) & 0xF;       // Low nybble.
  *encoding |= (hi >> 4) & 0x70000;   // Low three bits of the high nybble.
  *encoding |= (hi >> 12) & 0x80000;  // Top bit of the high nybble.

  return true;
}
2744 
// Move a float immediate into a single-precision register: encoded directly
// when VFPv3 and the value allow, otherwise via a core-register transfer.
void Assembler::vmov(const SwVfpRegister dst, Float32 imm) {
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) &&
      FitsVmovFPImmediate(Double(imm.get_scalar()), &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The float can be encoded in the instruction.
    //
    // Sd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
  } else {
    // Fallback: load the raw bits into a scratch core register and transfer.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    mov(scratch, Operand(imm.get_bits()));
    vmov(dst, scratch);
  }
}
2766 
// Move a double immediate into a double register. Uses the VFPv3 immediate
// encoding when possible; otherwise synthesises the value from its 32-bit
// halves via core registers, using extra_scratch (if provided) to do the
// transfer in a single vmov.
void Assembler::vmov(const DwVfpRegister dst, Double imm,
                     const Register extra_scratch) {
  DCHECK(VfpRegisterIsAvailable(dst));
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) && FitsVmovFPImmediate(imm, &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The double can be encoded in the instruction.
    //
    // Dd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 |
         enc);
  } else {
    // Synthesise the double from ARM immediates.
    uint32_t lo, hi;
    DoubleAsTwoUInt32(imm, &lo, &hi);
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();

    if (lo == hi) {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      vmov(dst, scratch, scratch);
    } else if (extra_scratch == no_reg) {
      // We only have one spare scratch register.
      mov(scratch, Operand(lo));
      vmov(NeonS32, dst, 0, scratch);
      // If the halves share their low 16 bits, patch only the top half with
      // movt instead of reloading a full immediate.
      if (((lo & 0xFFFF) == (hi & 0xFFFF)) && CpuFeatures::IsSupported(ARMv7)) {
        CpuFeatureScope scope(this, ARMv7);
        movt(scratch, hi >> 16);
      } else {
        mov(scratch, Operand(hi));
      }
      vmov(NeonS32, dst, 1, scratch);
    } else {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      mov(extra_scratch, Operand(hi));
      vmov(dst, scratch, extra_scratch);
    }
  }
}
2815 
// Register-to-register move between single-precision registers.
void Assembler::vmov(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Sd = Sm
  // Instruction details available in ARM DDI 0406B, A8-642.
  int sd, d, sm, m;
  dst.split_code(&sd, &d);
  src.split_code(&sm, &m);
  emit(cond | 0xE * B24 | d * B22 | 0xB * B20 | sd * B12 | 0xA * B8 | B6 |
       m * B5 | sm);
}
2826 
// Register-to-register move between double-precision registers.
void Assembler::vmov(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Dd = Dm
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B6 |
       m * B5 | vm);
}
2842 
// Transfer two core registers into a double register.
void Assembler::vmov(const DwVfpRegister dst, const Register src1,
                     const Register src2, const Condition cond) {
  // Dm = <Rt,Rt2>.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(src1 != pc && src2 != pc);
  int vm, m;
  dst.split_code(&vm, &m);
  emit(cond | 0xC * B24 | B22 | src2.code() * B16 | src1.code() * B12 |
       0xB * B8 | m * B5 | B4 | vm);
}
2856 
// Transfer a double register into two core registers.
void Assembler::vmov(const Register dst1, const Register dst2,
                     const DwVfpRegister src, const Condition cond) {
  // <Rt,Rt2> = Dm.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(dst1 != pc && dst2 != pc);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0xC * B24 | B22 | B20 | dst2.code() * B16 | dst1.code() * B12 |
       0xB * B8 | m * B5 | B4 | vm);
}
2870 
// Transfer a core register into a single-precision register.
void Assembler::vmov(const SwVfpRegister dst, const Register src,
                     const Condition cond) {
  // Sn = Rt.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(src != pc);
  int sn, n;
  dst.split_code(&sn, &n);
  emit(cond | 0xE * B24 | sn * B16 | src.code() * B12 | 0xA * B8 | n * B7 | B4);
}
2882 
// Transfer a single-precision register into a core register.
void Assembler::vmov(const Register dst, const SwVfpRegister src,
                     const Condition cond) {
  // Rt = Sn.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(dst != pc);
  int sn, n;
  src.split_code(&sn, &n);
  emit(cond | 0xE * B24 | B20 | sn * B16 | dst.code() * B12 | 0xA * B8 |
       n * B7 | B4);
}
2895 
2896 // Type of data to read from or write to VFP register.
2897 // Used as specifier in generic vcvt instruction.
enum VFPType { S32, U32, F32, F64 };  // Signed int, unsigned int, single, double.
2899 
IsSignedVFPType(VFPType type)2900 static bool IsSignedVFPType(VFPType type) {
2901   switch (type) {
2902     case S32:
2903       return true;
2904     case U32:
2905       return false;
2906     default:
2907       UNREACHABLE();
2908   }
2909 }
2910 
IsIntegerVFPType(VFPType type)2911 static bool IsIntegerVFPType(VFPType type) {
2912   switch (type) {
2913     case S32:
2914     case U32:
2915       return true;
2916     case F32:
2917     case F64:
2918       return false;
2919     default:
2920       UNREACHABLE();
2921   }
2922 }
2923 
IsDoubleVFPType(VFPType type)2924 static bool IsDoubleVFPType(VFPType type) {
2925   switch (type) {
2926     case F32:
2927       return false;
2928     case F64:
2929       return true;
2930     default:
2931       UNREACHABLE();
2932   }
2933 }
2934 
// Split five bit reg_code based on size of reg_type.
//  32-bit register codes are Vm:M
//  64-bit register codes are M:Vm
// where Vm is four bits, and M is a single bit.
static void SplitRegCode(VFPType reg_type, int reg_code, int* vm, int* m) {
  DCHECK((reg_code >= 0) && (reg_code <= 31));
  // Integer conversions are held in S registers, so integer and F32 types
  // both use the single-precision split; only F64 uses the D-register split.
  if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) {
    SwVfpRegister::split_code(reg_code, vm, m);
  } else {
    DwVfpRegister::split_code(reg_code, vm, m);
  }
}
2947 
// Encode vcvt.src_type.dst_type instruction.
// Returns the full 32-bit instruction word; callers pass it to emit().
static Instr EncodeVCVT(const VFPType dst_type, const int dst_code,
                        const VFPType src_type, const int src_code,
                        VFPConversionMode mode, const Condition cond) {
  DCHECK(src_type != dst_type);
  int D, Vd, M, Vm;
  SplitRegCode(src_type, src_code, &Vm, &M);
  SplitRegCode(dst_type, dst_code, &Vd, &D);

  if (IsIntegerVFPType(dst_type) || IsIntegerVFPType(src_type)) {
    // Conversion between IEEE floating point and 32-bit integer.
    // Instruction details available in ARM DDI 0406B, A8.6.295.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 1(19) | opc2(18-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | op(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    // Integer-to-integer conversion is not expressible with vcvt.
    DCHECK(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type));

    int sz, opc2, op;

    if (IsIntegerVFPType(dst_type)) {
      // Float-to-integer: opc2 selects signed/unsigned destination, sz the
      // source precision, and op the rounding mode.
      opc2 = IsSignedVFPType(dst_type) ? 0x5 : 0x4;
      sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
      op = mode;
    } else {
      // Integer-to-float: sz selects the destination precision and op the
      // signedness of the source.
      DCHECK(IsIntegerVFPType(src_type));
      opc2 = 0x0;
      sz = IsDoubleVFPType(dst_type) ? 0x1 : 0x0;
      op = IsSignedVFPType(src_type) ? 0x1 : 0x0;
    }

    return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | B19 | opc2 * B16 |
            Vd * B12 | 0x5 * B9 | sz * B8 | op * B7 | B6 | M * B5 | Vm);
  } else {
    // Conversion between IEEE double and single precision.
    // Instruction details available in ARM DDI 0406B, A8.6.298.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0111(19-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    int sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
    return (cond | 0xE * B24 | B23 | D * B22 | 0x3 * B20 | 0x7 * B16 |
            Vd * B12 | 0x5 * B9 | sz * B8 | B7 | B6 | M * B5 | Vm);
  }
}
2989 
void Assembler::vcvt_f64_s32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Dd = f64(s32 Sm): convert a signed 32-bit integer to double precision.
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), S32, src.code(), mode, cond));
}
2995 
void Assembler::vcvt_f32_s32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = f32(s32 Sm): convert a signed 32-bit integer to single precision.
  emit(EncodeVCVT(F32, dst.code(), S32, src.code(), mode, cond));
}
3000 
void Assembler::vcvt_f64_u32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Dd = f64(u32 Sm): convert an unsigned 32-bit integer to double precision.
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), U32, src.code(), mode, cond));
}
3006 
void Assembler::vcvt_f32_u32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = f32(u32 Sm): convert an unsigned 32-bit integer to single precision.
  emit(EncodeVCVT(F32, dst.code(), U32, src.code(), mode, cond));
}
3011 
void Assembler::vcvt_s32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = s32(f32 Sm): convert single precision to a signed 32-bit integer,
  // rounding according to `mode`.
  emit(EncodeVCVT(S32, dst.code(), F32, src.code(), mode, cond));
}
3016 
void Assembler::vcvt_u32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = u32(f32 Sm): convert single precision to an unsigned 32-bit integer,
  // rounding according to `mode`.
  emit(EncodeVCVT(U32, dst.code(), F32, src.code(), mode, cond));
}
3021 
void Assembler::vcvt_s32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = s32(f64 Dm): convert double precision to a signed 32-bit integer,
  // rounding according to `mode`.
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(S32, dst.code(), F64, src.code(), mode, cond));
}
3027 
void Assembler::vcvt_u32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = u32(f64 Dm): convert double precision to an unsigned 32-bit integer,
  // rounding according to `mode`.
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(U32, dst.code(), F64, src.code(), mode, cond));
}
3033 
void Assembler::vcvt_f64_f32(const DwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Dd = f64(f32 Sm): widen single precision to double precision.
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), F32, src.code(), mode, cond));
}
3039 
void Assembler::vcvt_f32_f64(const SwVfpRegister dst, const DwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = f32(f64 Dm): narrow double precision to single precision.
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(F32, dst.code(), F64, src.code(), mode, cond));
}
3045 
void Assembler::vcvt_f64_s32(const DwVfpRegister dst, int fraction_bits,
                             const Condition cond) {
  // Dd = f64(fixed-point Dd): in-place conversion from a signed fixed-point
  // value (with `fraction_bits` fractional bits) held in dst.
  // Instruction details available in ARM DDI 0406C.b, A8-874.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 1010(19-16) | Vd(15-12) |
  // 101(11-9) | sf=1(8) | sx=1(7) | 1(6) | i(5) | 0(4) | imm4(3-0)
  DCHECK(IsEnabled(VFPv3));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(fraction_bits > 0 && fraction_bits <= 32);
  int vd, d;
  dst.split_code(&vd, &d);
  // The instruction encodes 32 - fraction_bits split as imm4:i.
  int imm5 = 32 - fraction_bits;
  int i = imm5 & 1;
  int imm4 = (imm5 >> 1) & 0xF;
  emit(cond | 0xE * B24 | B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B7 | B6 | i * B5 | imm4);
}
3062 
void Assembler::vneg(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Dd = -Dm: double precision floating point negation.
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B8 | B6 | m * B5 | vm);
}
3078 
void Assembler::vneg(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Sd = -Sm: single precision floating point negation.
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B6 | m * B5 | vm);
}
3092 
void Assembler::vabs(const DwVfpRegister dst, const DwVfpRegister src,
                     const Condition cond) {
  // Dd = |Dm|: double precision floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B8 | B7 |
       B6 | m * B5 | vm);
}
3107 
void Assembler::vabs(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Sd = |Sm|: single precision floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B7 | B6 |
       m * B5 | vm);
}
3120 
void Assembler::vadd(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vadd(Dn, Dm) double precision floating point addition.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  // Split each register code into its four low bits and its top bit.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}
3140 
void Assembler::vadd(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vadd(Sn, Sm) single precision floating point addition.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}
3157 
void Assembler::vsub(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vsub(Dn, Dm) double precision floating point subtraction.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  // Same base encoding as vadd; bit 6 set selects subtraction.
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}
3177 
void Assembler::vsub(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vsub(Sn, Sm) single precision floating point subtraction.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  // Same base encoding as vadd; bit 6 set selects subtraction.
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}
3194 
void Assembler::vmul(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vmul(Dn, Dm) double precision floating point multiplication.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}
3214 
void Assembler::vmul(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vmul(Sn, Sm) single precision floating point multiplication.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}
3231 
void Assembler::vmla(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = Dd + (Dn * Dm): double precision multiply-accumulate.
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | m * B5 | vm);
}
3249 
void Assembler::vmla(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = Sd + (Sn * Sm): single precision multiply-accumulate.
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}
3264 
void Assembler::vmls(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = Dd - (Dn * Dm): double precision multiply-subtract.
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  // Same base encoding as vmla; bit 6 set selects subtraction.
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | B6 | m * B5 | vm);
}
3282 
void Assembler::vmls(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = Sd - (Sn * Sm): single precision multiply-subtract.
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  // Same base encoding as vmla; bit 6 set selects subtraction.
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       B6 | m * B5 | vm);
}
3297 
void Assembler::vdiv(const DwVfpRegister dst, const DwVfpRegister src1,
                     const DwVfpRegister src2, const Condition cond) {
  // Dd = vdiv(Dn, Dm) double precision floating point division.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | B8 |
       n * B7 | m * B5 | vm);
}
3317 
void Assembler::vdiv(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vdiv(Sn, Sm) single precision floating point division.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vm.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}
3334 
void Assembler::vcmp(const DwVfpRegister src1, const DwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Dd, Dm) double precision floating point comparison.
  // Sets the FPSCR flags; read them back into APSR with vmrs.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}
3350 
void Assembler::vcmp(const SwVfpRegister src1, const SwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Sd, Sm) single precision floating point comparison.
  // Sets the FPSCR flags; read them back into APSR with vmrs.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}
3364 
void Assembler::vcmp(const DwVfpRegister src1, const double src2,
                     const Condition cond) {
  // vcmp(Dd, #0.0) double precision floating point comparison.
  // Only comparison against literal zero is encodable; src2 must be 0.0.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B8 | B6);
}
3378 
void Assembler::vcmp(const SwVfpRegister src1, const float src2,
                     const Condition cond) {
  // vcmp(Sd, #0.0) single precision floating point comparison.
  // Only comparison against literal zero is encodable; src2 must be 0.0.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B6);
}
3391 
void Assembler::vmaxnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // Dd = max(Dn, Dm) with IEEE 754-2008 NaN handling (ARMv8 only).
  // Unconditional: encoded with the special (0b1111) condition.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}
3407 
void Assembler::vmaxnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // Sd = max(Sn, Sm) with IEEE 754-2008 NaN handling (ARMv8 only).
  // Unconditional: encoded with the special (0b1111) condition.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}
3423 
void Assembler::vminnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // Dd = min(Dn, Dm) with IEEE 754-2008 NaN handling (ARMv8 only).
  // Unconditional: encoded with the special (0b1111) condition.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}
3439 
void Assembler::vminnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // Sd = min(Sn, Sm) with IEEE 754-2008 NaN handling (ARMv8 only).
  // Unconditional: encoded with the special (0b1111) condition.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}
3455 
vsel(Condition cond,const DwVfpRegister dst,const DwVfpRegister src1,const DwVfpRegister src2)3456 void Assembler::vsel(Condition cond, const DwVfpRegister dst,
3457                      const DwVfpRegister src1, const DwVfpRegister src2) {
3458   // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
3459   // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=1(8) | N(7) |
3460   // 0(6) | M(5) | 0(4) | Vm(3-0)
3461   DCHECK(IsEnabled(ARMv8));
3462   int vd, d;
3463   dst.split_code(&vd, &d);
3464   int vn, n;
3465   src1.split_code(&vn, &n);
3466   int vm, m;
3467   src2.split_code(&vm, &m);
3468   int sz = 1;
3469 
3470   // VSEL has a special (restricted) condition encoding.
3471   //   eq(0b0000)... -> 0b00
3472   //   ge(0b1010)... -> 0b10
3473   //   gt(0b1100)... -> 0b11
3474   //   vs(0b0110)... -> 0b01
3475   // No other conditions are supported.
3476   int vsel_cond = (cond >> 30) & 0x3;
3477   if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
3478     // We can implement some other conditions by swapping the inputs.
3479     DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
3480     std::swap(vn, vm);
3481     std::swap(n, m);
3482   }
3483 
3484   emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
3485        vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
3486 }
3487 
vsel(Condition cond,const SwVfpRegister dst,const SwVfpRegister src1,const SwVfpRegister src2)3488 void Assembler::vsel(Condition cond, const SwVfpRegister dst,
3489                      const SwVfpRegister src1, const SwVfpRegister src2) {
3490   // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
3491   // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=0(8) | N(7) |
3492   // 0(6) | M(5) | 0(4) | Vm(3-0)
3493   DCHECK(IsEnabled(ARMv8));
3494   int vd, d;
3495   dst.split_code(&vd, &d);
3496   int vn, n;
3497   src1.split_code(&vn, &n);
3498   int vm, m;
3499   src2.split_code(&vm, &m);
3500   int sz = 0;
3501 
3502   // VSEL has a special (restricted) condition encoding.
3503   //   eq(0b0000)... -> 0b00
3504   //   ge(0b1010)... -> 0b10
3505   //   gt(0b1100)... -> 0b11
3506   //   vs(0b0110)... -> 0b01
3507   // No other conditions are supported.
3508   int vsel_cond = (cond >> 30) & 0x3;
3509   if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
3510     // We can implement some other conditions by swapping the inputs.
3511     DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
3512     std::swap(vn, vm);
3513     std::swap(n, m);
3514   }
3515 
3516   emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
3517        vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
3518 }
3519 
void Assembler::vsqrt(const DwVfpRegister dst, const DwVfpRegister src,
                      const Condition cond) {
  // Dd = sqrt(Dm): double precision floating point square root.
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B8 | 0x3 * B6 | m * B5 | vm);
}
3534 
void Assembler::vsqrt(const SwVfpRegister dst, const SwVfpRegister src,
                      const Condition cond) {
  // Sd = sqrt(Sm): single precision floating point square root.
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       0x3 * B6 | m * B5 | vm);
}
3547 
void Assembler::vmsr(Register dst, Condition cond) {
  // FPSCR = Rt: move a core register into the VFP status register.
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1110(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xE * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}
3554 
void Assembler::vmrs(Register dst, Condition cond) {
  // Rt = FPSCR: move the VFP status register into a core register.
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xF * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}
3561 
void Assembler::vrinta(const SwVfpRegister dst, const SwVfpRegister src) {
  // Sd = round-to-nearest-with-ties-away(Sm) (ARMv8 only, unconditional).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}
3574 
vrinta(const DwVfpRegister dst,const DwVfpRegister src)3575 void Assembler::vrinta(const DwVfpRegister dst, const DwVfpRegister src) {
3576   // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
3577   // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
3578   // M(5) | 0(4) | Vm(3-0)
3579   DCHECK(IsEnabled(ARMv8));
3580   int vd, d;
3581   dst.split_code(&vd, &d);
3582   int vm, m;
3583   src.split_code(&vm, &m);
3584   emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
3585        0x5 * B9 | B8 | B6 | m * B5 | vm);
3586 }
3587 
vrintn(const SwVfpRegister dst,const SwVfpRegister src)3588 void Assembler::vrintn(const SwVfpRegister dst, const SwVfpRegister src) {
3589   // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
3590   // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
3591   // M(5) | 0(4) | Vm(3-0)
3592   DCHECK(IsEnabled(ARMv8));
3593   int vd, d;
3594   dst.split_code(&vd, &d);
3595   int vm, m;
3596   src.split_code(&vm, &m);
3597   emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
3598        vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
3599 }
3600 
vrintn(const DwVfpRegister dst,const DwVfpRegister src)3601 void Assembler::vrintn(const DwVfpRegister dst, const DwVfpRegister src) {
3602   // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
3603   // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
3604   // M(5) | 0(4) | Vm(3-0)
3605   DCHECK(IsEnabled(ARMv8));
3606   int vd, d;
3607   dst.split_code(&vd, &d);
3608   int vm, m;
3609   src.split_code(&vm, &m);
3610   emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
3611        vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
3612 }
3613 
vrintp(const SwVfpRegister dst,const SwVfpRegister src)3614 void Assembler::vrintp(const SwVfpRegister dst, const SwVfpRegister src) {
3615   // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
3616   // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
3617   // M(5) | 0(4) | Vm(3-0)
3618   DCHECK(IsEnabled(ARMv8));
3619   int vd, d;
3620   dst.split_code(&vd, &d);
3621   int vm, m;
3622   src.split_code(&vm, &m);
3623   emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
3624        vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
3625 }
3626 
vrintp(const DwVfpRegister dst,const DwVfpRegister src)3627 void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) {
3628   // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
3629   // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
3630   // M(5) | 0(4) | Vm(3-0)
3631   DCHECK(IsEnabled(ARMv8));
3632   int vd, d;
3633   dst.split_code(&vd, &d);
3634   int vm, m;
3635   src.split_code(&vm, &m);
3636   emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
3637        vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
3638 }
3639 
vrintm(const SwVfpRegister dst,const SwVfpRegister src)3640 void Assembler::vrintm(const SwVfpRegister dst, const SwVfpRegister src) {
3641   // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
3642   // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
3643   // M(5) | 0(4) | Vm(3-0)
3644   DCHECK(IsEnabled(ARMv8));
3645   int vd, d;
3646   dst.split_code(&vd, &d);
3647   int vm, m;
3648   src.split_code(&vm, &m);
3649   emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
3650        vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
3651 }
3652 
vrintm(const DwVfpRegister dst,const DwVfpRegister src)3653 void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) {
3654   // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
3655   // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
3656   // M(5) | 0(4) | Vm(3-0)
3657   DCHECK(IsEnabled(ARMv8));
3658   int vd, d;
3659   dst.split_code(&vd, &d);
3660   int vm, m;
3661   src.split_code(&vm, &m);
3662   emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
3663        vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
3664 }
3665 
vrintz(const SwVfpRegister dst,const SwVfpRegister src,const Condition cond)3666 void Assembler::vrintz(const SwVfpRegister dst, const SwVfpRegister src,
3667                        const Condition cond) {
3668   // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
3669   // Vd(15-12) | 101(11-9) | sz=0(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3670   DCHECK(IsEnabled(ARMv8));
3671   int vd, d;
3672   dst.split_code(&vd, &d);
3673   int vm, m;
3674   src.split_code(&vm, &m);
3675   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
3676        0x5 * B9 | B7 | B6 | m * B5 | vm);
3677 }
3678 
vrintz(const DwVfpRegister dst,const DwVfpRegister src,const Condition cond)3679 void Assembler::vrintz(const DwVfpRegister dst, const DwVfpRegister src,
3680                        const Condition cond) {
3681   // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
3682   // Vd(15-12) | 101(11-9) | sz=1(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
3683   DCHECK(IsEnabled(ARMv8));
3684   int vd, d;
3685   dst.split_code(&vd, &d);
3686   int vm, m;
3687   src.split_code(&vm, &m);
3688   emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
3689        0x5 * B9 | B8 | B7 | B6 | m * B5 | vm);
3690 }
3691 
3692 // Support for NEON.
3693 
vld1(NeonSize size,const NeonListOperand & dst,const NeonMemOperand & src)3694 void Assembler::vld1(NeonSize size, const NeonListOperand& dst,
3695                      const NeonMemOperand& src) {
3696   // Instruction details available in ARM DDI 0406C.b, A8.8.320.
3697   // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
3698   // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
3699   DCHECK(IsEnabled(NEON));
3700   int vd, d;
3701   dst.base().split_code(&vd, &d);
3702   emit(0xFU * B28 | 4 * B24 | d * B22 | 2 * B20 | src.rn().code() * B16 |
3703        vd * B12 | dst.type() * B8 | size * B6 | src.align() * B4 |
3704        src.rm().code());
3705 }
3706 
3707 // vld1s(ingle element to one lane).
vld1s(NeonSize size,const NeonListOperand & dst,uint8_t index,const NeonMemOperand & src)3708 void Assembler::vld1s(NeonSize size, const NeonListOperand& dst, uint8_t index,
3709                       const NeonMemOperand& src) {
3710   // Instruction details available in ARM DDI 0406C.b, A8.8.322.
3711   // 1111(31-28) | 01001(27-23) | D(22) | 10(21-20) | Rn(19-16) |
3712   // Vd(15-12) | size(11-10) | index_align(7-4) | Rm(3-0)
3713   // See vld1 (single element to all lanes) if size == 0x3, implemented as
3714   // vld1r(eplicate).
3715   DCHECK_NE(size, 0x3);
3716   // Check for valid lane indices.
3717   DCHECK_GT(1 << (3 - size), index);
3718   // Specifying alignment not supported, use standard alignment.
3719   uint8_t index_align = index << (size + 1);
3720 
3721   DCHECK(IsEnabled(NEON));
3722   int vd, d;
3723   dst.base().split_code(&vd, &d);
3724   emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
3725        src.rn().code() * B16 | vd * B12 | size * B10 | index_align * B4 |
3726        src.rm().code());
3727 }
3728 
3729 // vld1r(eplicate)
vld1r(NeonSize size,const NeonListOperand & dst,const NeonMemOperand & src)3730 void Assembler::vld1r(NeonSize size, const NeonListOperand& dst,
3731                       const NeonMemOperand& src) {
3732   DCHECK(IsEnabled(NEON));
3733   int vd, d;
3734   dst.base().split_code(&vd, &d);
3735   emit(0xFU * B28 | 4 * B24 | 1 * B23 | d * B22 | 2 * B20 |
3736        src.rn().code() * B16 | vd * B12 | 0xC * B8 | size * B6 |
3737        dst.length() * B5 | src.rm().code());
3738 }
3739 
vst1(NeonSize size,const NeonListOperand & src,const NeonMemOperand & dst)3740 void Assembler::vst1(NeonSize size, const NeonListOperand& src,
3741                      const NeonMemOperand& dst) {
3742   // Instruction details available in ARM DDI 0406C.b, A8.8.404.
3743   // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
3744   // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
3745   DCHECK(IsEnabled(NEON));
3746   int vd, d;
3747   src.base().split_code(&vd, &d);
3748   emit(0xFU * B28 | 4 * B24 | d * B22 | dst.rn().code() * B16 | vd * B12 |
3749        src.type() * B8 | size * B6 | dst.align() * B4 | dst.rm().code());
3750 }
3751 
vmovl(NeonDataType dt,QwNeonRegister dst,DwVfpRegister src)3752 void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
3753   // Instruction details available in ARM DDI 0406C.b, A8.8.346.
3754   // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
3755   // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
3756   DCHECK(IsEnabled(NEON));
3757   int vd, d;
3758   dst.split_code(&vd, &d);
3759   int vm, m;
3760   src.split_code(&vm, &m);
3761   int U = NeonU(dt);
3762   int imm3 = 1 << NeonSz(dt);
3763   emit(0xFU * B28 | B25 | U * B24 | B23 | d * B22 | imm3 * B19 | vd * B12 |
3764        0xA * B8 | m * B5 | B4 | vm);
3765 }
3766 
vqmovn(NeonDataType dst_dt,NeonDataType src_dt,DwVfpRegister dst,QwNeonRegister src)3767 void Assembler::vqmovn(NeonDataType dst_dt, NeonDataType src_dt,
3768                        DwVfpRegister dst, QwNeonRegister src) {
3769   // Instruction details available in ARM DDI 0406C.b, A8.8.1004.
3770   // vqmovn.<type><size> Dd, Qm. ARM vector narrowing move with saturation.
3771   // vqmovun.<type><size> Dd, Qm. Same as above, but produces unsigned results.
3772   DCHECK(IsEnabled(NEON));
3773   DCHECK_IMPLIES(NeonU(src_dt), NeonU(dst_dt));
3774   int vd, d;
3775   dst.split_code(&vd, &d);
3776   int vm, m;
3777   src.split_code(&vm, &m);
3778   int size = NeonSz(dst_dt);
3779   int op = NeonU(src_dt) ? 0b11 : NeonU(dst_dt) ? 0b01 : 0b10;
3780   emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
3781        0x2 * B8 | op * B6 | m * B5 | vm);
3782 }
3783 
// Returns the opc1 (bits 22-21) and opc2 (bits 6-5) instruction fields that
// select lane |index| of a D register for the vmov core<->scalar forms
// below. Only 8-, 16- and 32-bit element types are encodable.
static int EncodeScalar(NeonDataType dt, int index) {
  int opc1_opc2 = 0;
  DCHECK_LE(0, index);
  switch (dt) {
    case NeonS8:
    case NeonU8:
      DCHECK_GT(8, index);
      opc1_opc2 = 0x8 | index;
      break;
    case NeonS16:
    case NeonU16:
      DCHECK_GT(4, index);
      opc1_opc2 = 0x1 | (index << 1);
      break;
    case NeonS32:
    case NeonU32:
      DCHECK_GT(2, index);
      opc1_opc2 = index << 2;
      break;
    default:
      UNREACHABLE();
  }
  // Split the packed value into the two instruction fields.
  return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;
}
3808 
vmov(NeonDataType dt,DwVfpRegister dst,int index,Register src)3809 void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
3810                      Register src) {
3811   // Instruction details available in ARM DDI 0406C.b, A8.8.940.
3812   // vmov ARM core register to scalar.
3813   DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
3814   int vd, d;
3815   dst.split_code(&vd, &d);
3816   int opc1_opc2 = EncodeScalar(dt, index);
3817   emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
3818        opc1_opc2);
3819 }
3820 
vmov(NeonDataType dt,Register dst,DwVfpRegister src,int index)3821 void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
3822                      int index) {
3823   // Instruction details available in ARM DDI 0406C.b, A8.8.942.
3824   // vmov Arm scalar to core register.
3825   DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
3826   int vn, n;
3827   src.split_code(&vn, &n);
3828   int opc1_opc2 = EncodeScalar(dt, index);
3829   int u = NeonU(dt);
3830   emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
3831        n * B7 | B4 | opc1_opc2);
3832 }
3833 
vmov(QwNeonRegister dst,QwNeonRegister src)3834 void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) {
3835   // Instruction details available in ARM DDI 0406C.b, A8-938.
3836   // vmov is encoded as vorr.
3837   vorr(dst, src, src);
3838 }
3839 
vdup(NeonSize size,QwNeonRegister dst,Register src)3840 void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) {
3841   DCHECK(IsEnabled(NEON));
3842   // Instruction details available in ARM DDI 0406C.b, A8-886.
3843   int B = 0, E = 0;
3844   switch (size) {
3845     case Neon8:
3846       B = 1;
3847       break;
3848     case Neon16:
3849       E = 1;
3850       break;
3851     case Neon32:
3852       break;
3853     default:
3854       UNREACHABLE();
3855   }
3856   int vd, d;
3857   dst.split_code(&vd, &d);
3858 
3859   emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
3860        0xB * B8 | d * B7 | E * B5 | B4);
3861 }
3862 
// Distinguishes 64-bit D-register from 128-bit Q-register operations in the
// shared NEON encoding helpers below.
enum NeonRegType { NEON_D, NEON_Q };
3864 
NeonSplitCode(NeonRegType type,int code,int * vm,int * m,int * encoding)3865 void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
3866   if (type == NEON_D) {
3867     DwVfpRegister::split_code(code, vm, m);
3868   } else {
3869     DCHECK_EQ(type, NEON_Q);
3870     QwNeonRegister::split_code(code, vm, m);
3871     *encoding |= B6;
3872   }
3873 }
3874 
// Encodes vdup.<size> (scalar): duplicate lane |index| of |src| into all
// lanes of the destination D or Q register identified by |dst_code|.
static Instr EncodeNeonDupOp(NeonSize size, NeonRegType reg_type, int dst_code,
                             DwVfpRegister src, int index) {
  DCHECK_NE(Neon64, size);
  int sz = static_cast<int>(size);
  DCHECK_LE(0, index);
  DCHECK_GT(kSimd128Size / (1 << sz), index);
  // imm4 (bits 19-16) packs the element size marker and the lane index.
  int imm4 = (1 << sz) | ((index << (sz + 1)) & 0xF);
  int qbit = 0;  // Set to B6 by NeonSplitCode for Q destinations.
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &qbit);
  int vm, m;
  src.split_code(&vm, &m);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 |
         0x18 * B7 | qbit | m * B5 | vm;
}
3891 
vdup(NeonSize size,DwVfpRegister dst,DwVfpRegister src,int index)3892 void Assembler::vdup(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
3893                      int index) {
3894   DCHECK(IsEnabled(NEON));
3895   // Instruction details available in ARM DDI 0406C.b, A8-884.
3896   emit(EncodeNeonDupOp(size, NEON_D, dst.code(), src, index));
3897 }
3898 
vdup(NeonSize size,QwNeonRegister dst,DwVfpRegister src,int index)3899 void Assembler::vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src,
3900                      int index) {
3901   // Instruction details available in ARM DDI 0406C.b, A8-884.
3902   DCHECK(IsEnabled(NEON));
3903   emit(EncodeNeonDupOp(size, NEON_Q, dst.code(), src, index));
3904 }
3905 
// Encode NEON vcvt.dst_type.src_type instruction (ARM UAL puts the
// destination type suffix first), converting per-lane between F32 and
// S32/U32.
static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst,
                            VFPType src_type, QwNeonRegister src) {
  DCHECK(src_type != dst_type);
  DCHECK(src_type == F32 || dst_type == F32);
  // Instruction details available in ARM DDI 0406C.b, A8.8.868.
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  // op (bits 8-7): 0/1 int->float (signed/unsigned), 2/3 float->int.
  int op = 0;
  if (src_type == F32) {
    DCHECK(dst_type == S32 || dst_type == U32);
    op = dst_type == U32 ? 3 : 2;
  } else {
    DCHECK(src_type == S32 || src_type == U32);
    op = src_type == U32 ? 1 : 0;
  }

  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
         B6 | m * B5 | vm;
}
3929 
vcvt_f32_s32(QwNeonRegister dst,QwNeonRegister src)3930 void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) {
3931   DCHECK(IsEnabled(NEON));
3932   DCHECK(VfpRegisterIsAvailable(dst));
3933   DCHECK(VfpRegisterIsAvailable(src));
3934   emit(EncodeNeonVCVT(F32, dst, S32, src));
3935 }
3936 
vcvt_f32_u32(QwNeonRegister dst,QwNeonRegister src)3937 void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) {
3938   DCHECK(IsEnabled(NEON));
3939   DCHECK(VfpRegisterIsAvailable(dst));
3940   DCHECK(VfpRegisterIsAvailable(src));
3941   emit(EncodeNeonVCVT(F32, dst, U32, src));
3942 }
3943 
vcvt_s32_f32(QwNeonRegister dst,QwNeonRegister src)3944 void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) {
3945   DCHECK(IsEnabled(NEON));
3946   DCHECK(VfpRegisterIsAvailable(dst));
3947   DCHECK(VfpRegisterIsAvailable(src));
3948   emit(EncodeNeonVCVT(S32, dst, F32, src));
3949 }
3950 
vcvt_u32_f32(QwNeonRegister dst,QwNeonRegister src)3951 void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
3952   DCHECK(IsEnabled(NEON));
3953   DCHECK(VfpRegisterIsAvailable(dst));
3954   DCHECK(VfpRegisterIsAvailable(src));
3955   emit(EncodeNeonVCVT(U32, dst, F32, src));
3956 }
3957 
// Unary NEON operations dispatched through EncodeNeonUnaryOp below. The *F
// entries are the floating-point variants.
enum UnaryOp {
  VMVN,
  VSWP,
  VABS,
  VABSF,
  VNEG,
  VNEGF,
  VRINTM,
  VRINTN,
  VRINTP,
  VRINTZ
};
3970 
// Encodes a NEON two-register misc (unary) instruction. |size| fills bits
// 19-18; the per-op constants below select the opcode fields. D vs Q
// operation is chosen by |reg_type| (NeonSplitCode also sets the Q bit B6).
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
                               int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VMVN:
      DCHECK_EQ(Neon8, size);  // size == 0 for vmvn
      op_encoding = B10 | 0x3 * B7;
      break;
    case VSWP:
      DCHECK_EQ(Neon8, size);  // size == 0 for vswp
      op_encoding = B17;
      break;
    case VABS:
      op_encoding = B16 | 0x6 * B7;
      break;
    case VABSF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x6 * B7;
      break;
    case VNEG:
      op_encoding = B16 | 0x7 * B7;
      break;
    case VNEGF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x7 * B7;
      break;
    case VRINTM:
      op_encoding = B17 | 0xD * B7;
      break;
    case VRINTN:
      op_encoding = B17 | 0x8 * B7;
      break;
    case VRINTP:
      op_encoding = B17 | 0xF * B7;
      break;
    case VRINTZ:
      op_encoding = B17 | 0xB * B7;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}
4020 
vmvn(QwNeonRegister dst,QwNeonRegister src)4021 void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) {
4022   // Qd = vmvn(Qn, Qm) SIMD bitwise negate.
4023   // Instruction details available in ARM DDI 0406C.b, A8-966.
4024   DCHECK(IsEnabled(NEON));
4025   emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code()));
4026 }
4027 
vswp(DwVfpRegister dst,DwVfpRegister src)4028 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
4029   DCHECK(IsEnabled(NEON));
4030   // Dd = vswp(Dn, Dm) SIMD d-register swap.
4031   // Instruction details available in ARM DDI 0406C.b, A8.8.418.
4032   DCHECK(IsEnabled(NEON));
4033   emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code()));
4034 }
4035 
vswp(QwNeonRegister dst,QwNeonRegister src)4036 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
4037   // Qd = vswp(Qn, Qm) SIMD q-register swap.
4038   // Instruction details available in ARM DDI 0406C.b, A8.8.418.
4039   DCHECK(IsEnabled(NEON));
4040   emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code()));
4041 }
4042 
vabs(QwNeonRegister dst,QwNeonRegister src)4043 void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) {
4044   // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value.
4045   // Instruction details available in ARM DDI 0406C.b, A8.8.824.
4046   DCHECK(IsEnabled(NEON));
4047   emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code()));
4048 }
4049 
vabs(NeonSize size,QwNeonRegister dst,QwNeonRegister src)4050 void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
4051   // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value.
4052   // Instruction details available in ARM DDI 0406C.b, A8.8.824.
4053   DCHECK(IsEnabled(NEON));
4054   emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code()));
4055 }
4056 
vneg(QwNeonRegister dst,QwNeonRegister src)4057 void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) {
4058   // Qd = vabs.f<size>(Qn, Qm) SIMD floating point negate.
4059   // Instruction details available in ARM DDI 0406C.b, A8.8.968.
4060   DCHECK(IsEnabled(NEON));
4061   emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code()));
4062 }
4063 
vneg(NeonSize size,QwNeonRegister dst,QwNeonRegister src)4064 void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
4065   // Qd = vabs.s<size>(Qn, Qm) SIMD integer negate.
4066   // Instruction details available in ARM DDI 0406C.b, A8.8.968.
4067   DCHECK(IsEnabled(NEON));
4068   emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code()));
4069 }
4070 
// Three-register NEON bitwise operations dispatched through
// EncodeNeonBinaryBitwiseOp below.
enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };
4072 
// Encodes a NEON three-register bitwise instruction. The per-op constants
// fill the U (B24) and C (bits 21-20) fields that distinguish the ops; D vs
// Q form is chosen by |reg_type| (NeonSplitCode also sets the Q bit B6).
static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
                                       int dst_code, int src_code1,
                                       int src_code2) {
  int op_encoding = 0;
  switch (op) {
    case VBIC:
      op_encoding = 0x1 * B20;
      break;
    case VBIF:
      op_encoding = B24 | 0x3 * B20;
      break;
    case VBIT:
      op_encoding = B24 | 0x2 * B20;
      break;
    case VBSL:
      op_encoding = B24 | 0x1 * B20;
      break;
    case VEOR:
      op_encoding = B24;
      break;
    case VORR:
      op_encoding = 0x2 * B20;
      break;
    case VORN:
      op_encoding = 0x3 * B20;
      break;
    case VAND:
      // op_encoding is 0.
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);

  return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
         n * B7 | m * B5 | B4 | vm;
}
4115 
vand(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4116 void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1,
4117                      QwNeonRegister src2) {
4118   // Qd = vand(Qn, Qm) SIMD AND.
4119   // Instruction details available in ARM DDI 0406C.b, A8.8.836.
4120   DCHECK(IsEnabled(NEON));
4121   emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(),
4122                                  src2.code()));
4123 }
4124 
vbic(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4125 void Assembler::vbic(QwNeonRegister dst, QwNeonRegister src1,
4126                      QwNeonRegister src2) {
4127   // Qd = vbic(Qn, Qm) SIMD AND.
4128   // Instruction details available in ARM DDI 0406C.b, A8-840.
4129   DCHECK(IsEnabled(NEON));
4130   emit(EncodeNeonBinaryBitwiseOp(VBIC, NEON_Q, dst.code(), src1.code(),
4131                                  src2.code()));
4132 }
4133 
vbsl(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4134 void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1,
4135                      QwNeonRegister src2) {
4136   // Qd = vbsl(Qn, Qm) SIMD bitwise select.
4137   // Instruction details available in ARM DDI 0406C.b, A8-844.
4138   DCHECK(IsEnabled(NEON));
4139   emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(),
4140                                  src2.code()));
4141 }
4142 
veor(DwVfpRegister dst,DwVfpRegister src1,DwVfpRegister src2)4143 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
4144                      DwVfpRegister src2) {
4145   // Dd = veor(Dn, Dm) SIMD exclusive OR.
4146   // Instruction details available in ARM DDI 0406C.b, A8.8.888.
4147   DCHECK(IsEnabled(NEON));
4148   emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(),
4149                                  src2.code()));
4150 }
4151 
veor(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4152 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
4153                      QwNeonRegister src2) {
4154   // Qd = veor(Qn, Qm) SIMD exclusive OR.
4155   // Instruction details available in ARM DDI 0406C.b, A8.8.888.
4156   DCHECK(IsEnabled(NEON));
4157   emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(),
4158                                  src2.code()));
4159 }
4160 
vorr(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4161 void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
4162                      QwNeonRegister src2) {
4163   // Qd = vorr(Qn, Qm) SIMD OR.
4164   // Instruction details available in ARM DDI 0406C.b, A8.8.976.
4165   DCHECK(IsEnabled(NEON));
4166   emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(),
4167                                  src2.code()));
4168 }
4169 
// Three-register NEON floating-point operations dispatched through the
// FPBinOp overload of EncodeNeonBinOp below.
enum FPBinOp {
  VADDF,
  VSUBF,
  VMULF,
  VMINF,
  VMAXF,
  VRECPS,
  VRSQRTS,
  VCEQF,
  VCGEF,
  VCGTF
};
4182 
// Encodes a NEON three-register floating-point (F32 lanes) Q-register
// instruction; the per-op constants fill the opcode/U/size fields.
static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADDF:
      op_encoding = 0xD * B8;
      break;
    case VSUBF:
      op_encoding = B21 | 0xD * B8;
      break;
    case VMULF:
      op_encoding = B24 | 0xD * B8 | B4;
      break;
    case VMINF:
      op_encoding = B21 | 0xF * B8;
      break;
    case VMAXF:
      op_encoding = 0xF * B8;
      break;
    case VRECPS:
      op_encoding = 0xF * B8 | B4;
      break;
    case VRSQRTS:
      op_encoding = B21 | 0xF * B8 | B4;
      break;
    case VCEQF:
      op_encoding = 0xE * B8;
      break;
    case VCGEF:
      op_encoding = B24 | 0xE * B8;
      break;
    case VCGTF:
      op_encoding = B24 | B21 | 0xE * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  return 0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | n * B7 | B6 | m * B5 |
         vm | op_encoding;
}
4229 
// Three-register NEON integer operations dispatched through the
// IntegerBinOp overloads of EncodeNeonBinOp below.
enum IntegerBinOp {
  VADD,
  VQADD,
  VSUB,
  VQSUB,
  VMUL,
  VMIN,
  VMAX,
  VTST,
  VCEQ,
  VCGE,
  VCGT,
  VRHADD
};
4244 
// Encodes a NEON three-register integer Q-register instruction. |dt|
// supplies the element size (bits 21-20) and the U (unsigned) bit B24; the
// per-op constants fill the remaining opcode fields.
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
                             QwNeonRegister dst, QwNeonRegister src1,
                             QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADD:
      op_encoding = 0x8 * B8;
      break;
    case VQADD:
      op_encoding = B4;
      break;
    case VSUB:
      op_encoding = B24 | 0x8 * B8;
      break;
    case VQSUB:
      op_encoding = 0x2 * B8 | B4;
      break;
    case VMUL:
      op_encoding = 0x9 * B8 | B4;
      break;
    case VMIN:
      op_encoding = 0x6 * B8 | B4;
      break;
    case VMAX:
      op_encoding = 0x6 * B8;
      break;
    case VTST:
      op_encoding = 0x8 * B8 | B4;
      break;
    case VCEQ:
      op_encoding = B24 | 0x8 * B8 | B4;
      break;
    case VCGE:
      op_encoding = 0x3 * B8 | B4;
      break;
    case VCGT:
      op_encoding = 0x3 * B8;
      break;
    case VRHADD:
      op_encoding = B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | B6 | m * B5 | vm | op_encoding;
}
4300 
EncodeNeonBinOp(IntegerBinOp op,NeonSize size,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4301 static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,
4302                              QwNeonRegister src1, QwNeonRegister src2) {
4303   // Map NeonSize values to the signed values in NeonDataType, so the U bit
4304   // will be 0.
4305   return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
4306 }
4307 
vadd(QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4308 void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,
4309                      QwNeonRegister src2) {
4310   DCHECK(IsEnabled(NEON));
4311   // Qd = vadd(Qn, Qm) SIMD floating point addition.
4312   // Instruction details available in ARM DDI 0406C.b, A8-830.
4313   emit(EncodeNeonBinOp(VADDF, dst, src1, src2));
4314 }
4315 
vadd(NeonSize size,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4316 void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
4317                      QwNeonRegister src2) {
4318   DCHECK(IsEnabled(NEON));
4319   // Qd = vadd(Qn, Qm) SIMD integer addition.
4320   // Instruction details available in ARM DDI 0406C.b, A8-828.
4321   emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
4322 }
4323 
vqadd(NeonDataType dt,QwNeonRegister dst,QwNeonRegister src1,QwNeonRegister src2)4324 void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4325                       QwNeonRegister src2) {
4326   DCHECK(IsEnabled(NEON));
4327   // Qd = vqadd(Qn, Qm) SIMD integer saturating addition.
4328   // Instruction details available in ARM DDI 0406C.b, A8-996.
4329   emit(EncodeNeonBinOp(VQADD, dt, dst, src1, src2));
4330 }
4331 
void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  emit(EncodeNeonBinOp(VSUBF, dst, src1, src2));
}

void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD integer subtraction; element size selected by
  // |size|.
  // Instruction details available in ARM DDI 0406C.b, A8-1084.
  emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
}
4347 
void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqsub(Qn, Qm) SIMD integer saturating subtraction; |dt| selects both
  // element size and signedness.
  // Instruction details available in ARM DDI 0406C.b, A8-1020.
  emit(EncodeNeonBinOp(VQSUB, dt, dst, src1, src2));
}
4355 
void Assembler::vmlal(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmlal(Dn, Dm) Vector Multiply Accumulate Long (integer).
  // Widens: D-register operands, Q-register accumulator/destination.
  // Instruction details available in ARM DDI 0406C.b, A8-931.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  // Only the unsigned variant is emitted here; signed is not yet supported.
  if (!u) UNIMPLEMENTED();
  DCHECK_NE(size, 3);  // SEE "Related encodings"
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0x8 * B8 | n * B7 | m * B5 | vm);
}
4374 
void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD floating point multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-958.
  emit(EncodeNeonBinOp(VMULF, dst, src1, src2));
}

void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD integer multiply; element size selected by |size|.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2));
}
4390 
void Assembler::vmull(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmull(Dn, Dm) Vector Multiply Long (integer).
  // Widens: D-register operands, Q-register destination.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  emit(0xFU * B28 | B25 | u * B24 | B23 | d * B22 | size * B20 | vn * B16 |
       vd * B12 | 0xC * B8 | n * B7 | m * B5 | vm);
}
4407 
void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD floating point MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMINF, dst, src1, src2));
}

void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD integer MIN; |dt| selects element size/signedness.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMIN, dt, dst, src1, src2));
}
4423 
void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD floating point MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMAXF, dst, src1, src2));
}

void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD integer MAX; |dt| selects element size/signedness.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
}
4439 
// Shift operations encoded by EncodeNeonShiftOp / EncodeNeonShiftRegisterOp.
enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI };

// Encodes a register-controlled NEON shift (currently only VSHL) where the
// per-lane shift amounts come from a third register.
static Instr EncodeNeonShiftRegisterOp(NeonShiftOp op, NeonDataType dt,
                                       NeonRegType reg_type, int dst_code,
                                       int src_code, int shift_code) {
  DCHECK_EQ(op, VSHL);  // Only register-shift VSHL is supported here.
  int op_encoding = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, shift_code, &vn, &n, &op_encoding);
  int size = NeonSz(dt);
  int u = NeonU(dt);

  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         0x4 * B8 | n * B7 | m * B5 | vm | op_encoding;
}
4459 
// Encodes an immediate-shift NEON instruction (VSHL/VSHR/VSLI/VSRI).
// The shift amount and element size are packed together into the imm6 field;
// bit 6 of that value spills into the L bit (instruction bit 7).
static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
                               NeonRegType reg_type, int dst_code, int src_code,
                               int shift) {
  int size_in_bits = kBitsPerByte << static_cast<int>(size);
  int op_encoding = 0, imm6 = 0, L = 0;
  switch (op) {
    case VSHL: {
      // Left shifts encode size_in_bits + shift; shift must be < element size.
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = 0x5 * B8;
      break;
    }
    case VSHR: {
      // Right shifts encode 2 * size_in_bits - shift; shift is 1..size.
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    case VSLI: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = B24 | 0x5 * B8;
      break;
    }
    case VSRI: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      op_encoding = B24 | 0x4 * B8;
      break;
    }
    default:
      UNREACHABLE();
  }

  // Split the packed value into the L bit and the 6-bit imm6 field.
  L = imm6 >> 6;
  imm6 &= 0x3F;

  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | L * B7 | m * B5 | B4 |
         vm | op_encoding;
}
4505 
void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, bits) SIMD shift left immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1046.
  emit(EncodeNeonShiftOp(VSHL, NeonDataTypeToSize(dt), false, NEON_Q,
                         dst.code(), src.code(), shift));
}

void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     QwNeonRegister shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, Qn) SIMD shift left by register (per-lane shift amounts).
  // Instruction details available in ARM DDI 0487A.a, F8-3340.
  emit(EncodeNeonShiftRegisterOp(VSHL, dt, NEON_Q, dst.code(), src.code(),
                                 shift.code()));
}
4523 
void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshr(Qm, bits) SIMD shift right immediate; |dt| selects
  // arithmetic (signed) vs. logical (unsigned) shift.
  // Instruction details available in ARM DDI 0406C.b, A8-1052.
  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
                         dst.code(), src.code(), shift));
}
4532 
void Assembler::vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsli(Dm, bits) SIMD shift left and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1056.
  emit(EncodeNeonShiftOp(VSLI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}
4541 
void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsri(Dm, bits) SIMD shift right and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1062.
  emit(EncodeNeonShiftOp(VSRI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}
4550 
// Encodes VRECPE (reciprocal estimate) or VRSQRTE (reciprocal square root
// estimate); |is_rsqrt| selects between the two via instruction bit 7.
static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
                                  QwNeonRegister src) {
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int rsqrt = is_rsqrt ? 1 : 0;
  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x5 * B8 |
         rsqrt * B7 | B6 | m * B5 | vm;
}
4561 
void Assembler::vrecpe(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecpe(Qm) SIMD reciprocal estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1024.
  emit(EncodeNeonEstimateOp(false, dst, src));
}
4568 
void Assembler::vrsqrte(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrte(Qm) SIMD reciprocal square root estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1038.
  emit(EncodeNeonEstimateOp(true, dst, src));
}
4575 
void Assembler::vrecps(QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecps(Qn, Qm) SIMD Newton-Raphson reciprocal refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1026.
  emit(EncodeNeonBinOp(VRECPS, dst, src1, src2));
}
4583 
void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
                        QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1040.
  emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
}
4591 
// Pairwise operations encoded by EncodeNeonPairwiseOp.
enum NeonPairwiseOp { VPADD, VPMIN, VPMAX };

// Encodes an integer pairwise op on D registers. Pairwise ops combine
// adjacent element pairs from the concatenation of the two sources.
static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
                                  DwVfpRegister dst, DwVfpRegister src1,
                                  DwVfpRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VPADD:
      op_encoding = 0xB * B8 | B4;
      break;
    case VPMIN:
      op_encoding = 0xA * B8 | B4;
      break;
    case VPMAX:
      op_encoding = 0xA * B8;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | m * B5 | vm | op_encoding;
}
4622 
void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD floating point pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-982.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
       m * B5 | vm);
}

void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-980.
  emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDataType(size), dst, src1, src2));
}
4646 
void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));
}
4654 
void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmax(Dn, Dm) SIMD integer pairwise MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2));
}
4662 
void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards -Infinity.
  // See ARM DDI 0487F.b, F6-5493. Requires ARMv8.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}
4670 
void Assembler::vrintn(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer to Nearest.
  // See ARM DDI 0487F.b, F6-5497. Requires ARMv8.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTN, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}
4678 
void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards +Infinity.
  // See ARM DDI 0487F.b, F6-5501. Requires ARMv8.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTP, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}
4686 
void Assembler::vrintz(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // SIMD vector round floating-point to integer towards Zero (truncate).
  // See ARM DDI 0487F.b, F6-5511. Requires ARMv8.
  DCHECK(IsEnabled(ARMv8));
  emit(EncodeNeonUnaryOp(VRINTZ, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}
4694 
void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vtst(Qn, Qm) SIMD test integer operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1098.
  emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));
}
4702 
void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD floating point compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQF, dst, src1, src2));
}

void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD integer compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
}
4718 
void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGEF, dst, src1, src2));
}

void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGE, dt, dst, src1, src2));
}
4734 
void Assembler::vcgt(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  emit(EncodeNeonBinOp(VCGTF, dst, src1, src2));
}

void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2));
}
4750 
void Assembler::vrhadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrhadd(Qn, Qm) SIMD integer rounding halving add.
  // Instruction details available in ARM DDI 0406C.b, A8-1030.
  emit(EncodeNeonBinOp(VRHADD, dt, dst, src1, src2));
}
4758 
void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2, int bytes) {
  DCHECK(IsEnabled(NEON));
  // Qd = vext(Qn, Qm) SIMD byte extract: picks 16 bytes starting at byte
  // offset |bytes| from the concatenation of src1:src2.
  // Instruction details available in ARM DDI 0406C.b, A8-890.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  DCHECK_GT(16, bytes);  // The imm4 field holds byte offsets 0..15.
  emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
       n * B7 | B6 | m * B5 | vm);
}
4774 
// Element-permute operations encoded by EncodeNeonSizedOp.
enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN };

// Encodes a two-register NEON permute op parameterized by element size.
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
                               NeonSize size, int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VZIP:
      op_encoding = 0x2 * B16 | 0x3 * B7;
      break;
    case VUZP:
      op_encoding = 0x2 * B16 | 0x2 * B7;
      break;
    case VREV16:
      op_encoding = 0x2 * B7;
      break;
    case VREV32:
      op_encoding = 0x1 * B7;
      break;
    case VREV64:
      // op_encoding is 0;
      break;
    case VTRN:
      op_encoding = 0x2 * B16 | B7;
      break;
    default:
      UNREACHABLE();
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  int sz = static_cast<int>(size);
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}
4811 
void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vzip.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vzip.<size>(Dn, Dm) SIMD zip (interleave); modifies both operands.
    // Instruction details available in ARM DDI 0406C.b, A8-1102.
    emit(EncodeNeonSizedOp(VZIP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vzip.<size>(Qn, Qm) SIMD zip (interleave); modifies both operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1102.
  emit(EncodeNeonSizedOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
}
4829 
void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vuzp.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave); modifies both operands.
    // Instruction details available in ARM DDI 0406C.b, A8-1100.
    emit(EncodeNeonSizedOp(VUZP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave); modifies both operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1100.
  emit(EncodeNeonSizedOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
}
4847 
void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev16.<size>(Qm) SIMD element reverse within 16-bit halfwords.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonSizedOp(VREV16, NEON_Q, size, dst.code(), src.code()));
}
4854 
void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev32.<size>(Qm) SIMD element reverse within 32-bit words.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonSizedOp(VREV32, NEON_Q, size, dst.code(), src.code()));
}
4861 
void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev64.<size>(Qm) SIMD element reverse within 64-bit doublewords.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonSizedOp(VREV64, NEON_Q, size, dst.code(), src.code()));
}
4868 
void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Dn, Dm) SIMD element transpose; modifies both operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonSizedOp(VTRN, NEON_D, size, src1.code(), src2.code()));
}

void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Qn, Qm) SIMD element transpose; modifies both operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
}
4882 
// Encode NEON vtbl / vtbx instruction; the two differ only in how
// out-of-range indices are handled (bit 6 selects the variant).
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
                           DwVfpRegister index, bool vtbx) {
  // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  list.base().split_code(&vn, &n);
  int vm, m;
  index.split_code(&vm, &m);
  int op = vtbx ? 1 : 0;  // vtbl = 0, vtbx = 1.
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
         list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;
}
4900 
// Table lookup: zeroes destination bytes whose index is out of range.
void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, false));
}
4906 
// Table lookup: leaves destination bytes unchanged for out-of-range indices.
void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, true));
}
4912 
4913 // Pseudo instructions.
// Emits a NOP encoded as "mov r<type>, r<type>", so |type| is recoverable
// from the emitted instruction (used to tag special code positions).
void Assembler::nop(int type) {
  // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
  // some of the CPU's pipeline and has to issue. Older ARM chips simply used
  // MOV Rx, Rx as NOP and it performs better even in newer CPUs.
  // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
  // a type.
  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
  emit(al | 13 * B21 | type * B12 | type);
}
4923 
// Discards the top stack slot by bumping sp; does not load the value.
void Assembler::pop() { add(sp, sp, Operand(kPointerSize)); }
4925 
// Returns true if |instr| is a movt, ignoring condition, destination
// register, and immediate fields.
bool Assembler::IsMovT(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
             ((kNumRegisters - 1) * B12) |        // mask out register
             EncodeMovwImmediate(0xFFFF));        // mask out immediate value
  return instr == kMovtPattern;
}
4932 
// Returns true if |instr| is a movw, ignoring condition, destination
// register, and immediate fields.
bool Assembler::IsMovW(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
             ((kNumRegisters - 1) * B12) |        // mask out destination
             EncodeMovwImmediate(0xFFFF));        // mask out immediate value
  return instr == kMovwPattern;
}
4939 
// Opcode bit pattern used to recognize / synthesize movt instructions.
Instr Assembler::GetMovTPattern() { return kMovtPattern; }
4941 
// Opcode bit pattern used to recognize / synthesize movw instructions.
Instr Assembler::GetMovWPattern() { return kMovwPattern; }
4943 
EncodeMovwImmediate(uint32_t immediate)4944 Instr Assembler::EncodeMovwImmediate(uint32_t immediate) {
4945   DCHECK_LT(immediate, 0x10000);
4946   return ((immediate & 0xF000) << 4) | (immediate & 0xFFF);
4947 }
4948 
PatchMovwImmediate(Instr instruction,uint32_t immediate)4949 Instr Assembler::PatchMovwImmediate(Instr instruction, uint32_t immediate) {
4950   instruction &= ~EncodeMovwImmediate(0xFFFF);
4951   return instruction | EncodeMovwImmediate(immediate);
4952 }
4953 
// Decodes an ARM "modified immediate": an 8-bit value rotated right by
// twice the 4-bit rotate field. Returns the materialized 32-bit value.
int Assembler::DecodeShiftImm(Instr instr) {
  int rotate = Instruction::RotateValue(instr) * 2;
  int immed8 = Instruction::Immed8Value(instr);
  return base::bits::RotateRight32(immed8, rotate);
}
4959 
// Re-encodes |immed| as a modified immediate and patches it into |instr|.
// |immed| must be representable as a rotated 8-bit value (DCHECKed).
Instr Assembler::PatchShiftImm(Instr instr, int immed) {
  uint32_t rotate_imm = 0;
  uint32_t immed_8 = 0;
  bool immed_fits = FitsShifter(immed, &rotate_imm, &immed_8, nullptr);
  DCHECK(immed_fits);
  USE(immed_fits);  // Avoid unused-variable warning in release builds.
  return (instr & ~kOff12Mask) | (rotate_imm << 8) | immed_8;
}
4968 
IsNop(Instr instr,int type)4969 bool Assembler::IsNop(Instr instr, int type) {
4970   DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
4971   // Check for mov rx, rx where x = type.
4972   return instr == (al | 13 * B21 | type * B12 | type);
4973 }
4974 
// Returns true if |instr| is a mov with an immediate operand.
bool Assembler::IsMovImmed(Instr instr) {
  return (instr & kMovImmedMask) == kMovImmedPattern;
}
4978 
// Returns true if |instr| is an orr with an immediate operand.
bool Assembler::IsOrrImmed(Instr instr) {
  return (instr & kOrrImmedMask) == kOrrImmedPattern;
}
4982 
// static
// Returns true if |imm32| can be encoded as an addressing-mode-1
// "modified immediate" (8 bits rotated by an even amount).
bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
  uint32_t dummy1;
  uint32_t dummy2;
  return FitsShifter(imm32, &dummy1, &dummy2, nullptr);
}
4989 
// Returns true if |imm32| fits the 12-bit unsigned offset of an
// addressing-mode-2 load/store (sign handled by the U bit).
// NOTE(review): abs(INT32_MIN) is undefined behavior; presumably callers
// never pass INT32_MIN here -- confirm against call sites.
bool Assembler::ImmediateFitsAddrMode2Instruction(int32_t imm32) {
  return is_uint12(abs(imm32));
}
4993 
4994 // Debugging.
// Records a CONST_POOL reloc entry of |size| bytes at the current pc.
void Assembler::RecordConstPool(int size) {
  // We only need this for debugger support, to correctly compute offsets in
  // the code.
  RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
}
5000 
// Grows the code buffer (doubling, capped at +1MB per step), moves the
// emitted code and the reloc info (which grows downward from the buffer
// end), and re-points all cursors into the new buffer.
void Assembler::GrowBuffer() {
  DCHECK_EQ(buffer_start_, buffer_->start());

  // Compute new buffer size.
  int old_size = buffer_->size();
  int new_size = std::min(2 * old_size, old_size + 1 * MB);

  // Some internal data structures overflow for very large buffers,
  // they must ensure that kMaximalBufferSize is not too large.
  if (new_size > kMaximalBufferSize) {
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
  }

  // Set up new buffer.
  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
  DCHECK_EQ(new_size, new_buffer->size());
  byte* new_start = new_buffer->start();

  // Copy the data. Code is copied from the front; reloc info is copied
  // from the back (it is written backwards from the buffer end).
  int pc_delta = new_start - buffer_start_;
  int rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
  MemMove(new_start, buffer_start_, pc_offset());
  byte* new_reloc_start = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.pos()) + rc_delta);
  MemMove(new_reloc_start, reloc_info_writer.pos(), reloc_size);

  // Switch buffers.
  buffer_ = std::move(new_buffer);
  buffer_start_ = new_start;
  pc_ = reinterpret_cast<byte*>(reinterpret_cast<Address>(pc_) + pc_delta);
  byte* new_last_pc = reinterpret_cast<byte*>(
      reinterpret_cast<Address>(reloc_info_writer.last_pc()) + pc_delta);
  reloc_info_writer.Reposition(new_reloc_start, new_last_pc);

  // None of our relocation types are pc relative pointing outside the code
  // buffer nor pc absolute pointing inside the code buffer, so there is no
  // need to relocate any emitted relocation entries.
}
5040 
db(uint8_t data)5041 void Assembler::db(uint8_t data) {
5042   // db is used to write raw data. The constant pool should be emitted or
5043   // blocked before using db.
5044   DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
5045   CheckBuffer();
5046   *reinterpret_cast<uint8_t*>(pc_) = data;
5047   pc_ += sizeof(uint8_t);
5048 }
5049 
dd(uint32_t data)5050 void Assembler::dd(uint32_t data) {
5051   // dd is used to write raw data. The constant pool should be emitted or
5052   // blocked before using dd.
5053   DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
5054   CheckBuffer();
5055   base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), data);
5056   pc_ += sizeof(uint32_t);
5057 }
5058 
dq(uint64_t value)5059 void Assembler::dq(uint64_t value) {
5060   // dq is used to write raw data. The constant pool should be emitted or
5061   // blocked before using dq.
5062   DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
5063   CheckBuffer();
5064   base::WriteUnalignedValue(reinterpret_cast<Address>(pc_), value);
5065   pc_ += sizeof(uint64_t);
5066 }
5067 
RecordRelocInfo(RelocInfo::Mode rmode,intptr_t data)5068 void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
5069   if (!ShouldRecordRelocInfo(rmode)) return;
5070   DCHECK_GE(buffer_space(), kMaxRelocSize);  // too late to grow buffer here
5071   RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
5072   reloc_info_writer.Write(&rinfo);
5073 }
5074 
// Queues a 32-bit constant |value|, loaded by the ldr-pc instruction at code
// offset |position|, for emission in the next constant pool. Shareable
// constants may be merged with an identical pending entry (via
// set_merged_index), in which case only the first occurrence gets a pool slot
// and reloc info.
void Assembler::ConstantPoolAddEntry(int position, RelocInfo::Mode rmode,
                                     intptr_t value) {
  DCHECK(rmode != RelocInfo::CONST_POOL);
  // We can share CODE_TARGETs and embedded objects, but we must make sure we
  // only emit one reloc info for them (thus delta patching will apply the delta
  // only once). At the moment, we do not deduplicate heap object request which
  // are indicated by value == 0.
  bool sharing_ok = RelocInfo::IsShareableRelocMode(rmode) ||
                    (rmode == RelocInfo::CODE_TARGET && value != 0) ||
                    (RelocInfo::IsEmbeddedObjectMode(rmode) && value != 0);
  DCHECK_LT(pending_32_bit_constants_.size(), kMaxNumPending32Constants);
  if (pending_32_bit_constants_.empty()) {
    // Track the first use so CheckConstPool can bound the pc-relative
    // distance to the eventual pool location.
    first_const_pool_32_use_ = position;
  }
  ConstantPoolEntry entry(position, value, sharing_ok, rmode);

  bool shared = false;
  if (sharing_ok) {
    // Merge the constant, if possible: reuse the pool slot of an existing
    // shareable entry with the same value and reloc mode.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& current_entry = pending_32_bit_constants_[i];
      if (!current_entry.sharing_ok()) continue;
      if (entry.value() == current_entry.value() &&
          entry.rmode() == current_entry.rmode()) {
        entry.set_merged_index(i);
        shared = true;
        break;
      }
    }
  }

  pending_32_bit_constants_.push_back(entry);

  // Make sure the constant pool is not emitted in place of the next
  // instruction for which we just recorded relocation info.
  BlockConstPoolFor(1);

  // Emit relocation info. Merged entries skip this so delta patching applies
  // the delta only once (see comment above).
  if (MustOutputRelocInfo(rmode, this) && !shared) {
    RecordRelocInfo(rmode);
  }
}
5117 
BlockConstPoolFor(int instructions)5118 void Assembler::BlockConstPoolFor(int instructions) {
5119   int pc_limit = pc_offset() + instructions * kInstrSize;
5120   if (no_const_pool_before_ < pc_limit) {
5121     // Max pool start (if we need a jump and an alignment).
5122 #ifdef DEBUG
5123     int start = pc_limit + kInstrSize + 2 * kPointerSize;
5124     DCHECK(pending_32_bit_constants_.empty() ||
5125            (start < first_const_pool_32_use_ + kMaxDistToIntPool));
5126 #endif
5127     no_const_pool_before_ = pc_limit;
5128   }
5129 
5130   if (next_buffer_check_ < no_const_pool_before_) {
5131     next_buffer_check_ = no_const_pool_before_;
5132   }
5133 }
5134 
// Decides whether the pending 32-bit constant pool must be emitted here and,
// if so, emits it: an optional jump over the pool, the pool marker (encoding
// the pool length), and the deduplicated constant words, while back-patching
// every pending ldr-pc instruction with its final offset into the pool.
void Assembler::CheckConstPool(bool force_emit, bool require_jump) {
  // Some short sequence of instruction mustn't be broken up by constant pool
  // emission, such sequences are protected by calls to BlockConstPoolFor and
  // BlockConstPoolScope.
  if (is_const_pool_blocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK(!force_emit);
    return;
  }

  // There is nothing to do if there are no pending constant pool entries.
  if (pending_32_bit_constants_.empty()) {
    // Calculate the offset of the next check.
    next_buffer_check_ = pc_offset() + kCheckPoolInterval;
    return;
  }

  // Check that the code buffer is large enough before emitting the constant
  // pool (include the jump over the pool and the constant pool marker and
  // the gap to the relocation information).
  int jump_instr = require_jump ? kInstrSize : 0;
  int size_up_to_marker = jump_instr + kInstrSize;
  // Estimated size assumes no deduplication; merged entries are subtracted
  // below once we decide to emit.
  int estimated_size_after_marker =
      pending_32_bit_constants_.size() * kPointerSize;
  int estimated_size = size_up_to_marker + estimated_size_after_marker;

  // We emit a constant pool when:
  //  * requested to do so by parameter force_emit (e.g. after each function).
  //  * the distance from the first instruction accessing the constant pool to
  //    any of the constant pool entries will exceed its limit the next
  //    time the pool is checked. This is overly restrictive, but we don't emit
  //    constant pool entries in-order so it's conservatively correct.
  //  * the instruction doesn't require a jump after itself to jump over the
  //    constant pool, and we're getting close to running out of range.
  if (!force_emit) {
    DCHECK(!pending_32_bit_constants_.empty());
    bool need_emit = false;
    int dist32 = pc_offset() + estimated_size - first_const_pool_32_use_;
    if ((dist32 >= kMaxDistToIntPool - kCheckPoolInterval) ||
        (!require_jump && (dist32 >= kMaxDistToIntPool / 2))) {
      need_emit = true;
    }
    if (!need_emit) return;
  }

  // Deduplicate constants: merged entries share a pool slot with an earlier
  // entry, so they contribute no data word of their own.
  int size_after_marker = estimated_size_after_marker;

  for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_32_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kPointerSize;
  }

  int size = size_up_to_marker + size_after_marker;

  int needed_space = size + kGap;
  while (buffer_space() <= needed_space) GrowBuffer();

  {
    // Block recursive calls to CheckConstPool.
    BlockConstPoolScope block_const_pool(this);
    RecordComment("[ Constant Pool");
    RecordConstPool(size);

    // Used only for the DCHECK_EQ below: the emitted pool must match the
    // computed size exactly.
    Label size_check;
    bind(&size_check);

    // Emit jump over constant pool if necessary.
    Label after_pool;
    if (require_jump) {
      b(&after_pool);
    }

    // Put down constant pool marker "Undefined instruction".
    // The data size helps disassembly know what to print.
    emit(kConstantPoolMarker |
         EncodeConstantPoolLength(size_after_marker / kPointerSize));

    // Emit 32-bit constant pool entries.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_32_bit_constants_[i];
      Instr instr = instr_at(entry.position());

      // 64-bit loads shouldn't get here.
      DCHECK(!IsVldrDPcImmediateOffset(instr));
      DCHECK(!IsMovW(instr));
      DCHECK(IsLdrPcImmediateOffset(instr) &&
             GetLdrRegisterImmediateOffset(instr) == 0);

      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint12(delta));
      // 0 is the smallest delta:
      //   ldr rd, [pc, #0]
      //   constant pool marker
      //   data

      if (entry.is_merged()) {
        DCHECK(entry.sharing_ok());
        // Point this load at the pool slot already emitted (or about to be
        // emitted) for the entry it was merged with.
        ConstantPoolEntry& merged =
            pending_32_bit_constants_[entry.merged_index()];
        DCHECK(entry.value() == merged.value());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsLdrPcImmediateOffset(merged_instr));
        delta = GetLdrRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      instr_at_put(entry.position(),
                   SetLdrRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        emit(entry.value());
      }
    }

    pending_32_bit_constants_.clear();

    first_const_pool_32_use_ = -1;

    RecordComment("]");

    DCHECK_EQ(size, SizeOfCodeGeneratedSince(&size_check));

    if (after_pool.is_linked()) {
      bind(&after_pool);
    }
  }

  // Since a constant pool was just emitted, move the check offset forward by
  // the standard interval.
  next_buffer_check_ = pc_offset() + kCheckPoolInterval;
}
5265 
// Constructs an assembler that patches |instructions| instruction words in
// place at |address|, using an external (non-owned, non-growable) buffer.
// kGap is added so internal buffer-space checks behave as in a normal buffer.
PatchingAssembler::PatchingAssembler(const AssemblerOptions& options,
                                     byte* address, int instructions)
    : Assembler(options, ExternalAssemblerBuffer(
                             address, instructions * kInstrSize + kGap)) {
  // No relocation info may be written while patching.
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}
5272 
// Verifies on destruction that the patch region was filled exactly and that
// no constant pool entries or relocation info were produced while patching.
PatchingAssembler::~PatchingAssembler() {
  // Check that we don't have any pending constant pools.
  DCHECK(pending_32_bit_constants_.empty());

  // Check that the code was patched as expected: pc_ must have advanced to
  // the end of the patch region (excluding the kGap slack).
  DCHECK_EQ(pc_, buffer_start_ + buffer_->size() - kGap);
  DCHECK_EQ(reloc_info_writer.pos(), buffer_start_ + buffer_->size());
}
5281 
Emit(Address addr)5282 void PatchingAssembler::Emit(Address addr) { emit(static_cast<Instr>(addr)); }
5283 
PadWithNops()5284 void PatchingAssembler::PadWithNops() {
5285   DCHECK_LE(pc_, buffer_start_ + buffer_->size() - kGap);
5286   while (pc_ < buffer_start_ + buffer_->size() - kGap) {
5287     nop();
5288   }
5289 }
5290 
// Snapshots the assembler's current scratch register lists (core and VFP) so
// the destructor can restore them, making any registers acquired inside this
// scope available again afterwards.
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
    : assembler_(assembler),
      old_available_(*assembler->GetScratchRegisterList()),
      old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}
5295 
~UseScratchRegisterScope()5296 UseScratchRegisterScope::~UseScratchRegisterScope() {
5297   *assembler_->GetScratchRegisterList() = old_available_;
5298   *assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
5299 }
5300 
Acquire()5301 Register UseScratchRegisterScope::Acquire() {
5302   RegList* available = assembler_->GetScratchRegisterList();
5303   DCHECK_NOT_NULL(available);
5304   DCHECK_NE(*available, 0);
5305   int index = static_cast<int>(base::bits::CountTrailingZeros32(*available));
5306   Register reg = Register::from_code(index);
5307   *available &= ~reg.bit();
5308   return reg;
5309 }
5310 
5311 }  // namespace internal
5312 }  // namespace v8
5313 
5314 #endif  // V8_TARGET_ARCH_ARM
5315