/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

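// Shorthand: `___ Foo(...)` expands to `vixl_masm_.Foo(...)`, i.e. it forwards to the
// wrapped VIXL macro-assembler.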
#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___   vixl_masm_.
#endif

// Thread register definition.
extern const vixl32::Register tr(TR);
// Marking register definition.
extern const vixl32::Register mr(MR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
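  // If `value` cannot be encoded as a modified immediate but its bitwise complement can,
  // materialize it with a single MVN; otherwise let the macro-assembler's Mov pick an
  // encoding (which may take more than one instruction, e.g. MOVW/MOVT).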
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
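  // A "modified immediate" is a constant the instruction set can encode directly; for T32
  // this is an 8-bit value rotated into position, or one of the replicated byte patterns
  // 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY.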
  return vixl_masm_.IsModifiedImmediate(immediate);
}

bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // An unsigned immediate that fits in 12 bits can be encoded if we don't need to set
      // condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support immediates of up to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}

bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
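  // Split `offset` into a low part that fits the instruction's immediate field
  // (`allowed_offset_bits`) and a high remainder. The split is only usable if the remainder
  // (or its negation) can itself be encoded as an immediate for a single ADD on the base.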
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = offset & ~allowed_offset_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
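  // The caller guarantees the offset does not fit as-is: either split it (add the high part
  // to `base` in `temp` and return the small remainder), or materialize the whole offset in
  // `temp`, add the base, and return 0.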
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldStoreOffsetThumb(type, offset) is true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // Address object used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as a secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset(),
    // and in the "unsplittable" path get rid of the "add" by using an indexed store instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldLoadOffsetThumb(type, offset) is true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlining AdjustLoadStoreOffset() here allows us to pull a few more tricks.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use `dest` for the adjusted base. If it is a low register, we may end up with a 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
      // Use `dest` for the adjusted base. If it is a low register, we may end up with a 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer Str/Ldr to Add+Stm/Ldm in ArmVIXLAssembler::StoreRegisterList and
// ArmVIXLAssembler::LoadRegisterList when the former generates less code (in size).
static constexpr int kRegListThreshold = 4;

void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
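      // STM/LDM with no write-back access consecutive words starting at the base register,
      // so when the slots do not start at SP itself we first compute sp + stack_offset into
      // a scratch register.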
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): think about using ADDS, which updates flags, where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): implement this optimization in VIXL.
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}

// Inside an IT block we must use the raw assembler; macro-assembler instructions are not
// permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
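  // The lowercase mnemonics invoke the underlying assembler directly and emit a single
  // predicated instruction; the capitalized macro-assembler forms may expand to several
  // instructions, which is not allowed here.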
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, the Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:0) to encode the branch target address (so it cannot
    //   be a far target).
    // - Only low registers (i.e. R0 .. R7) can be encoded.
    // - Only forward branches (unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use 16-bit T2 encoding of B instruction.
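    // The narrow encoding has a limited branch range, so this relies on the forward target
    // being bound close enough for the 16-bit form to reach it.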
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use the 16-bit T1 encoding of the conditional B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding, callers can pass
  // the hint is_far_target=false. By default this function uses 'EncodingSizeType::Best',
  // which generates a 32-bit T3 encoding.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art